github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/ssa/gen/ARM64.rules (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  (AddPtr x y) -> (ADD x y) // pointers and every integer width share the single ADD op
     6  (Add64 x y) -> (ADD x y)
     7  (Add32 x y) -> (ADD x y)
     8  (Add16 x y) -> (ADD x y)
     9  (Add8 x y) -> (ADD x y)
    10  (Add32F x y) -> (FADDS x y)
    11  (Add64F x y) -> (FADDD x y)
    12  
        // Subtraction follows the same pattern as addition: one SUB for pointers and
        // every integer width, FSUBS/FSUBD for the two float widths.
    13  (SubPtr x y) -> (SUB x y)
    14  (Sub64 x y) -> (SUB x y)
    15  (Sub32 x y) -> (SUB x y)
    16  (Sub16 x y) -> (SUB x y)
    17  (Sub8 x y) -> (SUB x y)
    18  (Sub32F x y) -> (FSUBS x y)
    19  (Sub64F x y) -> (FSUBD x y)
    20  
        // Only the 64-bit multiply uses MUL; the 32-, 16- and 8-bit forms all use MULW.
    21  (Mul64 x y) -> (MUL x y)
    22  (Mul32 x y) -> (MULW x y)
    23  (Mul16 x y) -> (MULW x y)
    24  (Mul8 x y) -> (MULW x y)
    25  (Mul32F x y) -> (FMULS x y)
    26  (Mul64F x y) -> (FMULD x y)
    27  
        // High-half multiplies. The 64-bit forms map directly to MULH/UMULH. The 32-bit
        // forms take the MULL/UMULL result (typed as a 64-bit integer) and shift it
        // right by 32 so that the upper half of the product ends up in the low bits.
    28  (Hmul64 x y) -> (MULH x y)
    29  (Hmul64u x y) -> (UMULH x y)
    30  (Hmul32 x y) -> (SRAconst (MULL <typ.Int64> x y) [32])
    31  (Hmul32u x y) -> (SRAconst (UMULL <typ.UInt64> x y) [32])
    32  (Mul64uhilo x y) -> (LoweredMuluhilo x y)
    33  
    34  (Div64 x y) -> (DIV x y) // 64-bit forms use DIV/UDIV, 32-bit forms use DIVW/UDIVW
    35  (Div64u x y) -> (UDIV x y)
    36  (Div32 x y) -> (DIVW x y)
    37  (Div32u x y) -> (UDIVW x y)
        // 16- and 8-bit operands are widened to 32 bits first (sign-extended for the
        // signed ops, zero-extended for the unsigned ops) and then use the 32-bit op.
    38  (Div16 x y) -> (DIVW (SignExt16to32 x) (SignExt16to32 y))
    39  (Div16u x y) -> (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
    40  (Div8 x y) -> (DIVW (SignExt8to32 x) (SignExt8to32 y))
    41  (Div8u x y) -> (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
    42  (Div32F x y) -> (FDIVS x y)
    43  (Div64F x y) -> (FDIVD x y)
    44  
        // Remainder: same width handling as division above, using MOD/UMOD and
        // MODW/UMODW. There are no floating-point Mod rules in this group.
    45  (Mod64 x y) -> (MOD x y)
    46  (Mod64u x y) -> (UMOD x y)
    47  (Mod32 x y) -> (MODW x y)
    48  (Mod32u x y) -> (UMODW x y)
    49  (Mod16 x y) -> (MODW (SignExt16to32 x) (SignExt16to32 y))
    50  (Mod16u x y) -> (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
    51  (Mod8 x y) -> (MODW (SignExt8to32 x) (SignExt8to32 y))
    52  (Mod8u x y) -> (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
    53  
    54  // (x + y) / 2 with x>=y -> (x - y) / 2 + y
        // The rewritten form is ((x - y) >> 1) + y using an unsigned (logical) shift.
        // NOTE(review): the rule itself does not check x >= y; it presumably relies on
        // that being part of the Avg64u op's contract -- confirm against the op definition.
    55  (Avg64u <t> x y) -> (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
    56  
    57  (And64 x y) -> (AND x y) // every integer width uses the same AND/OR/XOR op
    58  (And32 x y) -> (AND x y)
    59  (And16 x y) -> (AND x y)
    60  (And8 x y) -> (AND x y)
    61  
    62  (Or64 x y) -> (OR x y)
    63  (Or32 x y) -> (OR x y)
    64  (Or16 x y) -> (OR x y)
    65  (Or8 x y) -> (OR x y)
    66  
    67  (Xor64 x y) -> (XOR x y)
    68  (Xor32 x y) -> (XOR x y)
    69  (Xor16 x y) -> (XOR x y)
    70  (Xor8 x y) -> (XOR x y)
    71  
    72  // unary ops
        // Integer negation of any width uses NEG; float negation uses FNEGS/FNEGD.
    73  (Neg64 x) -> (NEG x)
    74  (Neg32 x) -> (NEG x)
    75  (Neg16 x) -> (NEG x)
    76  (Neg8 x) -> (NEG x)
    77  (Neg32F x) -> (FNEGS x)
    78  (Neg64F x) -> (FNEGD x)
    79  
        // Bitwise complement of any integer width uses MVN.
    80  (Com64 x) -> (MVN x)
    81  (Com32 x) -> (MVN x)
    82  (Com16 x) -> (MVN x)
    83  (Com8 x) -> (MVN x)
    84  
    85  // math package intrinsics
        // Each intrinsic is rewritten one-to-one to a single ARM64 floating-point op;
        // the five rounding ops each select a different FRINT*D variant.
    86  (Abs x) -> (FABSD x)
    87  (Sqrt x) -> (FSQRTD x)
    88  (Ceil  x) -> (FRINTPD x)
    89  (Floor x) -> (FRINTMD x)
    90  (Round x) -> (FRINTAD x)
    91  (RoundToEven x) -> (FRINTND x)
    92  (Trunc x) -> (FRINTZD x)
    93  
    94  // lowering rotates
        // A rotate-left by y is expressed as a rotate-right (ROR/RORW) by the negated
        // amount; the NEG keeps the type of the original shift amount y.
    95  (RotateLeft32 x y) -> (RORW x (NEG <y.Type> y))
    96  (RotateLeft64 x y) -> (ROR x (NEG <y.Type> y))
    97  
    98  (Ctz64NonZero x) -> (Ctz64 x) // the NonZero variants simply reuse the general Ctz rules below
    99  (Ctz32NonZero x) -> (Ctz32 x)
   100  
        // Count trailing zeros = count leading zeros (CLZ/CLZW) of the bit-reversed input.
   101  (Ctz64 <t> x) -> (CLZ (RBIT <t> x))
   102  (Ctz32 <t> x) -> (CLZW (RBITW <t> x))
   103  
        // Population count goes through the FP/SIMD side: move the integer into an FP
        // register (FMOVDgpfp), apply VCNT then VUADDLV, and move the result back
        // (FMOVDfpgp). 32- and 16-bit inputs are zero-extended to 64 bits first.
        // NOTE(review): presumably VCNT yields per-byte counts that VUADDLV sums -- confirm.
   104  (PopCount64 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
   105  (PopCount32 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
   106  (PopCount16 <t> x) -> (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt16to64 x)))))
   107  
   108  // Load args directly into the register class where it will be used.
        // A register-class move of an Arg is replaced by a fresh Arg of the move's
        // result type t, created in the function's entry block (@b.Func.Entry).
   109  (FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
   110  (FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
   111  
   112  // Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
        // The move is dropped and the store is switched to the variant that stores the
        // move's source value directly (integer store <-> FP store of the same width).
   113  (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) -> (FMOVDstore [off] {sym} ptr val mem)
   114  (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) -> (MOVDstore [off] {sym} ptr val mem)
   115  (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) -> (FMOVSstore [off] {sym} ptr val mem)
   116  (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) -> (MOVWstore [off] {sym} ptr val mem)
   117  
   118  // float <-> int register moves, with no conversion.
   119  // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
        // A load whose memory argument is a store of the other register class to the
        // same [off] {sym} ptr is replaced by a direct register move of the stored value.
   120  (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) -> (FMOVDfpgp val)
   121  (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) -> (FMOVDgpfp val)
   122  (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) -> (FMOVSfpgp val)
   123  (FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) -> (FMOVSgpfp val)
   124  
   125  (BitLen64 x) -> (SUB (MOVDconst [64]) (CLZ <typ.Int> x)) // bit length = 64 - (count of leading zeros)
   126  
   127  (Bswap64 x) -> (REV x)
   128  (Bswap32 x) -> (REVW x)
   129  
        // The 64- and 32-bit reversals map directly to RBIT/RBITW. The 16- and 8-bit
        // forms reverse all 64 bits and then shift right logically by 48 / 56 so the
        // reversed low bits end up back at the bottom of the register.
   130  (BitRev64 x) -> (RBIT x)
   131  (BitRev32 x) -> (RBITW x)
   132  (BitRev16 x) -> (SRLconst [48] (RBIT <typ.UInt64> x))
   133  (BitRev8 x) -> (SRLconst [56] (RBIT <typ.UInt64> x))
   134  
   135  // boolean ops -- booleans are represented with 0=false, 1=true
        // With that 0/1 encoding, XOR with the constant 1 inverts a boolean: Not is
        // x XOR 1, NeqB is x XOR y, and EqB is the inversion of that (1 XOR (x XOR y)).
   136  (AndB x y) -> (AND x y)
   137  (OrB x y) -> (OR x y)
   138  (EqB x y) -> (XOR (MOVDconst [1]) (XOR <typ.Bool> x y))
   139  (NeqB x y) -> (XOR x y)
   140  (Not x) -> (XOR (MOVDconst [1]) x)
   141  
   142  // shifts
   143  // hardware instruction uses only the low 6 bits of the shift
   144  // we compare to 64 to ensure Go semantics for large shifts
   145  // Rules about rotates with non-const shift are based on the following rules,
   146  // if the following rules change, please also modify the rules based on them.
        //
        // Left shifts: every rule has the same shape. A CSEL with condition LessThanU
        // chooses between the SLL result and the constant 0, driven by comparing the
        // shift amount against 64. Shift amounts narrower than 64 bits are
        // zero-extended to 64 bits before being shifted by and compared. The value
        // being shifted is used as-is for every operand width.
   147  (Lsh64x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   148  (Lsh64x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   149  (Lsh64x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   150  (Lsh64x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   151  
   152  (Lsh32x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   153  (Lsh32x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   154  (Lsh32x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   155  (Lsh32x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   156  
   157  (Lsh16x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   158  (Lsh16x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   159  (Lsh16x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   160  (Lsh16x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   161  
   162  (Lsh8x64 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
   163  (Lsh8x32 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   164  (Lsh8x16 <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   165  (Lsh8x8  <t> x y) -> (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   166  
   167  (Rsh64Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) // same CSEL-against-64 shape as the left shifts, using SRL
   168  (Rsh64Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   169  (Rsh64Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   170  (Rsh64Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   171  
        // For operands narrower than 64 bits the value being shifted is zero-extended
        // to 64 bits first, so the 64-bit SRL shifts zeros in from the top.
   172  (Rsh32Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   173  (Rsh32Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   174  (Rsh32Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   175  (Rsh32Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   176  
   177  (Rsh16Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   178  (Rsh16Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   179  (Rsh16Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   180  (Rsh16Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   181  
   182  (Rsh8Ux64 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
   183  (Rsh8Ux32 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
   184  (Rsh8Ux16 <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
   185  (Rsh8Ux8  <t> x y) -> (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64  y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64  y)))
   186  
   187  (Rsh64x64 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) // here the CSEL picks the shift AMOUNT: y itself, or the constant 63
   188  (Rsh64x32 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   189  (Rsh64x16 x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   190  (Rsh64x8  x y) -> (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   191  
        // Unlike the unsigned shifts, the result is always an SRA; only its shift
        // amount is selected (against the same comparison with 64). Operands narrower
        // than 64 bits are sign-extended to 64 bits before the arithmetic shift.
   192  (Rsh32x64 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   193  (Rsh32x32 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   194  (Rsh32x16 x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   195  (Rsh32x8  x y) -> (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   196  
   197  (Rsh16x64 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   198  (Rsh16x32 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   199  (Rsh16x16 x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   200  (Rsh16x8  x y) -> (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   201  
   202  (Rsh8x64 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
   203  (Rsh8x32 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
   204  (Rsh8x16 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
   205  (Rsh8x8  x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64  y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64  y))))
   206  
   207  // constants
        // Every integer constant width, nil and bool all become a MOVDconst carrying
        // the same auxiliary value (0 for nil); float constants use FMOVSconst/FMOVDconst.
   208  (Const64 [val]) -> (MOVDconst [val])
   209  (Const32 [val]) -> (MOVDconst [val])
   210  (Const16 [val]) -> (MOVDconst [val])
   211  (Const8 [val]) -> (MOVDconst [val])
   212  (Const32F [val]) -> (FMOVSconst [val])
   213  (Const64F [val]) -> (FMOVDconst [val])
   214  (ConstNil) -> (MOVDconst [0])
   215  (ConstBool [b]) -> (MOVDconst [b])
   216  
        // Slicemask is computed as (-x) arithmetically shifted right by 63, i.e. the
        // sign bit of -x replicated across the whole register.
   217  (Slicemask <t> x) -> (SRAconst (NEG <t> x) [63])
   218  
   219  // truncations
   220  // Because we ignore high parts of registers, truncates are just copies.
   221  (Trunc16to8 x) -> x
   222  (Trunc32to8 x) -> x
   223  (Trunc32to16 x) -> x
   224  (Trunc64to8 x) -> x
   225  (Trunc64to16 x) -> x
   226  (Trunc64to32 x) -> x
   227  
   228  // Zero-/Sign-extensions
        // The op is chosen by the SOURCE width only (8 -> MOVB*, 16 -> MOVH*,
        // 32 -> MOVW*); the destination width does not change which op is used.
   229  (ZeroExt8to16 x) -> (MOVBUreg x)
   230  (ZeroExt8to32 x) -> (MOVBUreg x)
   231  (ZeroExt16to32 x) -> (MOVHUreg x)
   232  (ZeroExt8to64 x) -> (MOVBUreg x)
   233  (ZeroExt16to64 x) -> (MOVHUreg x)
   234  (ZeroExt32to64 x) -> (MOVWUreg x)
   235  
   236  (SignExt8to16 x) -> (MOVBreg x)
   237  (SignExt8to32 x) -> (MOVBreg x)
   238  (SignExt16to32 x) -> (MOVHreg x)
   239  (SignExt8to64 x) -> (MOVBreg x)
   240  (SignExt16to64 x) -> (MOVHreg x)
   241  (SignExt32to64 x) -> (MOVWreg x)
   242  
   243  // float <-> int conversion
        // One rule per (source, destination) pair; each maps to a single conversion op.
        // Integer -> float (signed SCVTF*, unsigned UCVTF*):
   244  (Cvt32to32F x) -> (SCVTFWS x)
   245  (Cvt32to64F x) -> (SCVTFWD x)
   246  (Cvt64to32F x) -> (SCVTFS x)
   247  (Cvt64to64F x) -> (SCVTFD x)
   248  (Cvt32Uto32F x) -> (UCVTFWS x)
   249  (Cvt32Uto64F x) -> (UCVTFWD x)
   250  (Cvt64Uto32F x) -> (UCVTFS x)
   251  (Cvt64Uto64F x) -> (UCVTFD x)
        // Float -> integer (signed FCVTZS*, unsigned FCVTZU*):
   252  (Cvt32Fto32 x) -> (FCVTZSSW x)
   253  (Cvt64Fto32 x) -> (FCVTZSDW x)
   254  (Cvt32Fto64 x) -> (FCVTZSS x)
   255  (Cvt64Fto64 x) -> (FCVTZSD x)
   256  (Cvt32Fto32U x) -> (FCVTZUSW x)
   257  (Cvt64Fto32U x) -> (FCVTZUDW x)
   258  (Cvt32Fto64U x) -> (FCVTZUS x)
   259  (Cvt64Fto64U x) -> (FCVTZUD x)
        // Float <-> float width changes:
   260  (Cvt32Fto64F x) -> (FCVTSD x)
   261  (Cvt64Fto32F x) -> (FCVTDS x)
   262  
   263  (Round32F x) -> (LoweredRound32F x)
   264  (Round64F x) -> (LoweredRound64F x)
   265  
   266  // comparisons
        // Each comparison becomes a condition pseudo-op (Equal, LessThan, ...) applied to
        // a flag-producing compare: CMPW for 8/16/32-bit integers, CMP for 64-bit integers
        // and pointers, FCMPS/FCMPD for floats. 8- and 16-bit operands are widened to
        // 32 bits first: zero-extended for equality and unsigned orderings,
        // sign-extended for signed orderings.
   267  (Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   268  (Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   269  (Eq32 x y) -> (Equal (CMPW x y))
   270  (Eq64 x y) -> (Equal (CMP x y))
   271  (EqPtr x y) -> (Equal (CMP x y))
   272  (Eq32F x y) -> (Equal (FCMPS x y))
   273  (Eq64F x y) -> (Equal (FCMPD x y))
   274  
   275  (Neq8 x y)  -> (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   276  (Neq16 x y) -> (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   277  (Neq32 x y) -> (NotEqual (CMPW x y))
   278  (Neq64 x y) -> (NotEqual (CMP x y))
   279  (NeqPtr x y) -> (NotEqual (CMP x y))
   280  (Neq32F x y) -> (NotEqual (FCMPS x y))
   281  (Neq64F x y) -> (NotEqual (FCMPD x y))
   282  
        // Float "less" and "less or equal" are expressed as Greater* with the two
        // operands swapped (x < y  <=>  y > x); see the per-line notes about NaN.
   283  (Less8 x y)  -> (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   284  (Less16 x y) -> (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   285  (Less32 x y) -> (LessThan (CMPW x y))
   286  (Less64 x y) -> (LessThan (CMP x y))
   287  (Less32F x y) -> (GreaterThan (FCMPS y x)) // reverse operands to work around NaN
   288  (Less64F x y) -> (GreaterThan (FCMPD y x)) // reverse operands to work around NaN
   289  
   290  (Less8U x y)  -> (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   291  (Less16U x y) -> (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   292  (Less32U x y) -> (LessThanU (CMPW x y))
   293  (Less64U x y) -> (LessThanU (CMP x y))
   294  
   295  (Leq8 x y)  -> (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   296  (Leq16 x y) -> (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   297  (Leq32 x y) -> (LessEqual (CMPW x y))
   298  (Leq64 x y) -> (LessEqual (CMP x y))
   299  (Leq32F x y) -> (GreaterEqual (FCMPS y x)) // reverse operands to work around NaN
   300  (Leq64F x y) -> (GreaterEqual (FCMPD y x)) // reverse operands to work around NaN
   301  
   302  (Leq8U x y)  -> (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   303  (Leq16U x y) -> (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   304  (Leq32U x y) -> (LessEqualU (CMPW x y))
   305  (Leq64U x y) -> (LessEqualU (CMP x y))
   306  
   307  (Greater8 x y)  -> (GreaterThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   308  (Greater16 x y) -> (GreaterThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   309  (Greater32 x y) -> (GreaterThan (CMPW x y))
   310  (Greater64 x y) -> (GreaterThan (CMP x y))
   311  (Greater32F x y) -> (GreaterThan (FCMPS x y))
   312  (Greater64F x y) -> (GreaterThan (FCMPD x y))
   313  
   314  (Greater8U x y)  -> (GreaterThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   315  (Greater16U x y) -> (GreaterThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   316  (Greater32U x y) -> (GreaterThanU (CMPW x y))
   317  (Greater64U x y) -> (GreaterThanU (CMP x y))
   318  
   319  (Geq8 x y)  -> (GreaterEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
   320  (Geq16 x y) -> (GreaterEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
   321  (Geq32 x y) -> (GreaterEqual (CMPW x y))
   322  (Geq64 x y) -> (GreaterEqual (CMP x y))
   323  (Geq32F x y) -> (GreaterEqual (FCMPS x y))
   324  (Geq64F x y) -> (GreaterEqual (FCMPD x y))
   325  
   326  (Geq8U x y)  -> (GreaterEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
   327  (Geq16U x y) -> (GreaterEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
   328  (Geq32U x y) -> (GreaterEqualU (CMPW x y))
   329  (Geq64U x y) -> (GreaterEqualU (CMP x y))
   330  
   331  // CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
        // When flagArg(bool) returns a flags value, it is used directly and bool's own
        // op becomes the CSEL condition. Otherwise bool is compared against 0
        // (CMPWconst [0]) and the CSEL condition is NotEqual.
   332  (CondSelect x y bool) && flagArg(bool) != nil -> (CSEL {bool.Op} x y flagArg(bool))
   333  (CondSelect x y bool) && flagArg(bool) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] bool))
   334  
        // An offset from SP becomes a MOVDaddr; any other base pointer uses ADDconst.
        // NOTE(review): this relies on the SP-specific rule being tried before the
        // general one -- presumably rules are matched in file order; confirm.
   335  (OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
   336  (OffPtr [off] ptr) -> (ADDconst [off] ptr)
   337  
        // LocalAddr's extra (memory) argument is dropped; both forms become MOVDaddr.
   338  (Addr {sym} base) -> (MOVDaddr {sym} base)
   339  (LocalAddr {sym} base _) -> (MOVDaddr {sym} base)
   340  
   341  // loads
        // The load op is selected from the loaded type t: booleans and unsigned integers
        // use the U (unsigned) load of their width, signed integers use the plain load,
        // 64-bit integers and pointers use MOVDload, floats use FMOVSload/FMOVDload.
   342  (Load <t> ptr mem) && t.IsBoolean() -> (MOVBUload ptr mem)
   343  (Load <t> ptr mem) && (is8BitInt(t) && isSigned(t)) -> (MOVBload ptr mem)
   344  (Load <t> ptr mem) && (is8BitInt(t) && !isSigned(t)) -> (MOVBUload ptr mem)
   345  (Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
   346  (Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
   347  (Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) -> (MOVWload ptr mem)
   348  (Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) -> (MOVWUload ptr mem)
   349  (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
   350  (Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
   351  (Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
   352  
   353  // stores
        // The store op is selected from the byte size of the stored type t. For 4- and
        // 8-byte stores the TYPE OF THE VALUE (val.Type) decides between the integer
        // store and the floating-point store.
   354  (Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
   355  (Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
   356  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
   357  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
   358  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
   359  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
   360  
   361  // zeroing
        // Sizes 0..16 and 32/48/64 are expanded inline into explicit stores of the
        // constant 0. Sizes that are a power of two up to 8 take a single store.
   362  (Zero [0] _ mem) -> mem
   363  (Zero [1] ptr mem) -> (MOVBstore ptr (MOVDconst [0]) mem)
   364  (Zero [2] ptr mem) -> (MOVHstore ptr (MOVDconst [0]) mem)
   365  (Zero [4] ptr mem) -> (MOVWstore ptr (MOVDconst [0]) mem)
   366  (Zero [8] ptr mem) -> (MOVDstore ptr (MOVDconst [0]) mem)
   367  
        // Other sizes below 16 chain stores of decreasing width at increasing offsets;
        // the innermost store (offset 0) is the first in the memory chain.
   368  (Zero [3] ptr mem) ->
   369  	(MOVBstore [2] ptr (MOVDconst [0])
   370  		(MOVHstore ptr (MOVDconst [0]) mem))
   371  (Zero [5] ptr mem) ->
   372  	(MOVBstore [4] ptr (MOVDconst [0])
   373  		(MOVWstore ptr (MOVDconst [0]) mem))
   374  (Zero [6] ptr mem) ->
   375  	(MOVHstore [4] ptr (MOVDconst [0])
   376  		(MOVWstore ptr (MOVDconst [0]) mem))
   377  (Zero [7] ptr mem) ->
   378  	(MOVBstore [6] ptr (MOVDconst [0])
   379  		(MOVHstore [4] ptr (MOVDconst [0])
   380  			(MOVWstore ptr (MOVDconst [0]) mem)))
   381  (Zero [9] ptr mem) ->
   382  	(MOVBstore [8] ptr (MOVDconst [0])
   383  		(MOVDstore ptr (MOVDconst [0]) mem))
   384  (Zero [10] ptr mem) ->
   385  	(MOVHstore [8] ptr (MOVDconst [0])
   386  		(MOVDstore ptr (MOVDconst [0]) mem))
   387  (Zero [11] ptr mem) ->
   388  	(MOVBstore [10] ptr (MOVDconst [0])
   389  		(MOVHstore [8] ptr (MOVDconst [0])
   390  			(MOVDstore ptr (MOVDconst [0]) mem)))
   391  (Zero [12] ptr mem) ->
   392  	(MOVWstore [8] ptr (MOVDconst [0])
   393  		(MOVDstore ptr (MOVDconst [0]) mem))
   394  (Zero [13] ptr mem) ->
   395  	(MOVBstore [12] ptr (MOVDconst [0])
   396  		(MOVWstore [8] ptr (MOVDconst [0])
   397  			(MOVDstore ptr (MOVDconst [0]) mem)))
   398  (Zero [14] ptr mem) ->
   399  	(MOVHstore [12] ptr (MOVDconst [0])
   400  		(MOVWstore [8] ptr (MOVDconst [0])
   401  			(MOVDstore ptr (MOVDconst [0]) mem)))
   402  (Zero [15] ptr mem) ->
   403  	(MOVBstore [14] ptr (MOVDconst [0])
   404  		(MOVHstore [12] ptr (MOVDconst [0])
   405  			(MOVWstore [8] ptr (MOVDconst [0])
   406  				(MOVDstore ptr (MOVDconst [0]) mem))))
        // 16-byte multiples up to 64 use one STP (store pair of two zero registers)
        // per 16 bytes.
   407  (Zero [16] ptr mem) ->
   408  	(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
   409  
   410  (Zero [32] ptr mem) ->
   411  	(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   412  		(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
   413  
   414  (Zero [48] ptr mem) ->
   415  	(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   416  		(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   417  			(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
   418  
   419  (Zero [64] ptr mem) ->
   420  	(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
   421  		(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
   422  			(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
   423  				(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
   424  
   425  // strip off fractional word zeroing
        // For s > 16 that is not a multiple of 16: first zero the leading s - s%16 bytes,
        // then zero the LAST 8 bytes (tail <= 8) or LAST 16 bytes (tail > 8) of the
        // region. That final chunk ends exactly at offset s, so it may overlap bytes the
        // first part has already zeroed.
   426  (Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 ->
   427  	(Zero [8]
   428  		(OffPtr <ptr.Type> ptr [s-8])
   429  		(Zero [s-s%16] ptr mem))
   430  (Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 ->
   431  	(Zero [16]
   432  		(OffPtr <ptr.Type> ptr [s-16])
   433  		(Zero [s-s%16] ptr mem))
   434  
   435  // medium zeroing uses a duff device
   436  // 4, 16, and 64 are magic constants, see runtime/mkduff.go
        // Applies to 16-byte multiples in (64, 16*64] unless config.noDuffDevice is set.
        // The aux value 4 * (64 - s/16) shrinks as s grows and is 0 at s == 16*64.
        // NOTE(review): presumably a byte offset into the duffzero routine -- confirm.
   437  (Zero [s] ptr mem)
   438  	&& s%16 == 0 && s > 64 && s <= 16*64
   439  	&& !config.noDuffDevice ->
   440  	(DUFFZERO [4 * (64 - s/16)] ptr mem)
   441  
   442  // large zeroing uses a loop
        // Applies to 16-byte multiples above 16*64, or any 16-byte multiple when the
        // duff device is disabled. The second argument is ptr + (s-16).
        // NOTE(review): presumably the address of the last 16-byte chunk -- confirm.
   443  (Zero [s] ptr mem)
   444  	&& s%16 == 0 && (s > 16*64 || config.noDuffDevice) ->
   445  	(LoweredZero
   446  		ptr
   447  		(ADDconst <ptr.Type> [s-16] ptr)
   448  		mem)
   449  
   450  // moves
        // Small fixed sizes are expanded inline into load/store pairs. Integer loads use
        // the unsigned (U) variants for widths below 8 bytes.
   451  (Move [0] _ _ mem) -> mem
   452  (Move [1] dst src mem) -> (MOVBstore dst (MOVBUload src mem) mem)
   453  (Move [2] dst src mem) -> (MOVHstore dst (MOVHUload src mem) mem)
   454  (Move [4] dst src mem) -> (MOVWstore dst (MOVWUload src mem) mem)
   455  (Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
   456  
        // Multi-piece copies: every load reads from the ORIGINAL memory state mem, while
        // the stores are chained one after another starting from offset 0.
   457  (Move [3] dst src mem) ->
   458  	(MOVBstore [2] dst (MOVBUload [2] src mem)
   459  		(MOVHstore dst (MOVHUload src mem) mem))
   460  (Move [5] dst src mem) ->
   461  	(MOVBstore [4] dst (MOVBUload [4] src mem)
   462  		(MOVWstore dst (MOVWUload src mem) mem))
   463  (Move [6] dst src mem) ->
   464  	(MOVHstore [4] dst (MOVHUload [4] src mem)
   465  		(MOVWstore dst (MOVWUload src mem) mem))
   466  (Move [7] dst src mem) ->
   467  	(MOVBstore [6] dst (MOVBUload [6] src mem)
   468  		(MOVHstore [4] dst (MOVHUload [4] src mem)
   469  			(MOVWstore dst (MOVWUload src mem) mem)))
   470  (Move [12] dst src mem) ->
   471  	(MOVWstore [8] dst (MOVWUload [8] src mem)
   472  		(MOVDstore dst (MOVDload src mem) mem))
   473  (Move [16] dst src mem) ->
   474  	(MOVDstore [8] dst (MOVDload [8] src mem)
   475  		(MOVDstore dst (MOVDload src mem) mem))
   476  (Move [24] dst src mem) ->
   477  	(MOVDstore [16] dst (MOVDload [16] src mem)
   478  		(MOVDstore [8] dst (MOVDload [8] src mem)
   479  			(MOVDstore dst (MOVDload src mem) mem)))
   480  
   481  // strip off fractional word move
        // For s > 8 that is not a multiple of 8: move the leading s - s%8 bytes first,
        // then move the remaining s%8 bytes starting at offset s - s%8 in both buffers.
   482  (Move [s] dst src mem) && s%8 != 0 && s > 8 ->
   483  	(Move [s%8]
   484  		(OffPtr <dst.Type> dst [s-s%8])
   485  		(OffPtr <src.Type> src [s-s%8])
   486  		(Move [s-s%8] dst src mem))
   487  
   488  // medium move uses a duff device
        // Two cases for sizes in (32, 16*64] when config.noDuffDevice is not set:
        //   s%16 == 8: DUFFCOPY handles the leading s-8 bytes and one extra 8-byte
        //              load/store pair at offset s-8 handles the rest;
        //   s%16 == 0: DUFFCOPY handles everything.
   489  (Move [s] dst src mem)
   490  	&& s > 32 && s <= 16*64 && s%16 == 8
   491  	&& !config.noDuffDevice ->
   492  	(MOVDstore [s-8] dst (MOVDload [s-8] src mem)
   493  		(DUFFCOPY <types.TypeMem> [8*(64-(s-8)/16)] dst src mem))
   494  (Move [s] dst src mem)
   495  	&& s > 32 && s <= 16*64 && s%16 == 0
   496  	&& !config.noDuffDevice ->
   497  	(DUFFCOPY [8 * (64 - s/16)] dst src mem)
        // Meaning of the constants in the DUFFCOPY aux expressions above:
   498  // 8 is the number of bytes to encode:
   499  //
   500  // LDP.P   16(R16), (R26, R27)
   501  // STP.P   (R26, R27), 16(R17)
   502  //
   503  // 64 is number of these blocks. See runtime/duff_arm64.s:duffcopy
   504  
   505  // large move uses a loop
        // The condition (s > 24, multiple of 8) also covers sizes accepted by the duff
        // rules above. NOTE(review): presumably the duff rules win because rules are
        // tried in file order -- confirm. The third argument is src + (s-8).
   506  (Move [s] dst src mem)
   507  	&& s > 24 && s%8 == 0 ->
   508  	(LoweredMove
   509  		dst
   510  		src
   511  		(ADDconst <src.Type> src [s-8])
   512  		mem)
   513  
   514  // calls
        // Each generic call op maps to its CALL* counterpart with all arguments and
        // auxiliary values passed through unchanged.
   515  (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
   516  (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
   517  (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
   518  
   519  // checks
        // IsNonNil compares the pointer against 0. The two bounds checks are unsigned
        // comparisons: strict (idx < len) for IsInBounds and non-strict (idx <= len)
        // for IsSliceInBounds.
   520  (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
   521  (IsNonNil ptr) -> (NotEqual (CMPconst [0] ptr))
   522  (IsInBounds idx len) -> (LessThanU (CMP idx len))
   523  (IsSliceInBounds idx len) -> (LessEqualU (CMP idx len))
   524  
   525  // pseudo-ops
   526  (GetClosurePtr) -> (LoweredGetClosurePtr)
   527  (GetCallerSP) -> (LoweredGetCallerSP)
   528  (GetCallerPC) -> (LoweredGetCallerPC)
   529  
   530  // Absorb pseudo-ops into blocks.
        // A generic If whose control value is one of the condition pseudo-ops becomes the
        // matching conditional block kind, controlled directly by the flags value cc.
   531  (If (Equal cc) yes no) -> (EQ cc yes no)
   532  (If (NotEqual cc) yes no) -> (NE cc yes no)
   533  (If (LessThan cc) yes no) -> (LT cc yes no)
   534  (If (LessThanU cc) yes no) -> (ULT cc yes no)
   535  (If (LessEqual cc) yes no) -> (LE cc yes no)
   536  (If (LessEqualU cc) yes no) -> (ULE cc yes no)
   537  (If (GreaterThan cc) yes no) -> (GT cc yes no)
   538  (If (GreaterThanU cc) yes no) -> (UGT cc yes no)
   539  (If (GreaterEqual cc) yes no) -> (GE cc yes no)
   540  (If (GreaterEqualU cc) yes no) -> (UGE cc yes no)
   541  
        // Any other control value becomes an NZ block on the value itself.
        // NOTE(review): this catch-all must stay after the specific rules above
        // (presumably rules are tried in file order -- confirm).
   542  (If cond yes no) -> (NZ cond yes no)
   543  
   544  // atomic intrinsics
   545  // Note: these ops do not accept offset.
        // Loads map to LDAR/LDARW and stores to STLR/STLRW; pointer-sized forms share
        // the 64-bit op.
   546  (AtomicLoad32  ptr mem) -> (LDARW ptr mem)
   547  (AtomicLoad64  ptr mem) -> (LDAR  ptr mem)
   548  (AtomicLoadPtr ptr mem) -> (LDAR  ptr mem)
   549  
   550  (AtomicStore32      ptr val mem) -> (STLRW ptr val mem)
   551  (AtomicStore64      ptr val mem) -> (STLR  ptr val mem)
   552  (AtomicStorePtrNoWB ptr val mem) -> (STLR  ptr val mem)
   553  
        // Exchange, add and compare-and-swap each map to a dedicated LoweredAtomic* op
        // with the same arguments.
   554  (AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
   555  (AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
   556  
   557  (AtomicAdd32 ptr val mem) -> (LoweredAtomicAdd32 ptr val mem)
   558  (AtomicAdd64 ptr val mem) -> (LoweredAtomicAdd64 ptr val mem)
   559  
   560  (AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
   561  (AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
   562  
   563  // Currently the updated value is not used, but we need a register to temporarily hold it.
        // The lowered op therefore has a multi-part result and Select1 picks one part of it.
        // NOTE(review): presumably Select1 is the memory state and the unused part is
        // the updated value -- confirm against the op definitions.
   564  (AtomicAnd8 ptr val mem) -> (Select1 (LoweredAtomicAnd8 ptr val mem))
   565  (AtomicOr8  ptr val mem) -> (Select1 (LoweredAtomicOr8  ptr val mem))
   566  
   567  (AtomicAdd32Variant ptr val mem) -> (LoweredAtomicAdd32Variant ptr val mem)
   568  (AtomicAdd64Variant ptr val mem) -> (LoweredAtomicAdd64Variant ptr val mem)
   569  
   570  // Write barrier.
   571  (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
   572  
   573  // Optimizations
   574  
   575  // Absorb boolean tests into block
        // An NZ block whose control value is a condition pseudo-op is replaced by the
        // matching conditional block on the underlying flags value cc, so the boolean
        // never has to be materialized for the branch.
   576  (NZ (Equal cc) yes no) -> (EQ cc yes no)
   577  (NZ (NotEqual cc) yes no) -> (NE cc yes no)
   578  (NZ (LessThan cc) yes no) -> (LT cc yes no)
   579  (NZ (LessThanU cc) yes no) -> (ULT cc yes no)
   580  (NZ (LessEqual cc) yes no) -> (LE cc yes no)
   581  (NZ (LessEqualU cc) yes no) -> (ULE cc yes no)
   582  (NZ (GreaterThan cc) yes no) -> (GT cc yes no)
   583  (NZ (GreaterThanU cc) yes no) -> (UGT cc yes no)
   584  (NZ (GreaterEqual cc) yes no) -> (GE cc yes no)
   585  (NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no)
   586  
   587  (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTWconst [c] y) yes no)
   588  (NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTWconst [c] y) yes no)
   589  (LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTWconst [c] y) yes no)
   590  (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTWconst [c] y) yes no)
   591  (GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTWconst [c] y) yes no)
   592  (GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTWconst [c] y) yes no)
   593  
   594  (EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TST x y) yes no)
   595  (NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TST x y) yes no)
   596  (LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LT (TST x y) yes no)
   597  (LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LE (TST x y) yes no)
   598  (GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GT (TST x y) yes no)
   599  (GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GE (TST x y) yes no)
   600  
   601  (EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TSTW x y) yes no)
   602  (NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TSTW x y) yes no)
   603  (LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LT (TSTW x y) yes no)
   604  (LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (LE (TSTW x y) yes no)
   605  (GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GT (TSTW x y) yes no)
   606  (GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (GE (TSTW x y) yes no)
   607  
        // 64-bit compare of (y AND constant c) with zero feeding a conditional block:
        // replace the CMPconst/ANDconst pair with TSTconst [c] y when the ANDconst
        // value has no other user (x.Uses == 1).
   608  (EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTconst [c] y) yes no)
   609  (NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTconst [c] y) yes no)
   610  (LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTconst [c] y) yes no)
   611  (LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTconst [c] y) yes no)
   612  (GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTconst [c] y) yes no)
   613  (GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTconst [c] y) yes no)
   614  
        // Fold an addition that is only compared with zero (or a compare against a
        // negation) into CMN/CMNW, so the ADD/ADDconst/NEG value need not be computed.
        // The fold is applied only when that value has a single use.
        //
        // Only the EQ and NE blocks are rewritten. CMN/CMNW set the overflow flag from
        // the addition itself, whereas comparing the already-wrapped sum with zero can
        // never overflow. The signed conditions LT/LE/GT/GE read both N and V, so they
        // would branch the wrong way whenever the addition overflows. Likewise
        // CMP x,(NEG y) and CMN x,y produce different C/V flags when y is the minimum
        // integer. Signed branches therefore keep the explicit compare (LT/GE against
        // zero are still reduced to sign-bit tests by the TBZ/TBNZ rules).
   615  (EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (CMNconst [c] y) yes no)
   616  (NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (NE (CMNconst [c] y) yes no)
   621  
   622  (EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (CMNWconst [c] y) yes no)
   623  (NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 -> (NE (CMNWconst [c] y) yes no)
   628  
   629  (EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
   630  (NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
   635  
   636  (EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMNW x y) yes no)
   637  (NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMNW x y) yes no)
   642  
   643  (EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
   644  (NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
   649  
   650  (EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMNW x y) yes no)
   651  (NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMNW x y) yes no)
   656  
        // EQ/NE blocks controlled by a compare of x with constant zero become Z/NZ
        // blocks (64-bit compare) or ZW/NZW blocks (32-bit compare) that take x itself
        // as the control value, dropping the CMPconst/CMPWconst.
   657  (EQ (CMPconst [0] x) yes no) -> (Z x yes no)
   658  (NE (CMPconst [0] x) yes no) -> (NZ x yes no)
   659  (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
   660  (NE (CMPWconst [0] x) yes no) -> (NZW x yes no)
   661  
        // A multiply-add / multiply-subtract that is only compared with zero is split
        // into a MUL (MULW) and a flag-setting CMN (for a + x*y) or CMP (for a - x*y).
        // The split is applied only when the MADD/MSUB value has a single use.
        //
        // Only the EQ and NE blocks are rewritten. CMN/CMP derive the overflow flag
        // from the add/subtract they perform, whereas the original compare of the
        // wrapped result with zero never overflows. The signed conditions LT/LE/GT/GE
        // would therefore give a different answer whenever a +/- x*y overflows, so
        // signed branches keep the explicit compare against zero.
   662  (EQ (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (EQ (CMN a (MUL <x.Type> x y)) yes no)
   663  (NE (CMPconst [0]  z:(MADD a x y)) yes no) && z.Uses==1 -> (NE (CMN a (MUL <x.Type> x y)) yes no)
   668  
   669  (EQ (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (EQ (CMP a (MUL <x.Type> x y)) yes no)
   670  (NE (CMPconst [0]  z:(MSUB a x y)) yes no) && z.Uses==1 -> (NE (CMP a (MUL <x.Type> x y)) yes no)
   675  
   676  (EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (EQ (CMNW a (MULW <x.Type> x y)) yes no)
   677  (NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (NE (CMNW a (MULW <x.Type> x y)) yes no)
   682  
   683  (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (EQ (CMPW a (MULW <x.Type> x y)) yes no)
   684  (NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (NE (CMPW a (MULW <x.Type> x y)) yes no)
   689  
   690  // Absorb bit-tests into block
        // When the mask c has exactly one bit set (oneBit), a zero / non-zero test of
        // (x AND c) becomes a TBZ / TBNZ block on x carrying the bit index ntz(c) as aux.
        // The 32-bit forms (ZW/NZW and TSTWconst) first truncate the mask with
        // uint32(c), so only its low 32 bits are considered.
   691  (Z  (ANDconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
   692  (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
   693  (ZW  (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
   694  (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
   695  (EQ (TSTconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
   696  (NE (TSTconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
   697  (EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
   698  (NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
   699  
   700  // Test sign-bit for signed comparisons against zero
        // x >= 0 becomes TBZ on bit 31 (32-bit compare) or bit 63 (64-bit compare);
        // x < 0 becomes TBNZ on the same bit. The compare against zero is dropped.
   701  (GE (CMPWconst [0] x) yes no) -> (TBZ  {int64(31)} x yes no)
   702  (GE (CMPconst [0] x) yes no) -> (TBZ  {int64(63)} x yes no)
   703  (LT (CMPWconst [0] x) yes no) -> (TBNZ  {int64(31)} x yes no)
   704  (LT (CMPconst [0] x) yes no) -> (TBNZ  {int64(63)} x yes no)
   705  
   706  // fold offset into address
        // An ADDconst applied to a MOVDaddr is merged into one MOVDaddr on the same
        // symbol and base pointer whose offset is the sum off1+off2.
   707  (ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) -> (MOVDaddr [off1+off2] {sym} ptr)
   708  
   709  // fold address into load/store
        // A load whose address is (ADDconst [off2] ptr) absorbs off2 into its own
        // offset and addresses ptr directly. The same two conditions apply to every
        // load width below:
        //   - is32Bit(off1+off2): the combined offset must pass the is32Bit check;
        //   - ptr.Op != OpSB || !config.ctxt.Flag_shared: nothing is folded onto an SB
        //     base when Flag_shared is set.
   710  (MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   711  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   712  	(MOVBload [off1+off2] {sym} ptr mem)
   713  (MOVBUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   714  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   715  	(MOVBUload [off1+off2] {sym} ptr mem)
   716  (MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   717  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   718  	(MOVHload [off1+off2] {sym} ptr mem)
   719  (MOVHUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   720  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   721  	(MOVHUload [off1+off2] {sym} ptr mem)
   722  (MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   723  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   724  	(MOVWload [off1+off2] {sym} ptr mem)
   725  (MOVWUload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   726  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   727  	(MOVWUload [off1+off2] {sym} ptr mem)
   728  (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   729  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   730  	(MOVDload [off1+off2] {sym} ptr mem)
   731  (FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   732  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   733  	(FMOVSload [off1+off2] {sym} ptr mem)
   734  (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   735  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   736  	(FMOVDload [off1+off2] {sym} ptr mem)
   737  
   738  // register indexed load
        // A load with zero offset and no symbol whose address is (ADD ptr idx) becomes
        // the register-indexed load of the same width.
   739  (MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx ptr idx mem)
   740  (MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx ptr idx mem)
   741  (MOVWload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx ptr idx mem)
   742  (MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx ptr idx mem)
   743  (MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx ptr idx mem)
   744  (MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBUloadidx ptr idx mem)
   745  (MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBloadidx ptr idx mem)
   746  (FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (FMOVSloadidx ptr idx mem)
   747  (FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (FMOVDloadidx ptr idx mem)
        // Conversely, when either address operand is a constant it is folded back into
        // the offset of a plain load. The constant must pass is32Bit, the same bound
        // the other offset-folding rules enforce before writing a load/store offset;
        // otherwise the indexed form is kept.
   748  (MOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVDload [c] ptr mem)
   749  (MOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVDload [c] ptr mem)
   750  (MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVWUload [c] ptr mem)
   751  (MOVWUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVWUload [c] ptr mem)
   752  (MOVWloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVWload [c] ptr mem)
   753  (MOVWloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVWload [c] ptr mem)
   754  (MOVHUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVHUload [c] ptr mem)
   755  (MOVHUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVHUload [c] ptr mem)
   756  (MOVHloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVHload [c] ptr mem)
   757  (MOVHloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVHload [c] ptr mem)
   758  (MOVBUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVBUload [c] ptr mem)
   759  (MOVBUloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVBUload [c] ptr mem)
   760  (MOVBloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVBload [c] ptr mem)
   761  (MOVBloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (MOVBload [c] ptr mem)
   762  (FMOVSloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (FMOVSload [c] ptr mem)
   763  (FMOVSloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (FMOVSload [c] ptr mem)
   764  (FMOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (FMOVDload [c] ptr mem)
   765  (FMOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) -> (FMOVDload [c] ptr mem)
   766  
   767  // shifted register indexed load
        // A load with zero offset and no symbol whose address is ptr + idx<<k
        // (ADDshiftLL [k]) becomes the scaled-index load whose suffix (2/4/8) equals 1<<k.
   768  (MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx8 ptr idx mem)
   769  (MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx4 ptr idx mem)
   770  (MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx4 ptr idx mem)
   771  (MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx2 ptr idx mem)
   772  (MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx2 ptr idx mem)
        // An unscaled indexed load whose index is itself a shift by k (or, for
        // half-words, idx+idx) becomes the scaled form. Both operand orders are handled.
   773  (MOVDloadidx ptr (SLLconst [3] idx) mem) -> (MOVDloadidx8 ptr idx mem)
   774  (MOVWloadidx ptr (SLLconst [2] idx) mem) -> (MOVWloadidx4 ptr idx mem)
   775  (MOVWUloadidx ptr (SLLconst [2] idx) mem) -> (MOVWUloadidx4 ptr idx mem)
   776  (MOVHloadidx ptr (SLLconst [1] idx) mem) -> (MOVHloadidx2 ptr idx mem)
   777  (MOVHUloadidx ptr (SLLconst [1] idx) mem) -> (MOVHUloadidx2 ptr idx mem)
   778  (MOVHloadidx ptr (ADD idx idx) mem) -> (MOVHloadidx2 ptr idx mem)
   779  (MOVHUloadidx ptr (ADD idx idx) mem) -> (MOVHUloadidx2 ptr idx mem)
   780  (MOVDloadidx (SLLconst [3] idx) ptr mem) -> (MOVDloadidx8 ptr idx mem)
   781  (MOVWloadidx (SLLconst [2] idx) ptr mem) -> (MOVWloadidx4 ptr idx mem)
   782  (MOVWUloadidx (SLLconst [2] idx) ptr mem) -> (MOVWUloadidx4 ptr idx mem)
        // Commuted half-word forms, matching the corresponding store and store-zero rules.
        (MOVHloadidx (SLLconst [1] idx) ptr mem) -> (MOVHloadidx2 ptr idx mem)
        (MOVHUloadidx (SLLconst [1] idx) ptr mem) -> (MOVHUloadidx2 ptr idx mem)
   783  (MOVHloadidx (ADD idx idx) ptr mem) -> (MOVHloadidx2 ptr idx mem)
   784  (MOVHUloadidx (ADD idx idx) ptr mem) -> (MOVHUloadidx2 ptr idx mem)
        // A constant scaled index folds into the offset of a plain load as c<<k. The
        // scaled value must pass is32Bit, like every other folded load/store offset.
   785  (MOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) -> (MOVDload [c<<3] ptr mem)
   786  (MOVWUloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) -> (MOVWUload [c<<2] ptr mem)
   787  (MOVWloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) -> (MOVWload [c<<2] ptr mem)
   788  (MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) -> (MOVHUload [c<<1] ptr mem)
   789  (MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) -> (MOVHload [c<<1] ptr mem)
   790  
        // Store counterpart of the load address folding: a store (including STP and the
        // store-zero forms) whose address is (ADDconst [off2] ptr) absorbs off2 into
        // its own offset. Conditions are the same for every rule: is32Bit(off1+off2),
        // and no folding onto an SB base when config.ctxt.Flag_shared is set.
   791  (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   792  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   793  	(MOVBstore [off1+off2] {sym} ptr val mem)
   794  (MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   795  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   796  	(MOVHstore [off1+off2] {sym} ptr val mem)
   797  (MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   798  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   799  	(MOVWstore [off1+off2] {sym} ptr val mem)
   800  (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   801  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   802  	(MOVDstore [off1+off2] {sym} ptr val mem)
   803  (STP [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(off1+off2)
   804  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   805  	(STP [off1+off2] {sym} ptr val1 val2 mem)
   806  (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   807  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   808  	(FMOVSstore [off1+off2] {sym} ptr val mem)
   809  (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
   810  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   811  	(FMOVDstore [off1+off2] {sym} ptr val mem)
   812  (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   813  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   814  	(MOVBstorezero [off1+off2] {sym} ptr mem)
   815  (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   816  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   817  	(MOVHstorezero [off1+off2] {sym} ptr mem)
   818  (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   819  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   820  	(MOVWstorezero [off1+off2] {sym} ptr mem)
   821  (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   822  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   823  	(MOVDstorezero [off1+off2] {sym} ptr mem)
   824  (MOVQstorezero [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(off1+off2)
   825  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   826  	(MOVQstorezero [off1+off2] {sym} ptr mem)
   827  
   828  // register indexed store
        // A store with zero offset and no symbol whose address is (ADD ptr idx) becomes
        // the register-indexed store of the same width.
   829  (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx ptr idx val mem)
   830  (MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx ptr idx val mem)
   831  (MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx ptr idx val mem)
   832  (MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVBstoreidx ptr idx val mem)
   833  (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (FMOVDstoreidx ptr idx val mem)
   834  (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (FMOVSstoreidx ptr idx val mem)
        // Conversely, when either address operand is a constant it is folded back into
        // the offset of a plain store. The constant must pass is32Bit, the same bound
        // the other offset-folding rules enforce; otherwise the indexed form is kept.
   835  (MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (MOVDstore [c] ptr val mem)
   836  (MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (MOVDstore [c] idx val mem)
   837  (MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (MOVWstore [c] ptr val mem)
   838  (MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (MOVWstore [c] idx val mem)
   839  (MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (MOVHstore [c] ptr val mem)
   840  (MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (MOVHstore [c] idx val mem)
   841  (MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (MOVBstore [c] ptr val mem)
   842  (MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (MOVBstore [c] idx val mem)
   843  (FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (FMOVDstore [c] ptr val mem)
   844  (FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (FMOVDstore [c] idx val mem)
   845  (FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) -> (FMOVSstore [c] ptr val mem)
   846  (FMOVSstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) -> (FMOVSstore [c] idx val mem)
   847  
   848  // shifted register indexed store
        // A store with zero offset and no symbol whose address is ptr + idx<<k
        // (ADDshiftLL [k]) becomes the scaled-index store whose suffix (2/4/8) equals 1<<k.
   849  (MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx8 ptr idx val mem)
   850  (MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx4 ptr idx val mem)
   851  (MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx2 ptr idx val mem)
        // An unscaled indexed store whose index is itself a shift by k (or, for
        // half-words, idx+idx) becomes the scaled form. Both operand orders are handled.
   852  (MOVDstoreidx ptr (SLLconst [3] idx) val mem) -> (MOVDstoreidx8 ptr idx val mem)
   853  (MOVWstoreidx ptr (SLLconst [2] idx) val mem) -> (MOVWstoreidx4 ptr idx val mem)
   854  (MOVHstoreidx ptr (SLLconst [1] idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
   855  (MOVHstoreidx ptr (ADD idx idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
   856  (MOVDstoreidx (SLLconst [3] idx) ptr val mem) -> (MOVDstoreidx8 ptr idx val mem)
   857  (MOVWstoreidx (SLLconst [2] idx) ptr val mem) -> (MOVWstoreidx4 ptr idx val mem)
   858  (MOVHstoreidx (SLLconst [1] idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
   859  (MOVHstoreidx (ADD idx idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
        // A constant scaled index folds into the offset of a plain store as c<<k. The
        // scaled value must pass is32Bit, like every other folded load/store offset.
   860  (MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) -> (MOVDstore [c<<3] ptr val mem)
   861  (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) -> (MOVWstore [c<<2] ptr val mem)
   862  (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) -> (MOVHstore [c<<1] ptr val mem)
   863  
        // A load whose address is (MOVDaddr [off2] {sym2} ptr) merges that address into
        // itself: the offsets are added and the symbols combined with mergeSym.
        // Required for every width: canMergeSym(sym1,sym2), is32Bit(off1+off2), and no
        // folding onto an SB base when config.ctxt.Flag_shared is set.
   864  (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   865  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   866  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   867  	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   868  (MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   869  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   870  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   871  	(MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   872  (MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   873  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   874  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   875  	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   876  (MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   877  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   878  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   879  	(MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   880  (MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   881  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   882  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   883  	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   884  (MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   885  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   886  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   887  	(MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   888  (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   889  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   890  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   891  	(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   892  (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   893  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   894  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   895  	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   896  (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   897  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   898  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   899  	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   900  
        // A store (including STP and the store-zero forms) whose address is
        // (MOVDaddr [off2] {sym2} ptr) merges that address into itself: the offsets are
        // added and the symbols combined with mergeSym. Required for every rule:
        // canMergeSym(sym1,sym2), is32Bit(off1+off2), and no folding onto an SB base
        // when config.ctxt.Flag_shared is set.
   901  (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   902  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   903  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   904  	(MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   905  (MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   906  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   907  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   908  	(MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   909  (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   910  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   911  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   912  	(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   913  (MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   914  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   915  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   916  	(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   917  (STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
   918  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   919  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   920  	(STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
   921  (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   922  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   923  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   924  	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   925  (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
   926  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   927  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   928  	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
   929  (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   930  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   931  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   932  	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   933  (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   934  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   935  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   936  	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   937  (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   938  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   939  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   940  	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   941  (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   942  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   943  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   944  	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   945  (MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
   946  	&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
   947  	&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
   948  	(MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
   949  
   950  // store zero
        // A store whose value operand is (MOVDconst [0]) becomes the store-zero op of
        // the same width, which takes no value operand. An STP of two zero constants
        // becomes MOVQstorezero. Offset and symbol are carried over unchanged.
   951  (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
   952  (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
   953  (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
   954  (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
   955  (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem)
   956  
   957  // register indexed store zero
        // A store-zero with zero offset and no symbol whose address is (ADD ptr idx)
        // becomes the register-indexed store-zero of the same width.
   958  (MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx ptr idx mem)
   959  (MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx ptr idx mem)
   960  (MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx ptr idx mem)
   961  (MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBstorezeroidx ptr idx mem)
        // An indexed store of constant zero becomes the indexed store-zero.
   962  (MOVDstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx ptr idx mem)
   963  (MOVWstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx ptr idx mem)
   964  (MOVHstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx ptr idx mem)
   965  (MOVBstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVBstorezeroidx ptr idx mem)
        // When either address operand is a constant it is folded back into the offset
        // of a plain store-zero. The constant must pass is32Bit, the same bound the
        // other offset-folding rules enforce; otherwise the indexed form is kept.
   966  (MOVDstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVDstorezero [c] ptr mem)
   967  (MOVDstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) -> (MOVDstorezero [c] idx mem)
   968  (MOVWstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVWstorezero [c] ptr mem)
   969  (MOVWstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) -> (MOVWstorezero [c] idx mem)
   970  (MOVHstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVHstorezero [c] ptr mem)
   971  (MOVHstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) -> (MOVHstorezero [c] idx mem)
   972  (MOVBstorezeroidx ptr (MOVDconst [c]) mem) && is32Bit(c) -> (MOVBstorezero [c] ptr mem)
   973  (MOVBstorezeroidx (MOVDconst [c]) idx mem) && is32Bit(c) -> (MOVBstorezero [c] idx mem)
   974  
   975  // shifted register indexed store zero
        // A store-zero with zero offset and no symbol whose address is ptr + idx<<k
        // (ADDshiftLL [k]) becomes the scaled-index store-zero whose suffix equals 1<<k.
   976  (MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx8 ptr idx mem)
   977  (MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx4 ptr idx mem)
   978  (MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx2 ptr idx mem)
        // An unscaled indexed store-zero whose index is itself a shift by k (or, for
        // half-words, idx+idx) becomes the scaled form. Both operand orders are handled.
   979  (MOVDstorezeroidx ptr (SLLconst [3] idx) mem) -> (MOVDstorezeroidx8 ptr idx mem)
   980  (MOVWstorezeroidx ptr (SLLconst [2] idx) mem) -> (MOVWstorezeroidx4 ptr idx mem)
   981  (MOVHstorezeroidx ptr (SLLconst [1] idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
   982  (MOVHstorezeroidx ptr (ADD idx idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
   983  (MOVDstorezeroidx (SLLconst [3] idx) ptr mem) -> (MOVDstorezeroidx8 ptr idx mem)
   984  (MOVWstorezeroidx (SLLconst [2] idx) ptr mem) -> (MOVWstorezeroidx4 ptr idx mem)
   985  (MOVHstorezeroidx (SLLconst [1] idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
   986  (MOVHstorezeroidx (ADD idx idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
        // A scaled indexed store of constant zero becomes the scaled store-zero.
   987  (MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx8 ptr idx mem)
   988  (MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx4 ptr idx mem)
   989  (MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx2 ptr idx mem)
        // A constant scaled index folds into the offset of a plain store-zero as c<<k.
        // The scaled value must pass is32Bit, like every other folded load/store offset.
   990  (MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) -> (MOVDstorezero [c<<3] ptr mem)
   991  (MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) -> (MOVWstorezero [c<<2] ptr mem)
   992  (MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) -> (MOVHstorezero [c<<1] ptr mem)
   993  
   994  // Replace a load from the same location as the preceding store with a zero/sign extension (or a copy in case of full width).
   995  // These rules are disabled because they seem to interact badly with other rules, resulting in slower code.
   996  //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
   997  //(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x)
   998  //(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x)
   999  //(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x)
  1000  //(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x)
  1001  //(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x)
  1002  //(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1003  //(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1004  //(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
  1005  
        // A load whose memory argument is a store-zero of the same width becomes the
        // constant 0 when both refer to the same symbol and offset and isSamePtr
        // reports the two base pointers as the same.
  1006  (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1007  (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1008  (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1009  (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1010  (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1011  (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1012  (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
  1013  
        // Register-indexed variant: an indexed load whose memory argument is an indexed
        // store-zero of the same width becomes the constant 0. The two address operands
        // may match in either order (ptr/ptr2 with idx/idx2, or ptr/idx2 with idx/ptr2),
        // each pairing checked with isSamePtr.
  1014  (MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
  1015  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1016  (MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
  1017  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1018  (MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1019  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1020  (MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
  1021  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1022  (MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1023  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1024  (MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
  1025  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1026  (MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
  1027  	&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
  1028  
  1029  (MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1030  (MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1031  (MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1032  (MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1033  (MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
  1034  
  1035  // don't extend after proper load
  1036  (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
  1037  (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1038  (MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
  1039  (MOVHreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1040  (MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
  1041  (MOVHUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1042  (MOVHUreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1043  (MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
  1044  (MOVWreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1045  (MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
  1046  (MOVWreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1047  (MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
  1048  (MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x)
  1049  (MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x)
  1050  (MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x)
  1051  (MOVBreg x:(MOVBloadidx _  _ _)) -> (MOVDreg x)
  1052  (MOVBUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1053  (MOVHreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
  1054  (MOVHreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1055  (MOVHreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
  1056  (MOVHUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1057  (MOVHUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1058  (MOVWreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
  1059  (MOVWreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1060  (MOVWreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
  1061  (MOVWreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1062  (MOVWreg x:(MOVWloadidx _ _ _)) -> (MOVDreg x)
  1063  (MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
  1064  (MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
  1065  (MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
  1066  (MOVHreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
  1067  (MOVHUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1068  (MOVWreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
  1069  (MOVWreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1070  (MOVWreg x:(MOVWloadidx4 _ _ _)) -> (MOVDreg x)
  1071  (MOVWUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
  1072  (MOVWUreg x:(MOVWUloadidx4 _ _ _)) -> (MOVDreg x)
  1073  
  1074  // fold double extensions: re-extending a value that is already extended from the same or a narrower width (with compatible signedness) changes nothing
  1075  (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
  1076  (MOVBUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1077  (MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
  1078  (MOVHreg x:(MOVBUreg _)) -> (MOVDreg x)
  1079  (MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
  1080  (MOVHUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1081  (MOVHUreg x:(MOVHUreg _)) -> (MOVDreg x)
  1082  (MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
  1083  (MOVWreg x:(MOVBUreg _)) -> (MOVDreg x)
  1084  (MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
  1085  (MOVWreg x:(MOVHUreg _)) -> (MOVDreg x) // a zero-extended halfword is non-negative, so sign-extending it from 32 bits is a no-op
  1086  (MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
  1087  (MOVWUreg x:(MOVBUreg _)) -> (MOVDreg x)
  1088  (MOVWUreg x:(MOVHUreg _)) -> (MOVDreg x)
  1089  (MOVWUreg x:(MOVWUreg _)) -> (MOVDreg x)
  1090  
  1091  // don't extend before store
  1092  (MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1093  (MOVBstore [off] {sym} ptr (MOVBUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1094  (MOVBstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1095  (MOVBstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1096  (MOVBstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1097  (MOVBstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
  1098  (MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1099  (MOVHstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1100  (MOVHstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1101  (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
  1102  (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
  1103  (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
  1104  (MOVBstoreidx ptr idx (MOVBreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1105  (MOVBstoreidx ptr idx (MOVBUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1106  (MOVBstoreidx ptr idx (MOVHreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1107  (MOVBstoreidx ptr idx (MOVHUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1108  (MOVBstoreidx ptr idx (MOVWreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1109  (MOVBstoreidx ptr idx (MOVWUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
  1110  (MOVHstoreidx ptr idx (MOVHreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1111  (MOVHstoreidx ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1112  (MOVHstoreidx ptr idx (MOVWreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1113  (MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
  1114  (MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
  1115  (MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
  1116  (MOVHstoreidx2 ptr idx (MOVHreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1117  (MOVHstoreidx2 ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1118  (MOVHstoreidx2 ptr idx (MOVWreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1119  (MOVHstoreidx2 ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
  1120  (MOVWstoreidx4 ptr idx (MOVWreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
  1121  (MOVWstoreidx4 ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
  1122  
  1123  // if a register move has only 1 use, just use the same register without emitting an instruction
  1124  // MOVDnop doesn't emit an instruction; it is only there to ensure the type.
  1125  (MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
  1126  
  1127  // fold constant into arithmetic ops
  1128  (ADD x (MOVDconst [c])) -> (ADDconst [c] x)
  1129  (SUB x (MOVDconst [c])) -> (SUBconst [c] x)
  1130  (AND x (MOVDconst [c])) -> (ANDconst [c] x)
  1131  (OR  x (MOVDconst [c])) -> (ORconst  [c] x)
  1132  (XOR x (MOVDconst [c])) -> (XORconst [c] x)
  1133  (TST x (MOVDconst [c])) -> (TSTconst [c] x)
  1134  (TSTW x (MOVDconst [c])) -> (TSTWconst [c] x)
  1135  (CMN x (MOVDconst [c])) -> (CMNconst [c] x)
  1136  (CMNW x (MOVDconst [c])) -> (CMNWconst [c] x)
  1137  (BIC x (MOVDconst [c])) -> (ANDconst [^c] x)
  1138  (EON x (MOVDconst [c])) -> (XORconst [^c] x)
  1139  (ORN x (MOVDconst [c])) -> (ORconst  [^c] x)
  1140  
  1141  (SLL x (MOVDconst [c])) -> (SLLconst x [c&63]) // Note: I don't think we ever generate bad constant shifts (i.e. c>=64)
  1142  (SRL x (MOVDconst [c])) -> (SRLconst x [c&63])
  1143  (SRA x (MOVDconst [c])) -> (SRAconst x [c&63])
  1144  
  1145  (CMP x (MOVDconst [c])) -> (CMPconst [c] x)
  1146  (CMP (MOVDconst [c]) x) -> (InvertFlags (CMPconst [c] x))
  1147  (CMPW x (MOVDconst [c])) -> (CMPWconst [int64(int32(c))] x)
  1148  (CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst [int64(int32(c))] x))
  1149  
  1150  // mul-neg -> mneg: negating a product, or multiplying by a negated operand, becomes a single multiply-negate op
  1151  (NEG (MUL x y)) -> (MNEG x y)
  1152  (NEG (MULW x y)) -> (MNEGW x y)
  1153  (MUL (NEG x) y) -> (MNEG x y)
  1154  (MULW (NEG x) y) -> (MNEGW x y)
  1155  
  1156  // madd/msub: fuse a multiply (or multiply-negate) into the ADD/SUB that consumes it; the l.Uses==1 guard restricts this to products with no other user
  1157  (ADD a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MADD a x y)
  1158  (SUB a l:(MUL  x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y)
  1159  (ADD a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MSUB a x y) // a + -(x*y) == a - x*y
  1160  (SUB a l:(MNEG x y)) && l.Uses==1 && clobber(l) -> (MADD a x y) // a - -(x*y) == a + x*y
  1161  
  1162  (ADD a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y) // 32-bit multiply fused into its ADD; NOTE(review): the a.Type.Size() != 8 guard presumably keeps 64-bit accumulators out of the W forms - confirm
  1163  (SUB a l:(MULW  x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
  1164  (ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y) // a + -(x*y) == a - x*y
  1165  (SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y) // a - -(x*y) == a + x*y
  1166  
  1167  // mul by constant
  1168  (MUL x (MOVDconst [-1])) -> (NEG x)
  1169  (MUL _ (MOVDconst [0])) -> (MOVDconst [0])
  1170  (MUL x (MOVDconst [1])) -> x
  1171  (MUL x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
  1172  (MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
  1173  (MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
  1174  (MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1175  (MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1176  (MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1177  (MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1178  
  1179  (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
  1180  (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
  1181  (MULW x (MOVDconst [c])) && int32(c)==1 -> x
  1182  (MULW x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
  1183  (MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
  1184  (MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
  1185  (MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
  1186  (MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
  1187  (MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
  1188  (MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
  1189  
  1190  // mneg by constant
  1191  (MNEG x (MOVDconst [-1])) -> x
  1192  (MNEG _ (MOVDconst [0])) -> (MOVDconst [0])
  1193  (MNEG x (MOVDconst [1])) -> (NEG x)
  1194  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst <x.Type> [log2(c)] x))
  1195  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1196  (MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
  1197  (MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
  1198  (MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1199  (MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
  1200  (MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1201  
  1202  (MNEGW x (MOVDconst [c])) && int32(c)==-1 -> x
  1203  (MNEGW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
  1204  (MNEGW x (MOVDconst [c])) && int32(c)==1 -> (NEG x)
  1205  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) -> (NEG (SLLconst <x.Type> [log2(c)] x))
  1206  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1207  (MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
  1208  (MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
  1209  (MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
  1210  (MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
  1211  (MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
  1212  
  1213  (MADD a x (MOVDconst [-1])) -> (SUB a x)
  1214  (MADD a _ (MOVDconst [0])) -> a
  1215  (MADD a x (MOVDconst [1])) -> (ADD a x)
  1216  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1217  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1218  (MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1219  (MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1220  (MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1221  (MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1222  (MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1223  
  1224  (MADD a (MOVDconst [-1]) x) -> (SUB a x)
  1225  (MADD a (MOVDconst [0]) _) -> a
  1226  (MADD a (MOVDconst [1]) x) -> (ADD a x)
  1227  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1228  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1229  (MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1230  (MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1231  (MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1232  (MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1233  (MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1234  
  1235  (MADDW a x (MOVDconst [c])) && int32(c)==-1 -> (SUB a x)
  1236  (MADDW a _ (MOVDconst [c])) && int32(c)==0 -> a
  1237  (MADDW a x (MOVDconst [c])) && int32(c)==1 -> (ADD a x)
  1238  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1239  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1240  (MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1241  (MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1242  (MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1243  (MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1244  (MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1245  
  1246  (MADDW a (MOVDconst [c]) x) && int32(c)==-1 -> (SUB a x)
  1247  (MADDW a (MOVDconst [c]) _) && int32(c)==0 -> a
  1248  (MADDW a (MOVDconst [c]) x) && int32(c)==1 -> (ADD a x)
  1249  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (ADDshiftLL a x [log2(c)])
  1250  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1251  (MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1252  (MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1253  (MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1254  (MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1255  (MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1256  
  1257  (MSUB a x (MOVDconst [-1])) -> (ADD a x)
  1258  (MSUB a _ (MOVDconst [0])) -> a
  1259  (MSUB a x (MOVDconst [1])) -> (SUB a x)
  1260  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1261  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1262  (MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1263  (MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1264  (MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1265  (MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1266  (MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1267  
  1268  (MSUB a (MOVDconst [-1]) x) -> (ADD a x)
  1269  (MSUB a (MOVDconst [0]) _) -> a
  1270  (MSUB a (MOVDconst [1]) x) -> (SUB a x)
  1271  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1272  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1273  (MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1274  (MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1275  (MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1276  (MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1277  (MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1278  
  1279  (MSUBW a x (MOVDconst [c])) && int32(c)==-1 -> (ADD a x)
  1280  (MSUBW a _ (MOVDconst [c])) && int32(c)==0 -> a
  1281  (MSUBW a x (MOVDconst [c])) && int32(c)==1 -> (SUB a x)
  1282  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1283  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1284  (MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1285  (MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1286  (MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1287  (MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1288  (MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1289  
  1290  (MSUBW a (MOVDconst [c]) x) && int32(c)==-1 -> (ADD a x)
  1291  (MSUBW a (MOVDconst [c]) _) && int32(c)==0 -> a
  1292  (MSUBW a (MOVDconst [c]) x) && int32(c)==1 -> (SUB a x)
  1293  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SUBshiftLL a x [log2(c)])
  1294  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 -> (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
  1295  (MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 -> (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
  1296  (MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
  1297  (MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
  1298  (MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
  1299  (MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
  1300  
  1301  // div by constant: unsigned division/modulus by 1 or by a power of two becomes a move, a logical right shift, or a mask
  1302  (UDIV x (MOVDconst [1])) -> x
  1303  (UDIV x (MOVDconst [c])) && isPowerOfTwo(c) -> (SRLconst [log2(c)] x)
  1304  (UDIVW x (MOVDconst [c])) && uint32(c)==1 -> x
  1305  (UDIVW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) -> (SRLconst [log2(c)] (MOVWUreg <v.Type> x)) // zero-extend first: SRLconst shifts all 64 bits, and the upper 32 bits of a 32-bit x must not leak into the quotient
  1306  (UMOD _ (MOVDconst [1])) -> (MOVDconst [0])
  1307  (UMOD x (MOVDconst [c])) && isPowerOfTwo(c) -> (ANDconst [c-1] x)
  1308  (UMODW _ (MOVDconst [c])) && uint32(c)==1 -> (MOVDconst [0])
  1309  (UMODW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) -> (ANDconst [c-1] x) // the mask keeps only low bits, so no zero-extension is needed here
  1310  
  1311  // generic simplifications: algebraic identities that need no constants
  1312  (ADD x (NEG y)) -> (SUB x y)
  1313  (SUB x x) -> (MOVDconst [0])
  1314  (AND x x) -> x
  1315  (OR  x x) -> x
  1316  (XOR x x) -> (MOVDconst [0])
  1317  (BIC x x) -> (MOVDconst [0]) // x &^ x
  1318  (EON x x) -> (MOVDconst [-1]) // x ^ ^x
  1319  (ORN x x) -> (MOVDconst [-1]) // x | ^x
  1320  (AND x (MVN y)) -> (BIC x y)
  1321  (XOR x (MVN y)) -> (EON x y)
  1322  (OR  x (MVN y)) -> (ORN x y)
  1323  (CSEL {cc} x (MOVDconst [0]) flag) -> (CSEL0 {cc} x flag)
  1324  (CSEL {cc} (MOVDconst [0]) y flag) -> (CSEL0 {arm64Negate(cc.(Op))} y flag) // zero is the first arm here, so the condition is negated to put y in the position CSEL0 expects
  1325  (SUB x (SUB y z)) -> (SUB (ADD <v.Type> x z) y) // x-(y-z) == (x+z)-y
  1326  (SUB (SUB x y) z) -> (SUB x (ADD <y.Type> y z)) // (x-y)-z == x-(y+z)
  1327  
  1328  // remove redundant *const ops
  1329  (ADDconst [0]  x) -> x
  1330  (SUBconst [0]  x) -> x
  1331  (ANDconst [0]  _) -> (MOVDconst [0])
  1332  (ANDconst [-1] x) -> x
  1333  (ORconst  [0]  x) -> x
  1334  (ORconst  [-1] _) -> (MOVDconst [-1])
  1335  (XORconst [0]  x) -> x
  1336  (XORconst [-1] x) -> (MVN x)
  1337  
  1338  // generic constant folding
  1339  (ADDconst [c] (MOVDconst [d]))  -> (MOVDconst [c+d])
  1340  (ADDconst [c] (ADDconst [d] x)) -> (ADDconst [c+d] x)
  1341  (ADDconst [c] (SUBconst [d] x)) -> (ADDconst [c-d] x)
  1342  (SUBconst [c] (MOVDconst [d]))  -> (MOVDconst [d-c])
  1343  (SUBconst [c] (SUBconst [d] x)) -> (ADDconst [-c-d] x)
  1344  (SUBconst [c] (ADDconst [d] x)) -> (ADDconst [-c+d] x)
  1345  (SLLconst [c] (MOVDconst [d]))  -> (MOVDconst [d<<uint64(c)])
  1346  (SRLconst [c] (MOVDconst [d]))  -> (MOVDconst [int64(uint64(d)>>uint64(c))])
  1347  (SRAconst [c] (MOVDconst [d]))  -> (MOVDconst [d>>uint64(c)])
  1348  (MUL   (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c*d])
  1349  (MULW  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(int32(c)*int32(d))])
  1350  (MNEG  (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-c*d])
  1351  (MNEGW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [-int64(int32(c)*int32(d))])
  1352  (MADD  (MOVDconst [c]) x y) -> (ADDconst [c] (MUL   <x.Type> x y))
  1353  (MADDW (MOVDconst [c]) x y) -> (ADDconst [c] (MULW  <x.Type> x y))
  1354  (MSUB  (MOVDconst [c]) x y) -> (ADDconst [c] (MNEG  <x.Type> x y))
  1355  (MSUBW (MOVDconst [c]) x y) -> (ADDconst [c] (MNEGW <x.Type> x y))
  1356  (MADD  a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [c*d] a)
  1357  (MADDW a (MOVDconst [c]) (MOVDconst [d])) -> (ADDconst [int64(int32(c)*int32(d))] a)
  1358  (MSUB  a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [c*d] a)
  1359  (MSUBW a (MOVDconst [c]) (MOVDconst [d])) -> (SUBconst [int64(int32(c)*int32(d))] a)
  1360  (DIV   (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [c/d]) // never fold a zero divisor: evaluating c/d here would panic inside the compiler; leave it for the run-time check
  1361  (UDIV  (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [int64(uint64(c)/uint64(d))])
  1362  (DIVW  (MOVDconst [c]) (MOVDconst [d])) && int32(d) != 0 -> (MOVDconst [int64(int32(c)/int32(d))]) // the W forms divide by the truncated constant, so that is what must be non-zero
  1363  (UDIVW (MOVDconst [c]) (MOVDconst [d])) && uint32(d) != 0 -> (MOVDconst [int64(uint32(c)/uint32(d))])
  1364  (MOD   (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [c%d])
  1365  (UMOD  (MOVDconst [c]) (MOVDconst [d])) && d != 0 -> (MOVDconst [int64(uint64(c)%uint64(d))])
  1366  (MODW  (MOVDconst [c]) (MOVDconst [d])) && int32(d) != 0 -> (MOVDconst [int64(int32(c)%int32(d))])
  1367  (UMODW (MOVDconst [c]) (MOVDconst [d])) && uint32(d) != 0 -> (MOVDconst [int64(uint32(c)%uint32(d))])
  1368  (ANDconst [c] (MOVDconst [d]))  -> (MOVDconst [c&d])
  1369  (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
  1370  (ANDconst [c] (MOVWUreg x)) -> (ANDconst [c&(1<<32-1)] x)
  1371  (ANDconst [c] (MOVHUreg x)) -> (ANDconst [c&(1<<16-1)] x)
  1372  (ANDconst [c] (MOVBUreg x)) -> (ANDconst [c&(1<<8-1)] x)
  1373  (MOVWUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<32-1)] x)
  1374  (MOVHUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<16-1)] x)
  1375  (MOVBUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<8-1)] x)
  1376  (ORconst  [c] (MOVDconst [d]))  -> (MOVDconst [c|d])
  1377  (ORconst  [c] (ORconst [d] x))  -> (ORconst [c|d] x)
  1378  (XORconst [c] (MOVDconst [d]))  -> (MOVDconst [c^d])
  1379  (XORconst [c] (XORconst [d] x)) -> (XORconst [c^d] x)
  1380  (MVN (MOVDconst [c])) -> (MOVDconst [^c])
  1381  (NEG (MOVDconst [c])) -> (MOVDconst [-c])
  1382  (MOVBreg  (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
  1383  (MOVBUreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
  1384  (MOVHreg  (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
  1385  (MOVHUreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
  1386  (MOVWreg  (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
  1387  (MOVWUreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
  1388  (MOVDreg  (MOVDconst [c])) -> (MOVDconst [c])
  1389  
  1390  // constant comparisons: both operands are known, so the flags become a flag constant; in FlagX_UY, X is the signed ordering and UY the unsigned ordering of the operands
  1391  (CMPconst  (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
  1392  (CMPconst  (MOVDconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
  1393  (CMPconst  (MOVDconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
  1394  (CMPconst  (MOVDconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
  1395  (CMPconst  (MOVDconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
  1396  (CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ) // W forms compare only the low 32 bits of each constant
  1397  (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
  1398  (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
  1399  (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
  1400  (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
  1401  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)==0 -> (FlagEQ) // TST rules classify the sign of x&y
  1402  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)<0  -> (FlagLT_UGT)
  1403  (TSTconst  (MOVDconst [x]) [y]) && int64(x&y)>0  -> (FlagGT_UGT)
  1404  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)==0 -> (FlagEQ)
  1405  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)<0  -> (FlagLT_UGT)
  1406  (TSTWconst (MOVDconst [x]) [y]) && int32(x&y)>0  -> (FlagGT_UGT)
  1407  (CMNconst  (MOVDconst [x]) [y]) && int64(x)==int64(-y) -> (FlagEQ) // NOTE(review): CMN is modelled as a CMP of x against -y; confirm this matches the hardware carry/overflow flags for unsigned conditions and for y==0 or overflowing sums
  1408  (CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)<uint64(-y) -> (FlagLT_ULT)
  1409  (CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)>uint64(-y) -> (FlagLT_UGT)
  1410  (CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)<uint64(-y) -> (FlagGT_ULT)
  1411  (CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)>uint64(-y) -> (FlagGT_UGT)
  1412  (CMNWconst (MOVDconst [x]) [y]) && int32(x)==int32(-y) -> (FlagEQ)
  1413  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)<uint32(-y) -> (FlagLT_ULT)
  1414  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)>uint32(-y) -> (FlagLT_UGT)
  1415  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)<uint32(-y) -> (FlagGT_ULT)
  1416  (CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)>uint32(-y) -> (FlagGT_UGT)
  1417  
  1418  
  1419  // other known comparisons: the left operand has a known upper bound below the constant, so it always compares less-than (signed and unsigned)
  1420  (CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT) // a zero-extended byte is at most 0xff
  1421  (CMPconst (MOVHUreg _) [c]) && 0xffff < c -> (FlagLT_ULT) // a zero-extended halfword is at most 0xffff
  1422  (CMPconst (MOVWUreg _) [c]) && 0xffffffff < c -> (FlagLT_ULT) // a zero-extended word is at most 0xffffffff
  1423  (CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT) // with a non-negative mask m, x&m is in [0, m]
  1424  (CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT) // a logical right shift by c leaves a value below 2^(64-c)
  1425  (CMPWconst (MOVBUreg _) [c]) && 0xff < int32(c) -> (FlagLT_ULT)
  1426  (CMPWconst (MOVHUreg _) [c]) && 0xffff < int32(c) -> (FlagLT_ULT)
  1427  
  1428  // absorb flag constants into branches
  1429  (EQ (FlagEQ) yes no) -> (First nil yes no)
  1430  (EQ (FlagLT_ULT) yes no) -> (First nil no yes)
  1431  (EQ (FlagLT_UGT) yes no) -> (First nil no yes)
  1432  (EQ (FlagGT_ULT) yes no) -> (First nil no yes)
  1433  (EQ (FlagGT_UGT) yes no) -> (First nil no yes)
  1434  
  1435  (NE (FlagEQ) yes no) -> (First nil no yes)
  1436  (NE (FlagLT_ULT) yes no) -> (First nil yes no)
  1437  (NE (FlagLT_UGT) yes no) -> (First nil yes no)
  1438  (NE (FlagGT_ULT) yes no) -> (First nil yes no)
  1439  (NE (FlagGT_UGT) yes no) -> (First nil yes no)
  1440  
  1441  (LT (FlagEQ) yes no) -> (First nil no yes)
  1442  (LT (FlagLT_ULT) yes no) -> (First nil yes no)
  1443  (LT (FlagLT_UGT) yes no) -> (First nil yes no)
  1444  (LT (FlagGT_ULT) yes no) -> (First nil no yes)
  1445  (LT (FlagGT_UGT) yes no) -> (First nil no yes)
  1446  
  1447  (LE (FlagEQ) yes no) -> (First nil yes no)
  1448  (LE (FlagLT_ULT) yes no) -> (First nil yes no)
  1449  (LE (FlagLT_UGT) yes no) -> (First nil yes no)
  1450  (LE (FlagGT_ULT) yes no) -> (First nil no yes)
  1451  (LE (FlagGT_UGT) yes no) -> (First nil no yes)
  1452  
        // How to read these tables (the LE group above follows the same scheme):
        // each flag constant names a known signed outcome (EQ, LT or GT) and a
        // known unsigned outcome (ULT or UGT). When the block's condition holds
        // for that outcome the successors stay in (yes no) order; otherwise they
        // are swapped to (no yes).
  1453  (GT (FlagEQ) yes no) -> (First nil no yes)
  1454  (GT (FlagLT_ULT) yes no) -> (First nil no yes)
  1455  (GT (FlagLT_UGT) yes no) -> (First nil no yes)
  1456  (GT (FlagGT_ULT) yes no) -> (First nil yes no)
  1457  (GT (FlagGT_UGT) yes no) -> (First nil yes no)
  1458  
  1459  (GE (FlagEQ) yes no) -> (First nil yes no)
  1460  (GE (FlagLT_ULT) yes no) -> (First nil no yes)
  1461  (GE (FlagLT_UGT) yes no) -> (First nil no yes)
  1462  (GE (FlagGT_ULT) yes no) -> (First nil yes no)
  1463  (GE (FlagGT_UGT) yes no) -> (First nil yes no)
  1464  
        // The unsigned conditions below look only at the ULT/UGT half of the
        // flag constant.
  1465  (ULT (FlagEQ) yes no) -> (First nil no yes)
  1466  (ULT (FlagLT_ULT) yes no) -> (First nil yes no)
  1467  (ULT (FlagLT_UGT) yes no) -> (First nil no yes)
  1468  (ULT (FlagGT_ULT) yes no) -> (First nil yes no)
  1469  (ULT (FlagGT_UGT) yes no) -> (First nil no yes)
  1470  
  1471  (ULE (FlagEQ) yes no) -> (First nil yes no)
  1472  (ULE (FlagLT_ULT) yes no) -> (First nil yes no)
  1473  (ULE (FlagLT_UGT) yes no) -> (First nil no yes)
  1474  (ULE (FlagGT_ULT) yes no) -> (First nil yes no)
  1475  (ULE (FlagGT_UGT) yes no) -> (First nil no yes)
  1476  
  1477  (UGT (FlagEQ) yes no) -> (First nil no yes)
  1478  (UGT (FlagLT_ULT) yes no) -> (First nil no yes)
  1479  (UGT (FlagLT_UGT) yes no) -> (First nil yes no)
  1480  (UGT (FlagGT_ULT) yes no) -> (First nil no yes)
  1481  (UGT (FlagGT_UGT) yes no) -> (First nil yes no)
  1482  
  1483  (UGE (FlagEQ) yes no) -> (First nil yes no)
  1484  (UGE (FlagLT_ULT) yes no) -> (First nil no yes)
  1485  (UGE (FlagLT_UGT) yes no) -> (First nil yes no)
  1486  (UGE (FlagGT_ULT) yes no) -> (First nil no yes)
  1487  (UGE (FlagGT_UGT) yes no) -> (First nil yes no)
  1488  
        // Zero / non-zero branches on a constant operand. Z and NZ test the
        // whole 64-bit constant; ZW and NZW test only its low 32 bits, hence
        // the int32(c) truncation in their guards.
  1489  (Z (MOVDconst [0]) yes no) -> (First nil yes no)
  1490  (Z (MOVDconst [c]) yes no) && c != 0 -> (First nil no yes)
  1491  (NZ (MOVDconst [0]) yes no) -> (First nil no yes)
  1492  (NZ (MOVDconst [c]) yes no) && c != 0 -> (First nil yes no)
  1493  (ZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First nil yes no)
  1494  (ZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First nil no yes)
  1495  (NZW (MOVDconst [c]) yes no) && int32(c) == 0 -> (First nil no yes)
  1496  (NZW (MOVDconst [c]) yes no) && int32(c) != 0 -> (First nil yes no)
  1497  
  1498  // absorb InvertFlags into branches
        // The InvertFlags wrapper is dropped and the branch condition is replaced
        // by its operand-swapped counterpart (LT<->GT, LE<->GE, ULT<->UGT,
        // ULE<->UGE). EQ and NE are symmetric, so they keep their condition.
  1499  (LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
  1500  (GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
  1501  (LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
  1502  (GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
  1503  (ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
  1504  (UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
  1505  (ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
  1506  (UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
  1507  (EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
  1508  (NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
  1509  
  1510  // absorb InvertFlags into CSEL(0)
        // The condition kept in the aux field ({cc}) is rewritten with
        // arm64Invert so the select can consume cmp directly.
  1511  (CSEL {cc} x y (InvertFlags cmp)) -> (CSEL {arm64Invert(cc.(Op))} x y cmp)
  1512  (CSEL0 {cc} x (InvertFlags cmp)) -> (CSEL0 {arm64Invert(cc.(Op))} x cmp)
  1513  
  1514  // absorb flag constants into boolean values
        // A comparison pseudo-op applied to a known flag constant folds to
        // MOVDconst [1] when the comparison holds for that constant and to
        // MOVDconst [0] when it does not. Signed comparisons read the EQ/LT/GT
        // part of the constant, the *U comparisons read the ULT/UGT part.
  1515  (Equal (FlagEQ)) -> (MOVDconst [1])
  1516  (Equal (FlagLT_ULT)) -> (MOVDconst [0])
  1517  (Equal (FlagLT_UGT)) -> (MOVDconst [0])
  1518  (Equal (FlagGT_ULT)) -> (MOVDconst [0])
  1519  (Equal (FlagGT_UGT)) -> (MOVDconst [0])
  1520  
  1521  (NotEqual (FlagEQ)) -> (MOVDconst [0])
  1522  (NotEqual (FlagLT_ULT)) -> (MOVDconst [1])
  1523  (NotEqual (FlagLT_UGT)) -> (MOVDconst [1])
  1524  (NotEqual (FlagGT_ULT)) -> (MOVDconst [1])
  1525  (NotEqual (FlagGT_UGT)) -> (MOVDconst [1])
  1526  
  1527  (LessThan (FlagEQ)) -> (MOVDconst [0])
  1528  (LessThan (FlagLT_ULT)) -> (MOVDconst [1])
  1529  (LessThan (FlagLT_UGT)) -> (MOVDconst [1])
  1530  (LessThan (FlagGT_ULT)) -> (MOVDconst [0])
  1531  (LessThan (FlagGT_UGT)) -> (MOVDconst [0])
  1532  
  1533  (LessThanU (FlagEQ)) -> (MOVDconst [0])
  1534  (LessThanU (FlagLT_ULT)) -> (MOVDconst [1])
  1535  (LessThanU (FlagLT_UGT)) -> (MOVDconst [0])
  1536  (LessThanU (FlagGT_ULT)) -> (MOVDconst [1])
  1537  (LessThanU (FlagGT_UGT)) -> (MOVDconst [0])
  1538  
  1539  (LessEqual (FlagEQ)) -> (MOVDconst [1])
  1540  (LessEqual (FlagLT_ULT)) -> (MOVDconst [1])
  1541  (LessEqual (FlagLT_UGT)) -> (MOVDconst [1])
  1542  (LessEqual (FlagGT_ULT)) -> (MOVDconst [0])
  1543  (LessEqual (FlagGT_UGT)) -> (MOVDconst [0])
  1544  
  1545  (LessEqualU (FlagEQ)) -> (MOVDconst [1])
  1546  (LessEqualU (FlagLT_ULT)) -> (MOVDconst [1])
  1547  (LessEqualU (FlagLT_UGT)) -> (MOVDconst [0])
  1548  (LessEqualU (FlagGT_ULT)) -> (MOVDconst [1])
  1549  (LessEqualU (FlagGT_UGT)) -> (MOVDconst [0])
  1550  
  1551  (GreaterThan (FlagEQ)) -> (MOVDconst [0])
  1552  (GreaterThan (FlagLT_ULT)) -> (MOVDconst [0])
  1553  (GreaterThan (FlagLT_UGT)) -> (MOVDconst [0])
  1554  (GreaterThan (FlagGT_ULT)) -> (MOVDconst [1])
  1555  (GreaterThan (FlagGT_UGT)) -> (MOVDconst [1])
  1556  
  1557  (GreaterThanU (FlagEQ)) -> (MOVDconst [0])
  1558  (GreaterThanU (FlagLT_ULT)) -> (MOVDconst [0])
  1559  (GreaterThanU (FlagLT_UGT)) -> (MOVDconst [1])
  1560  (GreaterThanU (FlagGT_ULT)) -> (MOVDconst [0])
  1561  (GreaterThanU (FlagGT_UGT)) -> (MOVDconst [1])
  1562  
  1563  (GreaterEqual (FlagEQ)) -> (MOVDconst [1])
  1564  (GreaterEqual (FlagLT_ULT)) -> (MOVDconst [0])
  1565  (GreaterEqual (FlagLT_UGT)) -> (MOVDconst [0])
  1566  (GreaterEqual (FlagGT_ULT)) -> (MOVDconst [1])
  1567  (GreaterEqual (FlagGT_UGT)) -> (MOVDconst [1])
  1568  
  1569  (GreaterEqualU (FlagEQ)) -> (MOVDconst [1])
  1570  (GreaterEqualU (FlagLT_ULT)) -> (MOVDconst [0])
  1571  (GreaterEqualU (FlagLT_UGT)) -> (MOVDconst [1])
  1572  (GreaterEqualU (FlagGT_ULT)) -> (MOVDconst [0])
  1573  (GreaterEqualU (FlagGT_UGT)) -> (MOVDconst [1])
  1574  
  1575  // absorb InvertFlags into boolean values
        // Same mapping as for branches: the comparison is replaced by its
        // operand-swapped counterpart and applied to the un-inverted flags x.
        // Equal and NotEqual are symmetric and are kept as they are.
  1576  (Equal (InvertFlags x)) -> (Equal x)
  1577  (NotEqual (InvertFlags x)) -> (NotEqual x)
  1578  (LessThan (InvertFlags x)) -> (GreaterThan x)
  1579  (LessThanU (InvertFlags x)) -> (GreaterThanU x)
  1580  (GreaterThan (InvertFlags x)) -> (LessThan x)
  1581  (GreaterThanU (InvertFlags x)) -> (LessThanU x)
  1582  (LessEqual (InvertFlags x)) -> (GreaterEqual x)
  1583  (LessEqualU (InvertFlags x)) -> (GreaterEqualU x)
  1584  (GreaterEqual (InvertFlags x)) -> (LessEqual x)
  1585  (GreaterEqualU (InvertFlags x)) -> (LessEqualU x)
  1586  
  1587  // Boolean-generating instructions always zero the upper bits
  1588  // of the register, so no zero-extension is needed.
        // The guard restricts this to values whose type is boolean.
  1589  (MOVBUreg x) && x.Type.IsBoolean() -> (MOVDreg x)
  1590  
  1591  // absorb flag constants into conditional instructions
        // ccARM64Eval(cc, flag) > 0 resolves the select to its first operand;
        // a result < 0 resolves it to the other one (y for CSEL, constant 0 for
        // CSEL0). NOTE(review): a result of 0 presumably means the flags are not
        // a known constant, in which case no rule fires -- confirm against the
        // helper's definition.
  1592  (CSEL {cc} x _ flag) && ccARM64Eval(cc, flag) > 0 -> x
  1593  (CSEL {cc} _ y flag) && ccARM64Eval(cc, flag) < 0 -> y
  1594  (CSEL0 {cc} x flag) && ccARM64Eval(cc, flag) > 0 -> x
  1595  (CSEL0 {cc} _ flag) && ccARM64Eval(cc, flag) < 0 -> (MOVDconst [0])
  1596  
  1597  // absorb flags back into boolean CSEL
        // Matches a select whose flags come from comparing a value `bool`
        // against zero (CMPWconst [0] bool). When flagArg(bool) is non-nil the
        // select is rewritten to use those flags directly:
        //   - condition NotEqual (bool != 0): bool's own op becomes the condition;
        //   - condition Equal    (bool == 0): arm64Negate(bool.Op) is used instead.
  1598  (CSEL {cc} x y (CMPWconst [0] bool)) && cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil ->
  1599        (CSEL {bool.Op} x y flagArg(bool))
  1600  (CSEL {cc} x y (CMPWconst [0] bool)) && cc.(Op) == OpARM64Equal && flagArg(bool) != nil ->
  1601        (CSEL {arm64Negate(bool.Op)} x y flagArg(bool))
  1602  (CSEL0 {cc} x (CMPWconst [0] bool)) && cc.(Op) == OpARM64NotEqual && flagArg(bool) != nil ->
  1603        (CSEL0 {bool.Op} x flagArg(bool))
  1604  (CSEL0 {cc} x (CMPWconst [0] bool)) && cc.(Op) == OpARM64Equal && flagArg(bool) != nil ->
  1605        (CSEL0 {arm64Negate(bool.Op)} x flagArg(bool))
  1606  
  1607  // absorb shifts into ops
        // An operand produced by a constant shift (SLLconst/SRLconst/SRAconst) is
        // folded into the matching *shiftLL/*shiftRL/*shiftRA form of the op,
        // carrying the shift amount c as the aux integer. Every rule is guarded
        // by clobberIfDead on the shift value.
        // NOTE(review): clobberIfDead presumably succeeds always and only
        // invalidates the shift when it has no other uses -- confirm.
  1608  (NEG x:(SLLconst [c] y)) && clobberIfDead(x) -> (NEGshiftLL [c] y)
  1609  (NEG x:(SRLconst [c] y)) && clobberIfDead(x) -> (NEGshiftRL [c] y)
  1610  (NEG x:(SRAconst [c] y)) && clobberIfDead(x) -> (NEGshiftRA [c] y)
  1611  (MVN x:(SLLconst [c] y)) && clobberIfDead(x) -> (MVNshiftLL [c] y)
  1612  (MVN x:(SRLconst [c] y)) && clobberIfDead(x) -> (MVNshiftRL [c] y)
  1613  (MVN x:(SRAconst [c] y)) && clobberIfDead(x) -> (MVNshiftRA [c] y)
  1614  (ADD x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftLL x0 y [c])
  1615  (ADD x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRL x0 y [c])
  1616  (ADD x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ADDshiftRA x0 y [c])
  1617  (SUB x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftLL x0 y [c])
  1618  (SUB x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRL x0 y [c])
  1619  (SUB x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (SUBshiftRA x0 y [c])
  1620  (AND x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftLL x0 y [c])
  1621  (AND x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRL x0 y [c])
  1622  (AND x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ANDshiftRA x0 y [c])
  1623  (OR  x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ORshiftLL  x0 y [c]) // useful for combined load
  1624  (OR  x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ORshiftRL  x0 y [c])
  1625  (OR  x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ORshiftRA  x0 y [c])
  1626  (XOR x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (XORshiftLL x0 y [c])
  1627  (XOR x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (XORshiftRL x0 y [c])
  1628  (XOR x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (XORshiftRA x0 y [c])
  1629  (BIC x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (BICshiftLL x0 y [c])
  1630  (BIC x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (BICshiftRL x0 y [c])
  1631  (BIC x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (BICshiftRA x0 y [c])
  1632  (ORN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (ORNshiftLL x0 y [c])
  1633  (ORN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (ORNshiftRL x0 y [c])
  1634  (ORN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (ORNshiftRA x0 y [c])
  1635  (EON x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (EONshiftLL x0 y [c])
  1636  (EON x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (EONshiftRL x0 y [c])
  1637  (EON x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (EONshiftRA x0 y [c])
        // CMP also handles a shift on the first operand: the operands are
        // swapped so the shift lands in the shiftable position, and the result
        // is wrapped in InvertFlags to compensate for the swap.
  1638  (CMP x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftLL x0 y [c])
  1639  (CMP x0:(SLLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftLL x1 y [c]))
  1640  (CMP x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRL x0 y [c])
  1641  (CMP x0:(SRLconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRL x1 y [c]))
  1642  (CMP x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (CMPshiftRA x0 y [c])
  1643  (CMP x0:(SRAconst [c] y) x1) && clobberIfDead(x0) -> (InvertFlags (CMPshiftRA x1 y [c]))
  1644  (CMN x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (CMNshiftLL x0 y [c])
  1645  (CMN x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (CMNshiftRL x0 y [c])
  1646  (CMN x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (CMNshiftRA x0 y [c])
  1647  (TST x0 x1:(SLLconst [c] y)) && clobberIfDead(x1) -> (TSTshiftLL x0 y [c])
  1648  (TST x0 x1:(SRLconst [c] y)) && clobberIfDead(x1) -> (TSTshiftRL x0 y [c])
  1649  (TST x0 x1:(SRAconst [c] y)) && clobberIfDead(x1) -> (TSTshiftRA x0 y [c])
  1650  
  1651  // prefer *const ops to *shift ops
        // When the unshifted (first) operand of a *shift op is a constant c, the
        // shift of x by d is split back out into an explicit S?Lconst/SRAconst
        // and c moves into the aux field of the corresponding *const op.
        // The CMPshift* forms wrap the result in InvertFlags because the
        // constant moves from the first to the second compare operand.
  1652  (ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
  1653  (ADDshiftRL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRLconst <x.Type> x [d]))
  1654  (ADDshiftRA (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRAconst <x.Type> x [d]))
  1655  (ANDshiftLL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SLLconst <x.Type> x [d]))
  1656  (ANDshiftRL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRLconst <x.Type> x [d]))
  1657  (ANDshiftRA (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRAconst <x.Type> x [d]))
  1658  (ORshiftLL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SLLconst <x.Type> x [d]))
  1659  (ORshiftRL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRLconst <x.Type> x [d]))
  1660  (ORshiftRA  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRAconst <x.Type> x [d]))
  1661  (XORshiftLL (MOVDconst [c]) x [d]) -> (XORconst [c] (SLLconst <x.Type> x [d]))
  1662  (XORshiftRL (MOVDconst [c]) x [d]) -> (XORconst [c] (SRLconst <x.Type> x [d]))
  1663  (XORshiftRA (MOVDconst [c]) x [d]) -> (XORconst [c] (SRAconst <x.Type> x [d]))
  1664  (CMPshiftLL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
  1665  (CMPshiftRL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
  1666  (CMPshiftRA (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
  1667  (CMNshiftLL (MOVDconst [c]) x [d]) -> (CMNconst [c] (SLLconst <x.Type> x [d]))
  1668  (CMNshiftRL (MOVDconst [c]) x [d]) -> (CMNconst [c] (SRLconst <x.Type> x [d]))
  1669  (CMNshiftRA (MOVDconst [c]) x [d]) -> (CMNconst [c] (SRAconst <x.Type> x [d]))
  1670  (TSTshiftLL (MOVDconst [c]) x [d]) -> (TSTconst [c] (SLLconst <x.Type> x [d]))
  1671  (TSTshiftRL (MOVDconst [c]) x [d]) -> (TSTconst [c] (SRLconst <x.Type> x [d]))
  1672  (TSTshiftRA (MOVDconst [c]) x [d]) -> (TSTconst [c] (SRAconst <x.Type> x [d]))
  1673  
  1674  // constant folding in *shift ops
        // When the shifted operand is a constant c, the shift by d is evaluated
        // at compile time:
        //   - LL and RL convert c to uint64 first, giving a logical shift;
        //   - RA shifts the signed c directly, giving an arithmetic shift.
        // Unary ops (MVN, NEG) fold to a MOVDconst; binary ops fold to the
        // matching *const op. BIC, ORN and EON fold to ANDconst, ORconst and
        // XORconst of the complemented (^) shifted constant.
  1675  (MVNshiftLL (MOVDconst [c]) [d]) -> (MOVDconst [^int64(uint64(c)<<uint64(d))])
  1676  (MVNshiftRL (MOVDconst [c]) [d]) -> (MOVDconst [^int64(uint64(c)>>uint64(d))])
  1677  (MVNshiftRA (MOVDconst [c]) [d]) -> (MOVDconst [^(c>>uint64(d))])
  1678  (NEGshiftLL (MOVDconst [c]) [d]) -> (MOVDconst [-int64(uint64(c)<<uint64(d))])
  1679  (NEGshiftRL (MOVDconst [c]) [d]) -> (MOVDconst [-int64(uint64(c)>>uint64(d))])
  1680  (NEGshiftRA (MOVDconst [c]) [d]) -> (MOVDconst [-(c>>uint64(d))])
  1681  (ADDshiftLL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)<<uint64(d))])
  1682  (ADDshiftRL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)>>uint64(d))])
  1683  (ADDshiftRA x (MOVDconst [c]) [d]) -> (ADDconst x [c>>uint64(d)])
  1684  (SUBshiftLL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)<<uint64(d))])
  1685  (SUBshiftRL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)>>uint64(d))])
  1686  (SUBshiftRA x (MOVDconst [c]) [d]) -> (SUBconst x [c>>uint64(d)])
  1687  (ANDshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)<<uint64(d))])
  1688  (ANDshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)>>uint64(d))])
  1689  (ANDshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [c>>uint64(d)])
  1690  (ORshiftLL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)<<uint64(d))])
  1691  (ORshiftRL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)>>uint64(d))])
  1692  (ORshiftRA  x (MOVDconst [c]) [d]) -> (ORconst  x [c>>uint64(d)])
  1693  (XORshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)<<uint64(d))])
  1694  (XORshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)>>uint64(d))])
  1695  (XORshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [c>>uint64(d)])
  1696  (BICshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [^int64(uint64(c)<<uint64(d))])
  1697  (BICshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [^int64(uint64(c)>>uint64(d))])
  1698  (BICshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [^(c>>uint64(d))])
  1699  (ORNshiftLL x (MOVDconst [c]) [d]) -> (ORconst  x [^int64(uint64(c)<<uint64(d))])
  1700  (ORNshiftRL x (MOVDconst [c]) [d]) -> (ORconst  x [^int64(uint64(c)>>uint64(d))])
  1701  (ORNshiftRA x (MOVDconst [c]) [d]) -> (ORconst  x [^(c>>uint64(d))])
  1702  (EONshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [^int64(uint64(c)<<uint64(d))])
  1703  (EONshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [^int64(uint64(c)>>uint64(d))])
  1704  (EONshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [^(c>>uint64(d))])
  1705  (CMPshiftLL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)<<uint64(d))])
  1706  (CMPshiftRL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)>>uint64(d))])
  1707  (CMPshiftRA x (MOVDconst [c]) [d]) -> (CMPconst x [c>>uint64(d)])
  1708  (CMNshiftLL x (MOVDconst [c]) [d]) -> (CMNconst x [int64(uint64(c)<<uint64(d))])
  1709  (CMNshiftRL x (MOVDconst [c]) [d]) -> (CMNconst x [int64(uint64(c)>>uint64(d))])
  1710  (CMNshiftRA x (MOVDconst [c]) [d]) -> (CMNconst x [c>>uint64(d)])
  1711  (TSTshiftLL x (MOVDconst [c]) [d]) -> (TSTconst x [int64(uint64(c)<<uint64(d))])
  1712  (TSTshiftRL x (MOVDconst [c]) [d]) -> (TSTconst x [int64(uint64(c)>>uint64(d))])
  1713  (TSTshiftRA x (MOVDconst [c]) [d]) -> (TSTconst x [c>>uint64(d)])
  1714  
  1715  // simplification with *shift ops
        // These match `x OP (a<<d)` (or >>d) where x is itself the same shift of
        // the same value (c==d), i.e. both sides of the op are equal:
        //   SUB, XOR, BIC -> 0
        //   AND, OR       -> the already-shifted value y
        //   EON, ORN      -> -1 (all bits set)
  1716  (SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1717  (SUBshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1718  (SUBshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1719  (ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d -> y
  1720  (ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d -> y
  1721  (ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d -> y
  1722  (ORshiftLL  x y:(SLLconst x [c]) [d]) && c==d -> y
  1723  (ORshiftRL  x y:(SRLconst x [c]) [d]) && c==d -> y
  1724  (ORshiftRA  x y:(SRAconst x [c]) [d]) && c==d -> y
  1725  (XORshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1726  (XORshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1727  (XORshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1728  (BICshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1729  (BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1730  (BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
  1731  (EONshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1732  (EONshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1733  (EONshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1734  (ORNshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1735  (ORNshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1736  (ORNshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [-1])
  1737  
  1738  // Generate rotates with const shift
        // 64-bit: (x << c) combined with (x >> (64-c)) is a rotate right by 64-c;
        // (x >> c) combined with (x << (64-c)) is a rotate right by c. The two
        // halves never overlap, so ADD, OR and XOR all give the same result.
  1739  (ADDshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1740  ( ORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1741  (XORshiftLL [c] (SRLconst x [64-c]) x) -> (RORconst [64-c] x)
  1742  (ADDshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1743  ( ORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1744  (XORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [   c] x)
  1745  
        // 32-bit (result type of size 4, c < 32). In the left-shift form the
        // right-shifted half is matched as a UBFX whose aux value equals
        // arm64BFAuxInt(32-c, c); in the right-shift form the value being
        // shifted right is the zero-extended (MOVWUreg x).
  1746  (ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1747  	-> (RORWconst [32-c] x)
  1748  ( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1749  	-> (RORWconst [32-c] x)
  1750  (XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1751  	-> (RORWconst [32-c] x)
  1752  (ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1753  ( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1754  (XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
  1755  
        // Two nested constant rotates collapse into one whose amount is the sum
        // taken modulo the width (masked with 63 or 31).
  1756  (RORconst [c] (RORconst [d] x)) -> (RORconst [(c+d)&63] x)
  1757  (RORWconst [c] (RORWconst [d] x)) -> (RORWconst [(c+d)&31] x)
  1758  
  1759  // Generate rotates with non-const shift.
  1760  // These rules match Go source code like
  1761  //	y &= 63
  1762  //	x << y | x >> (64-y)
  1763  // "|" can also be "^" or "+".
  1764  // arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
        // In both patterns the complementary shift is wrapped in a CSEL0 whose
        // condition must be LessThanU on (CMPconst [64] <shift amount>).
        // The first rule is the rotate-left shape (result uses NEG y); the
        // second is the rotate-right shape (result uses y unchanged).
  1765  ((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
  1766  	(CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
  1767  		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
  1768  	-> (ROR x (NEG <t> y))
  1769  ((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
  1770  	(CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
  1771  		(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc.(Op) == OpARM64LessThanU
  1772  	-> (ROR x y)
  1773  
  1774  // These rules match Go source code like
  1775  //	y &= 31
  1776  //	x << y | x >> (32-y)
  1777  // "|" can also be "^" or "+".
  1778  // arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
        // NOTE(review): the guard still compares the shift amount against 64
        // (CMPconst [64]) although the amount is computed from 32; presumably
        // this mirrors how 32-bit shifts are lowered elsewhere in this file --
        // confirm before changing.
  1779  ((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
  1780  	(CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
  1781  		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
  1782  	-> (RORW x (NEG <t> y))
  1783  ((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
  1784  	(CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
  1785  		(CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc.(Op) == OpARM64LessThanU
  1786  	-> (RORW x y)
  1787  
  1788  // Extract from reg pair
        // Same shape as the constant-rotate patterns, except that the two halves
        // come from different values: x supplies the right-shifted part and x2
        // the left-shifted part. The result is EXTRconst (64-bit) or EXTRWconst
        // (32-bit, under the same c < 32 / t.Size() == 4 / UBFX aux guards).
  1789  (ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1790  ( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1791  (XORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
  1792  
  1793  (ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1794  	-> (EXTRWconst [32-c] x2 x)
  1795  ( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1796  	-> (EXTRWconst [32-c] x2 x)
  1797  (XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
  1798  	-> (EXTRWconst [32-c] x2 x)
  1799  
  1800  // Generic rules rewrite certain AND to a pair of shifts.
  1801  // However, on ARM64 the bitmask can fit into an instruction.
  1802  // Rewrite it back to AND.
        // Both shifts must use the same amount c, with 0 < c < 64 so the mask
        // expressions 1<<(64-c) and 1<<c stay within a 64-bit shift.
  1803  (SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
  1804  (SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
  1805  
  1806  // Special case setting bit as 1. An example is math.Copysign(c,-1)
        // If c2|c1 has every bit set, each bit cleared by the AND mask c2 is set
        // again by the OR constant c1, so the AND can be dropped.
  1807  (ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0  -> (ORconst [c1] x)
  1808  
  1809  // bitfield ops
        // The aux value of a bitfield op is built with arm64BFAuxInt(lsb, width)
        // and read back with getARM64BFlsb / getARM64BFwidth.
  1810  
  1811  // sbfiz
  1812  // (x << lc) >> rc
        // Left shift larger than the arithmetic right shift: the 64-lc low bits
        // of x end up at bit position lc-rc. A sign extension applied to a left
        // shift is the same case with an implicit field end at bit 32, 16 or 8.
  1813  (SRAconst [rc] (SLLconst [lc] x)) && lc > rc -> (SBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
  1814  (MOVWreg (SLLconst [lc] x)) && lc < 32 -> (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
  1815  (MOVHreg (SLLconst [lc] x)) && lc < 16 -> (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
  1816  (MOVBreg (SLLconst [lc] x)) && lc < 8 -> (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
  1817  
  1818  // sbfx
  1819  // (x << lc) >> rc
        // Left shift not larger than the right shift: a signed field of width
        // 64-rc is extracted starting at bit rc-lc. An arithmetic right shift of
        // a sign-extended value extracts from the 32-, 16- or 8-bit source.
  1820  (SRAconst [rc] (SLLconst [lc] x)) && lc <= rc -> (SBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
  1821  (SRAconst [rc] (MOVWreg x)) && rc < 32 -> (SBFX [arm64BFAuxInt(rc, 32-rc)] x)
  1822  (SRAconst [rc] (MOVHreg x)) && rc < 16 -> (SBFX [arm64BFAuxInt(rc, 16-rc)] x)
  1823  (SRAconst [rc] (MOVBreg x)) && rc < 8 -> (SBFX [arm64BFAuxInt(rc, 8-rc)] x)
  1824  
  1825  // sbfiz/sbfx combinations: merge shifts into bitfield ops
        // A right shift smaller than the SBFIZ lsb only lowers the lsb; a shift
        // of at least lsb (but less than lsb+width) turns the op into an SBFX of
        // the bits that remain.
  1826  (SRAconst [sc] (SBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
  1827  	-> (SBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1828  (SRAconst [sc] (SBFIZ [bfc] x)) && sc >= getARM64BFlsb(bfc)
  1829  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1830  	-> (SBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1831  
  1832  // ubfiz
  1833  // (x & ac) << sc
        // The mask is applied before the shift, so isARM64BFMask / arm64BFWidth
        // are called with a mask offset of 0. A zero extension (MOVWUreg,
        // MOVHUreg, MOVBUreg) is treated as the mask 1<<32-1, 1<<16-1 or 1<<8-1.
  1834  (SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
  1835  	-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
  1836  (SLLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 32)] x)
  1837  (SLLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 16)] x)
  1838  (SLLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 8)] x)
  1839  // (x << sc) & ac
        // Here the mask is applied after the shift, so the helpers receive sc as
        // the third argument as well.
  1840  (ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
  1841  	-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
  1842  (MOVWUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, sc)
  1843  	-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
  1844  (MOVHUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, sc)
  1845  	-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
  1846  (MOVBUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, sc)
  1847  	-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
  1848  // (x << lc) >> rc
        // Only when the left shift is strictly larger than the logical right shift.
  1849  (SRLconst [rc] (SLLconst [lc] x)) && lc > rc -> (UBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
  1850  
  1851  // ubfx
  1852  // (x >> sc) & ac
        // The mask is applied to the already-shifted value, so the helpers are
        // called with a mask offset of 0. Zero extensions are treated as the
        // masks 1<<32-1, 1<<16-1 and 1<<8-1.
  1853  (ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
  1854  	-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
  1855  (MOVWUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFX [arm64BFAuxInt(sc, 32)] x)
  1856  (MOVHUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFX [arm64BFAuxInt(sc, 16)] x)
  1857  (MOVBUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFX [arm64BFAuxInt(sc, 8)] x)
  1858  // (x & ac) >> sc
        // The mask is applied before the shift, so the helpers receive sc as the
        // third argument as well.
  1859  (SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
  1860  	-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
  1861  (SRLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, sc)
  1862  	-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
  1863  (SRLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, sc)
  1864  	-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
  1865  (SRLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, sc)
  1866  	-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
  1867  // (x << lc) >> rc
        // Only when the left shift is strictly smaller than the logical right
        // shift; lc > rc is the UBFIZ rule, and equal amounts are covered by the
        // "rewrite back to AND" rule.
  1868  (SRLconst [rc] (SLLconst [lc] x)) && lc < rc -> (UBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
  1869  
  1870  // ubfiz/ubfx combinations: merge shifts into bitfield ops
        // A shift in the same direction as the bitfield op is absorbed by raising
        // the lsb by sc. When the shift is applied to the result of UBFX, or to
        // the input of UBFIZ, it also discards sc bits of the field, so the width
        // shrinks by sc; in the other two rules the width is unchanged and the
        // guard keeps sc+width+lsb below 64.
  1871  (SRLconst [sc] (UBFX [bfc] x)) && sc < getARM64BFwidth(bfc)
  1872  	-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
  1873  (UBFX [bfc] (SRLconst [sc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
  1874  	-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
  1875  (SLLconst [sc] (UBFIZ [bfc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
  1876  	-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
  1877  (UBFIZ [bfc] (SLLconst [sc] x)) && sc < getARM64BFwidth(bfc)
  1878  	-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
  1879  // ((x << c1) >> c2) >> c3
        // Right shift of a UBFIZ result, split by how sc compares with the lsb:
        // equal -> plain mask of the low `width` bits; smaller -> UBFIZ with a
        // lower lsb; larger (but inside the field) -> UBFX of the remaining bits.
  1880  (SRLconst [sc] (UBFIZ [bfc] x)) && sc == getARM64BFlsb(bfc)
  1881  	-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
  1882  (SRLconst [sc] (UBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
  1883  	-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1884  (SRLconst [sc] (UBFIZ [bfc] x)) && sc > getARM64BFlsb(bfc)
  1885  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1886  	-> (UBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1887  // ((x << c1) << c2) >> c3
        // UBFX applied to a left-shifted value, with the same three-way split on
        // sc versus the lsb: equal -> mask; smaller -> UBFX with a lower lsb;
        // larger (but inside the field) -> UBFIZ of the remaining bits.
  1888  (UBFX [bfc] (SLLconst [sc] x)) && sc == getARM64BFlsb(bfc)
  1889  	-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
  1890  (UBFX [bfc] (SLLconst [sc] x)) && sc < getARM64BFlsb(bfc)
  1891  	-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
  1892  (UBFX [bfc] (SLLconst [sc] x)) && sc > getARM64BFlsb(bfc)
  1893  	&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
  1894  	-> (UBFIZ [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
  1895  
  1896  // bfi
        // OR of a field positioned by UBFIZ with a value y whose same field has
        // been cleared: ac must be exactly the complement of the `width`-bit mask
        // shifted to `lsb`. The second rule recognises the same insert written
        // with explicit shifts (lc > rc).
  1897  (OR (UBFIZ [bfc] x) (ANDconst [ac] y))
  1898  	&& ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
  1899  	-> (BFI [bfc] y x)
  1900  (ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
  1901  	&& lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
  1902  	-> (BFI [arm64BFAuxInt(lc-rc, 64-lc)] x y)
  1903  // bfxil
        // OR of a field extracted by UBFX with a value y whose low `width` bits
        // have been cleared, either by an AND mask or by shifting y right and
        // back left by sc == width.
  1904  (OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
  1905  	-> (BFXIL [bfc] y x)
  1906  (ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc)
  1907  	-> (BFXIL [bfc] y x)
  1908  
  1909  // do combined loads
  1910  // little endian loads
  1911  // b[0] | b[1]<<8 -> load 16-bit
        // Common guards for all combined-load rules:
        //   - every matched load (x*), MOVDnop wrapper (y*) and intermediate OR
        //     (o*) must have exactly one use;
        //   - mergePoint(b, ...) must be non-nil; the replacement load is created
        //     in the block it returns (the @mergePoint(...) prefix);
        //   - all matched values are passed to clobber.
        // NOTE(review): clobber presumably invalidates the replaced values and
        // always returns true -- confirm against its definition.
        //
        // Offset form: two byte loads from the same base p, symbol s and memory
        // state at consecutive offsets i0 and i0+1.
  1912  (ORshiftLL <t> [8]
  1913  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
  1914  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  1915  	&& i1 == i0+1
  1916  	&& x0.Uses == 1 && x1.Uses == 1
  1917  	&& y0.Uses == 1 && y1.Uses == 1
  1918  	&& mergePoint(b,x0,x1) != nil
  1919  	&& clobber(x0) && clobber(x1)
  1920  	&& clobber(y0) && clobber(y1)
  1921  	-> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: byte 0 is an indexed load (ptr0+idx0) and byte 1 is an
        // offset-1 load from (ADD ptr1 idx1) with no symbol. ADD is commutative,
        // so the pointer/index pair is accepted in either order.
  1922  (ORshiftLL <t> [8]
  1923  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
  1924  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  1925  	&& s == nil
  1926  	&& x0.Uses == 1 && x1.Uses == 1
  1927  	&& y0.Uses == 1 && y1.Uses == 1
  1928  	&& mergePoint(b,x0,x1) != nil
  1929  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  1930  	&& clobber(x0) && clobber(x1)
  1931  	&& clobber(y0) && clobber(y1)
  1932  	-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
        // Fully indexed form: both bytes are indexed loads from the same ptr,
        // at idx and idx+1.
  1933  (ORshiftLL <t> [8]
  1934  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
  1935  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  1936  	&& x0.Uses == 1 && x1.Uses == 1
  1937  	&& y0.Uses == 1 && y1.Uses == 1
  1938  	&& mergePoint(b,x0,x1) != nil
  1939  	&& clobber(x0) && clobber(x1)
  1940  	&& clobber(y0) && clobber(y1)
  1941  	-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
  1942  
  1943  // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
        // These patterns build on an already-combined 16-bit load (x0) and add
        // the two bytes at offsets +2 and +3. The guards follow the same scheme
        // as the 16-bit rules: single use of every matched value, a non-nil
        // mergePoint, and clobber of everything that is replaced.
        //
        // Offset form: same base p, symbol s and memory state throughout.
  1944  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  1945  	            x0:(MOVHUload [i0] {s} p mem)
  1946  	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
  1947  	y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
  1948  	&& i2 == i0+2
  1949  	&& i3 == i0+3
  1950  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  1951  	&& y1.Uses == 1 && y2.Uses == 1
  1952  	&& o0.Uses == 1
  1953  	&& mergePoint(b,x0,x1,x2) != nil
  1954  	&& clobber(x0) && clobber(x1) && clobber(x2)
  1955  	&& clobber(y1) && clobber(y2)
  1956  	&& clobber(o0)
  1957  	-> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: the halfword is an indexed load; the two bytes are offset
        // loads (+2, +3, no symbol) from an ADD of the same pointer and index
        // (either operand order). isSamePtr(p1, p) ties the second byte's base
        // to the first.
  1958  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  1959  	            x0:(MOVHUloadidx ptr0 idx0 mem)
  1960  	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
  1961  	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  1962  	&& s == nil
  1963  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  1964  	&& y1.Uses == 1 && y2.Uses == 1
  1965  	&& o0.Uses == 1
  1966  	&& mergePoint(b,x0,x1,x2) != nil
  1967  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  1968  	&& isSamePtr(p1, p)
  1969  	&& clobber(x0) && clobber(x1) && clobber(x2)
  1970  	&& clobber(y1) && clobber(y2)
  1971  	&& clobber(o0)
  1972  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
        // Fully indexed form: all three loads use the same ptr, at idx, idx+2
        // and idx+3.
  1973  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  1974  	            x0:(MOVHUloadidx ptr idx mem)
  1975  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  1976  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  1977  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  1978  	&& y1.Uses == 1 && y2.Uses == 1
  1979  	&& o0.Uses == 1
  1980  	&& mergePoint(b,x0,x1,x2) != nil
  1981  	&& clobber(x0) && clobber(x1) && clobber(x2)
  1982  	&& clobber(y1) && clobber(y2)
  1983  	&& clobber(o0)
  1984  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
        // Scaled-index form: the halfword load scales its index by 2
        // (MOVHUloadidx2) and the byte loads use ptr1 + idx1<<1. The operand
        // order is fixed here, since ADDshiftLL is not commutative. The merged
        // load is unscaled, so the index is shifted explicitly in the result.
  1985  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  1986  	            x0:(MOVHUloadidx2 ptr0 idx0 mem)
  1987  	y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
  1988  	y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  1989  	&& s == nil
  1990  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  1991  	&& y1.Uses == 1 && y2.Uses == 1
  1992  	&& o0.Uses == 1
  1993  	&& mergePoint(b,x0,x1,x2) != nil
  1994  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  1995  	&& isSamePtr(p1, p)
  1996  	&& clobber(x0) && clobber(x1) && clobber(x2)
  1997  	&& clobber(y1) && clobber(y2)
  1998  	&& clobber(o0)
  1999  	-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
  2000  
  2001  // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
        // These patterns build on an already-combined 32-bit load (x0) and add
        // the four bytes at offsets +4..+7, OR-ed in at shifts 32, 40, 48 and 56.
        // Guards: single use of every load (x*), MOVDnop (y*) and intermediate OR
        // (o*), a non-nil mergePoint, and clobber of everything that is replaced.
        //
        // Offset form: same base p, symbol s and memory state throughout.
  2002  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2003  	            x0:(MOVWUload [i0] {s} p mem)
  2004  	y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
  2005  	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
  2006  	y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
  2007  	y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
  2008  	&& i4 == i0+4
  2009  	&& i5 == i0+5
  2010  	&& i6 == i0+6
  2011  	&& i7 == i0+7
  2012  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2013  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2014  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2015  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2016  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2017  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2018  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2019  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Mixed form: the word is an indexed load; the four bytes are offset
        // loads (+4..+7, no symbol) whose base must be the ADD of the same
        // pointer and index (either operand order), checked via isSamePtr(p1, p).
  2020  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2021  	            x0:(MOVWUloadidx ptr0 idx0 mem)
  2022  	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
  2023  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2024  	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
  2025  	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  2026  	&& s == nil
  2027  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2028  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2029  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2030  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2031  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2032  	&& isSamePtr(p1, p)
  2033  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2034  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2035  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2036  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
  2037  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2038  	            x0:(MOVWUloadidx4 ptr0 idx0 mem)
  2039  	y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
  2040  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2041  	y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
  2042  	y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  2043  	&& s == nil
  2044  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2045  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2046  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2047  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2048  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2049  	&& isSamePtr(p1, p)
  2050  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2051  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2052  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2053  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
  2054  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2055  	            x0:(MOVWUloadidx ptr idx mem)
  2056  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2057  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2058  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2059  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2060  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2061  	&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2062  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2063  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2064  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2065  	&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2066  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2067  	-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
  2068  
  2069  // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
        // Same combination as a 32-bit little-endian load, but matched when the OR tree
        // is written highest byte first: the innermost operand is (SLLconst [24] b[3])
        // and the outermost OR adds the unshifted b[0]. Every matched value must have
        // exactly one use, and the replacement load is built in the block returned by
        // mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 .. i0+3.
  2070  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2071  	y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
  2072  	y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
  2073  	y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
  2074  	y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
  2075  	&& i1 == i0+1
  2076  	&& i2 == i0+2
  2077  	&& i3 == i0+3
  2078  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2079  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2080  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2081  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2082  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2083  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2084  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2085  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Register-indexed form: b[0] is a MOVBUloadidx on (ptr0, idx0); b[1] .. b[3] are
        // loaded at offsets 1 .. 3 from an address matching (ADD ptr1 idx1). The symbol
        // must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2086  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2087  	y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
  2088  	y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
  2089  	y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2090  	y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
  2091  	&& s == nil
  2092  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2093  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2094  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2095  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2096  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2097  	&& isSamePtr(p1, p)
  2098  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2099  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2100  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2101  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
        // Fully indexed form: every byte is an indexed load on the same ptr, with
        // indexes idx and (ADDconst [1..3] idx).
  2102  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2103  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2104  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2105  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2106  	y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
  2107  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2108  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2109  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2110  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2111  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2112  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2113  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2114  	-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
  2115  
  2116  // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
        // Eight single-byte loads combined highest byte first: the innermost operand is
        // (SLLconst [56] b[7]) and each enclosing ORshiftLL adds the next lower byte at
        // the shift named on the operator that takes the innermost term as its first
        // argument; the outermost OR adds the unshifted b[0]. Every matched value must
        // have exactly one use, and the replacement load is built in the block returned
        // by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 .. i0+7.
  2117  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2118  	y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
  2119  	y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
  2120  	y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
  2121  	y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
  2122  	y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
  2123  	y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
  2124  	y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
  2125  	y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
  2126  	&& i1 == i0+1
  2127  	&& i2 == i0+2
  2128  	&& i3 == i0+3
  2129  	&& i4 == i0+4
  2130  	&& i5 == i0+5
  2131  	&& i6 == i0+6
  2132  	&& i7 == i0+7
  2133  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2134  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2135  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2136  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2137  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2138  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2139  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2140  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2141  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2142  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2143  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2144  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2145  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2146  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
        // Register-indexed form: b[0] is a MOVBUloadidx on (ptr0, idx0); b[1] .. b[7] are
        // loaded at offsets 1 .. 7 from an address matching (ADD ptr1 idx1). The symbol
        // must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2147  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2148  	y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
  2149  	y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
  2150  	y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
  2151  	y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
  2152  	y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
  2153  	y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
  2154  	y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2155  	y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
  2156  	&& s == nil
  2157  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2158  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2159  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2160  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2161  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2162  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2163  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2164  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2165  	&& isSamePtr(p1, p)
  2166  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2167  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2168  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2169  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2170  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2171  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2172  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
        // Fully indexed form: every byte is an indexed load on the same ptr, with
        // indexes idx and (ADDconst [1..7] idx).
  2173  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2174  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2175  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2176  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2177  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2178  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2179  	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2180  	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2181  	y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
  2182  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2183  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2184  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2185  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2186  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2187  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2188  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2189  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2190  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2191  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2192  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2193  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2194  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2195  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
  2196  
  2197  // big endian loads
  2198  // b[1] | b[0]<<8 -> load 16-bit, reverse
        // The unshifted operand (y0) is the byte at the higher address and the operand
        // shifted left by 8 (y1) is the byte at the lower address. The pair is replaced by
        // a halfword load of the lower address wrapped in REV16W. Each matched value must
        // have exactly one use, and the result is built in the block returned by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 and i0+1.
        // Unlike the wider rules, the offset i0 is placed directly on MOVHUload rather
        // than through an OffPtr.
  2199  (ORshiftLL <t> [8]
  2200  	y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
  2201  	y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
  2202  	&& i1 == i0+1
  2203  	&& x0.Uses == 1 && x1.Uses == 1
  2204  	&& y0.Uses == 1 && y1.Uses == 1
  2205  	&& mergePoint(b,x0,x1) != nil
  2206  	&& clobber(x0) && clobber(x1)
  2207  	&& clobber(y0) && clobber(y1)
  2208  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
        // Register-indexed form: the low-address byte is a MOVBUloadidx on (ptr0, idx0)
        // and the other byte is loaded at offset 1 from (ADD ptr1 idx1). The symbol must
        // be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2209  (ORshiftLL <t> [8]
  2210  	y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
  2211  	y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
  2212  	&& s == nil
  2213  	&& x0.Uses == 1 && x1.Uses == 1
  2214  	&& y0.Uses == 1 && y1.Uses == 1
  2215  	&& mergePoint(b,x0,x1) != nil
  2216  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2217  	&& clobber(x0) && clobber(x1)
  2218  	&& clobber(y0) && clobber(y1)
  2219  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
        // Fully indexed form: both bytes are indexed loads on the same ptr, at idx and
        // (ADDconst [1] idx).
  2220  (ORshiftLL <t> [8]
  2221  	y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
  2222  	y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
  2223  	&& x0.Uses == 1 && x1.Uses == 1
  2224  	&& y0.Uses == 1 && y1.Uses == 1
  2225  	&& mergePoint(b,x0,x1) != nil
  2226  	&& clobber(x0) && clobber(x1)
  2227  	&& clobber(y0) && clobber(y1)
  2228  	-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
  2229  
  2230  // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
        // Each rule extends an already-combined byte-reversed halfword (y0, a REV16W of a
        // halfword load covering bytes 2 and 3) with the bytes at positions 1 and 0, ORed
        // in at shifts 16 and 24. The replacement is a 32-bit load of the lowest address
        // wrapped in REVW. Every matched value must have exactly one use, and the result
        // is built in the block returned by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0, i0+1 and i0+2.
  2231  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2232  	y0:(REV16W  x0:(MOVHUload [i2] {s} p mem))
  2233  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2234  	y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
  2235  	&& i1 == i0+1
  2236  	&& i2 == i0+2
  2237  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2238  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2239  	&& o0.Uses == 1
  2240  	&& mergePoint(b,x0,x1,x2) != nil
  2241  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2242  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2243  	&& clobber(o0)
  2244  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Register-indexed form: byte 0 is a MOVBUloadidx on (ptr0, idx0); byte 1 and the
        // halfword are loaded at offsets 1 and 2 from an address matching (ADD ptr1 idx1).
        // The symbol must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2245  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2246  	y0:(REV16W  x0:(MOVHUload [2] {s} p mem))
  2247  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2248  	y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
  2249  	&& s == nil
  2250  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2251  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2252  	&& o0.Uses == 1
  2253  	&& mergePoint(b,x0,x1,x2) != nil
  2254  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2255  	&& isSamePtr(p1, p)
  2256  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2257  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2258  	&& clobber(o0)
  2259  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        // Fully indexed form: all three loads are indexed loads on the same ptr, at idx,
        // (ADDconst [1] idx) and (ADDconst [2] idx).
  2260  (ORshiftLL <t> [24] o0:(ORshiftLL [16]
  2261  	y0:(REV16W  x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
  2262  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2263  	y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
  2264  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  2265  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  2266  	&& o0.Uses == 1
  2267  	&& mergePoint(b,x0,x1,x2) != nil
  2268  	&& clobber(x0) && clobber(x1) && clobber(x2)
  2269  	&& clobber(y0) && clobber(y1) && clobber(y2)
  2270  	&& clobber(o0)
  2271  	-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2272  
  2273  // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
        // Each rule extends an already-combined byte-reversed word (y0, a REVW of a 32-bit
        // load covering bytes 4 .. 7) with the bytes at positions 3, 2, 1 and 0, ORed in at
        // shifts 32, 40, 48 and 56. The replacement is a 64-bit load of the lowest address
        // wrapped in REV. Every matched value must have exactly one use, and the result is
        // built in the block returned by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 .. i0+4.
  2274  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2275  	y0:(REVW    x0:(MOVWUload [i4] {s} p mem))
  2276  	y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
  2277  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2278  	y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
  2279  	y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
  2280  	&& i1 == i0+1
  2281  	&& i2 == i0+2
  2282  	&& i3 == i0+3
  2283  	&& i4 == i0+4
  2284  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2285  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2286  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2287  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2288  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2289  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2290  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2291  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Register-indexed form: byte 0 is a MOVBUloadidx on (ptr0, idx0); bytes 1 .. 3 and
        // the word are loaded at offsets 1 .. 4 from an address matching (ADD ptr1 idx1).
        // The symbol must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2292  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2293  	y0:(REVW    x0:(MOVWUload [4] {s} p mem))
  2294  	y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
  2295  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2296  	y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2297  	y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
  2298  	&& s == nil
  2299  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2300  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2301  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2302  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2303  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2304  	&& isSamePtr(p1, p)
  2305  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2306  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2307  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2308  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        // Fully indexed form: every load is an indexed load on the same ptr, at idx and
        // (ADDconst [1..4] idx).
  2309  (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
  2310  	y0:(REVW    x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
  2311  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2312  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2313  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2314  	y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
  2315  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  2316  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  2317  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  2318  	&& mergePoint(b,x0,x1,x2,x3,x4) != nil
  2319  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
  2320  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
  2321  	&& clobber(o0) && clobber(o1) && clobber(o2)
  2322  	-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2323  
  2324  // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
        // Four single-byte loads combined with the lowest-address byte in the most
        // significant position: the innermost operand is (SLLconst [24] b[0]) and the
        // outermost OR adds the unshifted b[3]. The replacement is a 32-bit load of the
        // lowest address wrapped in REVW. Every matched value must have exactly one use,
        // and the result is built in the block returned by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 .. i0+3.
  2325  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2326  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2327  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2328  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2329  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2330  	&& i1 == i0+1
  2331  	&& i2 == i0+2
  2332  	&& i3 == i0+3
  2333  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2334  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2335  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2336  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2337  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2338  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2339  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2340  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Register-indexed form: b[0] is a MOVBUloadidx on (ptr0, idx0); b[1] .. b[3] are
        // loaded at offsets 1 .. 3 from an address matching (ADD ptr1 idx1). The symbol
        // must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2341  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2342  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2343  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2344  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2345  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2346  	&& s == nil
  2347  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2348  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2349  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2350  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2351  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2352  	&& isSamePtr(p1, p)
  2353  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2354  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2355  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2356  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
        // Fully indexed form: every byte is an indexed load on the same ptr, with
        // indexes idx and (ADDconst [1..3] idx).
  2357  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
  2358  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2359  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2360  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2361  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2362  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2363  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2364  	&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  2365  	&& mergePoint(b,x0,x1,x2,x3) != nil
  2366  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2367  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2368  	&& clobber(o0) && clobber(o1) && clobber(s0)
  2369  	-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
  2370  
  2371  // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
        // Eight single-byte loads combined with the lowest-address byte in the most
        // significant position: the innermost operand is (SLLconst [56] b[0]) and the
        // outermost OR adds the unshifted b[7]. The replacement is a 64-bit load of the
        // lowest address wrapped in REV. Every matched value must have exactly one use,
        // and the result is built in the block returned by mergePoint.
        // NOTE(review): clobber presumably invalidates the matched values once the
        // rule fires - confirm against the helper in rewrite.go.
        //
        // Constant-offset form: shared base p and symbol s, offsets i0 .. i0+7.
  2372  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2373  	y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
  2374  	y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  2375  	y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
  2376  	y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  2377  	y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
  2378  	y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
  2379  	y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
  2380  	y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
  2381  	&& i1 == i0+1
  2382  	&& i2 == i0+2
  2383  	&& i3 == i0+3
  2384  	&& i4 == i0+4
  2385  	&& i5 == i0+5
  2386  	&& i6 == i0+6
  2387  	&& i7 == i0+7
  2388  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2389  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2390  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2391  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2392  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2393  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2394  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2395  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2396  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2397  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2398  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2399  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2400  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2401  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
        // Register-indexed form: b[0] is a MOVBUloadidx on (ptr0, idx0); b[1] .. b[7] are
        // loaded at offsets 1 .. 7 from an address matching (ADD ptr1 idx1). The symbol
        // must be nil, and the ADD operands may match (ptr0, idx0) in either order.
  2402  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2403  	y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
  2404  	y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  2405  	y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
  2406  	y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  2407  	y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
  2408  	y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
  2409  	y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
  2410  	y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
  2411  	&& s == nil
  2412  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2413  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2414  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2415  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2416  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2417  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2418  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2419  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2420  	&& isSamePtr(p1, p)
  2421  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2422  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2423  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2424  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2425  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2426  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2427  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
        // Fully indexed form: every byte is an indexed load on the same ptr, with
        // indexes idx and (ADDconst [1..7] idx).
  2428  (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
  2429  	y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
  2430  	y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  2431  	y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
  2432  	y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  2433  	y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
  2434  	y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
  2435  	y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
  2436  	y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  2437  	&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  2438  	&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  2439  	&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  2440  	&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  2441  	&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  2442  	&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  2443  	&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  2444  	&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
  2445  	&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
  2446  	&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
  2447  	&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
  2448  	&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
  2449  	&& clobber(o4) && clobber(o5) && clobber(s0)
  2450  	-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
  2451  
  2452  // Combine zero stores into larger (unaligned) stores.
        // Every rule matches an outer zero store whose memory argument x is another zero
        // store of the same width, requires x to have no other use (x.Uses == 1), and
        // replaces the pair with one zero store of the next larger op on x's memory.
        // NOTE(review): areAdjacentOffsets(i,j,n) presumably holds when i and j differ by
        // exactly n, and clobber(x) presumably invalidates the inner store - confirm
        // against the helpers in rewrite.go.
        //
        // Byte + byte -> halfword. Constant-offset form: the merged store uses the
        // smaller of the two offsets, which must fit in 32 bits.
  2453  (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
  2454  	&& x.Uses == 1
  2455  	&& areAdjacentOffsets(i,j,1)
  2456  	&& is32Bit(min(i,j))
  2457  	&& isSamePtr(ptr0, ptr1)
  2458  	&& clobber(x)
  2459  	-> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
        // Byte + byte, outer store at offset 1 from (ADD ptr0 idx0), inner store indexed
        // on (ptr1, idx1). The symbol must be nil; the ADD operands may match in either order.
  2460  (MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
  2461  	&& x.Uses == 1
  2462  	&& s == nil
  2463  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2464  	&& clobber(x)
  2465  	-> (MOVHstorezeroidx ptr1 idx1 mem)
        // Byte + byte, both stores indexed on the same ptr at idx and idx+1.
  2466  (MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
  2467  	&& x.Uses == 1
  2468  	&& clobber(x)
  2469  	-> (MOVHstorezeroidx ptr idx mem)
        // Halfword + halfword -> word. Constant-offset form.
  2470  (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
  2471  	&& x.Uses == 1
  2472  	&& areAdjacentOffsets(i,j,2)
  2473  	&& is32Bit(min(i,j))
  2474  	&& isSamePtr(ptr0, ptr1)
  2475  	&& clobber(x)
  2476  	-> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
        // Halfword + halfword, outer store at offset 2 from (ADD ptr0 idx0), inner store
        // indexed on (ptr1, idx1).
  2477  (MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
  2478  	&& x.Uses == 1
  2479  	&& s == nil
  2480  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2481  	&& clobber(x)
  2482  	-> (MOVWstorezeroidx ptr1 idx1 mem)
        // Halfword + halfword, both stores indexed on the same ptr at idx and idx+2.
  2483  (MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
  2484  	&& x.Uses == 1
  2485  	&& clobber(x)
  2486  	-> (MOVWstorezeroidx ptr idx mem)
        // Halfword + halfword, scaled index: the inner store is MOVHstorezeroidx2 and the
        // outer address is (ADDshiftLL [1] ptr0 idx0). Only the direct ptr/idx pairing is
        // accepted, and the merged store indexes with idx1 shifted left by 1.
  2487  (MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
  2488  	&& x.Uses == 1
  2489  	&& s == nil
  2490  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2491  	&& clobber(x)
  2492  	-> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
        // Word + word -> doubleword. Constant-offset form.
  2493  (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
  2494  	&& x.Uses == 1
  2495  	&& areAdjacentOffsets(i,j,4)
  2496  	&& is32Bit(min(i,j))
  2497  	&& isSamePtr(ptr0, ptr1)
  2498  	&& clobber(x)
  2499  	-> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
        // Word + word, outer store at offset 4 from (ADD ptr0 idx0), inner store indexed
        // on (ptr1, idx1).
  2500  (MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
  2501  	&& x.Uses == 1
  2502  	&& s == nil
  2503  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2504  	&& clobber(x)
  2505  	-> (MOVDstorezeroidx ptr1 idx1 mem)
        // Word + word, both stores indexed on the same ptr at idx and idx+4.
  2506  (MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
  2507  	&& x.Uses == 1
  2508  	&& clobber(x)
  2509  	-> (MOVDstorezeroidx ptr idx mem)
        // Word + word, scaled index: the inner store is MOVWstorezeroidx4 and the outer
        // address is (ADDshiftLL [2] ptr0 idx0). The merged store indexes with idx1
        // shifted left by 2.
  2510  (MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
  2511  	&& x.Uses == 1
  2512  	&& s == nil
  2513  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2514  	&& clobber(x)
  2515  	-> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
        // Doubleword + doubleword -> MOVQstorezero. Constant-offset form.
        // NOTE(review): MOVQstorezero is presumably the 16-byte zero store - confirm
        // in ARM64Ops.go.
  2516  (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
  2517  	&& x.Uses == 1
  2518  	&& areAdjacentOffsets(i,j,8)
  2519  	&& is32Bit(min(i,j))
  2520  	&& isSamePtr(ptr0, ptr1)
  2521  	&& clobber(x)
  2522  	-> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
        // Doubleword + doubleword with an indexed inner store. The result is not an
        // indexed op: it reuses the already-computed address p0 with offset 0.
  2523  (MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
  2524  	&& x.Uses == 1
  2525  	&& s == nil
  2526  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2527  	&& clobber(x)
  2528  	-> (MOVQstorezero [0] {s} p0 mem)
        // Doubleword + doubleword, scaled index (MOVDstorezeroidx8 with ADDshiftLL [3]).
        // As above, the result reuses the computed address p0 with offset 0.
  2529  (MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
  2530  	&& x.Uses == 1
  2531  	&& s == nil
  2532  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2533  	&& clobber(x)
  2534  	-> (MOVQstorezero [0] {s} p0 mem)
  2535  
  2536  // Combine stores into larger (unaligned) stores.
  2537  (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2538  	&& x.Uses == 1
  2539  	&& isSamePtr(ptr0, ptr1)
  2540  	&& clobber(x)
  2541  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  // Merge two adjacent byte stores into one halfword store (fixed shift of 8).
  // In each rule the inner store x writes w and the outer store writes w moved
  // down by 8 bits one byte higher; the pair is replaced by a single halfword
  // store of w at the lower address. The "moved down by 8" value appears as
  // SRLconst [8] w, UBFX [arm64BFAuxInt(8, 8)] w, UBFX [arm64BFAuxInt(8, 24)] w,
  // or SRLconst [8] (MOVDreg w).
  // Every rule requires x.Uses == 1 and ends its condition with clobber(x).
  // The (ADD ptr0 idx0) forms additionally require s == nil and accept
  // (ptr0, idx0) matching (ptr1, idx1) in either order.
  2542  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2543  	&& x.Uses == 1
  2544  	&& s == nil
  2545  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2546  	&& clobber(x)
  2547  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // Fully indexed form: the outer index is idx+1, the inner index is idx.
  2548  (MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
  2549  	&& x.Uses == 1
  2550  	&& clobber(x)
  2551  	-> (MOVHstoreidx ptr idx w mem)
  // Same merge with the upper byte expressed as UBFX [arm64BFAuxInt(8, 8)].
  2552  (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2553  	&& x.Uses == 1
  2554  	&& isSamePtr(ptr0, ptr1)
  2555  	&& clobber(x)
  2556  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2557  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2558  	&& x.Uses == 1
  2559  	&& s == nil
  2560  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2561  	&& clobber(x)
  2562  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // Same merge with the upper byte expressed as UBFX [arm64BFAuxInt(8, 24)].
  2563  (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2564  	&& x.Uses == 1
  2565  	&& isSamePtr(ptr0, ptr1)
  2566  	&& clobber(x)
  2567  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2568  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  2569  	&& x.Uses == 1
  2570  	&& s == nil
  2571  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2572  	&& clobber(x)
  2573  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // Same merge where the shifted operand is wrapped in MOVDreg; the merged
  // store uses the unwrapped w.
  2574  (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
  2575  	&& x.Uses == 1
  2576  	&& isSamePtr(ptr0, ptr1)
  2577  	&& clobber(x)
  2578  	-> (MOVHstore [i-1] {s} ptr0 w mem)
  2579  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
  2580  	&& x.Uses == 1
  2581  	&& s == nil
  2582  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2583  	&& clobber(x)
  2584  	-> (MOVHstoreidx ptr1 idx1 w mem)
  // Merge two adjacent byte stores into one halfword store (general shift).
  // The inner store x writes w0, which is w shifted/extracted at position j-8
  // (or bit field bfc2); the outer store writes w at position j (or bfc) one
  // byte higher. The pair becomes a single halfword store of w0 at the lower
  // address. Every rule requires x.Uses == 1 and ends with clobber(x).
  2585  (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
  2586  	&& x.Uses == 1
  2587  	&& isSamePtr(ptr0, ptr1)
  2588  	&& clobber(x)
  2589  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2590  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
  2591  	&& x.Uses == 1
  2592  	&& s == nil
  2593  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2594  	&& clobber(x)
  2595  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // UBFX form: both bit fields must satisfy width == 32 - lsb, and the inner
  // field's lsb must be exactly 8 below the outer field's lsb.
  2596  (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
  2597  	&& x.Uses == 1
  2598  	&& isSamePtr(ptr0, ptr1)
  2599  	&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
  2600  	&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
  2601  	&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
  2602  	&& clobber(x)
  2603  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2604  (MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
  2605  	&& x.Uses == 1
  2606  	&& s == nil
  2607  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2608  	&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
  2609  	&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
  2610  	&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
  2611  	&& clobber(x)
  2612  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // SRLconst form where both shifted operands are wrapped in MOVDreg.
  2613  (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2614  	&& x.Uses == 1
  2615  	&& isSamePtr(ptr0, ptr1)
  2616  	&& clobber(x)
  2617  	-> (MOVHstore [i-1] {s} ptr0 w0 mem)
  2618  (MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
  2619  	&& x.Uses == 1
  2620  	&& s == nil
  2621  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2622  	&& clobber(x)
  2623  	-> (MOVHstoreidx ptr1 idx1 w0 mem)
  // Merge two adjacent halfword stores into one word store.
  // The inner store x writes w (or w0) and the outer store writes the value
  // moved down by a further 16 bits two bytes higher; the pair is replaced by
  // a single word store at the lower address. Every rule requires
  // x.Uses == 1 and ends its condition with clobber(x).
  // Addressing variants:
  //   - offset form [i]/[i-2] with isSamePtr(ptr0, ptr1);
  //   - (ADD ptr0 idx0) over MOVHstoreidx, with s == nil and ptr/idx matched
  //     in either order;
  //   - (ADDshiftLL [1] ptr0 idx0) over MOVHstoreidx2, with s == nil and
  //     ptr/idx matched in the same order only; the result re-expresses the
  //     index as (SLLconst [1] idx1) for MOVWstoreidx.
  2624  (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2625  	&& x.Uses == 1
  2626  	&& isSamePtr(ptr0, ptr1)
  2627  	&& clobber(x)
  2628  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2629  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2630  	&& x.Uses == 1
  2631  	&& s == nil
  2632  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2633  	&& clobber(x)
  2634  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2635  (MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
  2636  	&& x.Uses == 1
  2637  	&& clobber(x)
  2638  	-> (MOVWstoreidx ptr idx w mem)
  2639  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2640  	&& x.Uses == 1
  2641  	&& s == nil
  2642  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2643  	&& clobber(x)
  2644  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // Upper halfword expressed as UBFX [arm64BFAuxInt(16, 16)].
  2645  (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2646  	&& x.Uses == 1
  2647  	&& isSamePtr(ptr0, ptr1)
  2648  	&& clobber(x)
  2649  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2650  (MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
  2651  	&& x.Uses == 1
  2652  	&& s == nil
  2653  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2654  	&& clobber(x)
  2655  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2656  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2657  	&& x.Uses == 1
  2658  	&& s == nil
  2659  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2660  	&& clobber(x)
  2661  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // Upper halfword expressed as SRLconst [16] (MOVDreg w); the merged store
  // uses the unwrapped w.
  2662  (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
  2663  	&& x.Uses == 1
  2664  	&& isSamePtr(ptr0, ptr1)
  2665  	&& clobber(x)
  2666  	-> (MOVWstore [i-2] {s} ptr0 w mem)
  2667  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
  2668  	&& x.Uses == 1
  2669  	&& s == nil
  2670  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2671  	&& clobber(x)
  2672  	-> (MOVWstoreidx ptr1 idx1 w mem)
  2673  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
  2674  	&& x.Uses == 1
  2675  	&& s == nil
  2676  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2677  	&& clobber(x)
  2678  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
  // General shift: inner store writes w0 = (SRLconst [j-16] w), outer store
  // writes (SRLconst [j] w); the merged word store writes w0.
  2679  (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
  2680  	&& x.Uses == 1
  2681  	&& isSamePtr(ptr0, ptr1)
  2682  	&& clobber(x)
  2683  	-> (MOVWstore [i-2] {s} ptr0 w0 mem)
  2684  (MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2685  	&& x.Uses == 1
  2686  	&& s == nil
  2687  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2688  	&& clobber(x)
  2689  	-> (MOVWstoreidx ptr1 idx1 w0 mem)
  2690  (MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
  2691  	&& x.Uses == 1
  2692  	&& s == nil
  2693  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2694  	&& clobber(x)
  2695  	-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
  // Merge two adjacent word stores into one doubleword store.
  // The inner store x writes w (or w0) and the outer store writes the value
  // shifted right by a further 32 bits four bytes higher; the pair is replaced
  // by a single doubleword store at the lower address. Every rule requires
  // x.Uses == 1 and ends its condition with clobber(x).
  // The (ADD ...) forms require s == nil and accept ptr/idx in either order;
  // the (ADDshiftLL [2] ...) forms over MOVWstoreidx4 require s == nil, match
  // ptr/idx in the same order only, and emit the index as (SLLconst [2] idx1).
  2696  (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
  2697  	&& x.Uses == 1
  2698  	&& isSamePtr(ptr0, ptr1)
  2699  	&& clobber(x)
  2700  	-> (MOVDstore [i-4] {s} ptr0 w mem)
  2701  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
  2702  	&& x.Uses == 1
  2703  	&& s == nil
  2704  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2705  	&& clobber(x)
  2706  	-> (MOVDstoreidx ptr1 idx1 w mem)
  2707  (MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
  2708  	&& x.Uses == 1
  2709  	&& clobber(x)
  2710  	-> (MOVDstoreidx ptr idx w mem)
  2711  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
  2712  	&& x.Uses == 1
  2713  	&& s == nil
  2714  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2715  	&& clobber(x)
  2716  	-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
  // General shift: inner store writes w0 = (SRLconst [j-32] w), outer store
  // writes (SRLconst [j] w); the merged doubleword store writes w0.
  2717  (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
  2718  	&& x.Uses == 1
  2719  	&& isSamePtr(ptr0, ptr1)
  2720  	&& clobber(x)
  2721  	-> (MOVDstore [i-4] {s} ptr0 w0 mem)
  2722  (MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2723  	&& x.Uses == 1
  2724  	&& s == nil
  2725  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2726  	&& clobber(x)
  2727  	-> (MOVDstoreidx ptr1 idx1 w0 mem)
  2728  (MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
  2729  	&& x.Uses == 1
  2730  	&& s == nil
  2731  	&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  2732  	&& clobber(x)
  2733  	-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
  // Merge eight byte stores into one doubleword store of a byte-reversed value.
  // The chain stores w at the highest offset and (SRLconst [8k] w) at each
  // successively lower offset, down to (SRLconst [56] w) at the lowest. It is
  // replaced by a single MOVDstore of (REV w) at the lowest offset. All seven
  // inner stores must have exactly one use, and each is passed to clobber.
  2734  (MOVBstore [i] {s} ptr w
  2735  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2736  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2737  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
  2738  	x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
  2739  	x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
  2740  	x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
  2741  	x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
  2742  	&& x0.Uses == 1
  2743  	&& x1.Uses == 1
  2744  	&& x2.Uses == 1
  2745  	&& x3.Uses == 1
  2746  	&& x4.Uses == 1
  2747  	&& x5.Uses == 1
  2748  	&& x6.Uses == 1
  2749  	&& clobber(x0)
  2750  	&& clobber(x1)
  2751  	&& clobber(x2)
  2752  	&& clobber(x3)
  2753  	&& clobber(x4)
  2754  	&& clobber(x5)
  2755  	&& clobber(x6)
  2756  	-> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
  // Indexed variant: offsets 7..1 are stores off p, and the lowest byte is a
  // MOVBstoreidx on (ptr0, idx0). The store at offset 1 must address an
  // (ADD ptr1 idx1) value p1 with isSamePtr(p1, p), s must be nil, and
  // (ptr0, idx0) must match (ptr1, idx1) in either order.
  2757  (MOVBstore [7] {s} p w
  2758  	x0:(MOVBstore [6] {s} p (SRLconst [8] w)
  2759  	x1:(MOVBstore [5] {s} p (SRLconst [16] w)
  2760  	x2:(MOVBstore [4] {s} p (SRLconst [24] w)
  2761  	x3:(MOVBstore [3] {s} p (SRLconst [32] w)
  2762  	x4:(MOVBstore [2] {s} p (SRLconst [40] w)
  2763  	x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
  2764  	x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
  2765  	&& x0.Uses == 1
  2766  	&& x1.Uses == 1
  2767  	&& x2.Uses == 1
  2768  	&& x3.Uses == 1
  2769  	&& x4.Uses == 1
  2770  	&& x5.Uses == 1
  2771  	&& x6.Uses == 1
  2772  	&& s == nil
  2773  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2774  	&& isSamePtr(p1, p)
  2775  	&& clobber(x0)
  2776  	&& clobber(x1)
  2777  	&& clobber(x2)
  2778  	&& clobber(x3)
  2779  	&& clobber(x4)
  2780  	&& clobber(x5)
  2781  	&& clobber(x6)
  2782  	-> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
  // Merge four byte stores into one word store.
  // In all but one rule below, w is stored at the highest address and
  // successively higher bit ranges of w at successively lower addresses; the
  // chain becomes a single word store of (REVW w) at the lowest address.
  // The higher bit ranges are expressed as UBFX fields, SRLconst of
  // (MOVDreg w), or SRLconst of w. All three inner stores must have exactly
  // one use, and each is passed to clobber.
  // NOTE(review): arm64BFAuxInt arguments look like (lsb, width), judging by
  // the (8, 24) / (16, 16) / (24, 8) pairs used here; confirm against its
  // definition.
  2783  (MOVBstore [i] {s} ptr w
  2784  	x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w)
  2785  	x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w)
  2786  	x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
  2787  	&& x0.Uses == 1
  2788  	&& x1.Uses == 1
  2789  	&& x2.Uses == 1
  2790  	&& clobber(x0)
  2791  	&& clobber(x1)
  2792  	&& clobber(x2)
  2793  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  // Indexed variant: the lowest byte is a MOVBstoreidx on (ptr0, idx0), the
  // store at offset 1 addresses p1 = (ADD ptr1 idx1) with isSamePtr(p1, p),
  // s must be nil, and ptr/idx may match in either order.
  2794  (MOVBstore [3] {s} p w
  2795  	x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w)
  2796  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w)
  2797  	x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
  2798  	&& x0.Uses == 1
  2799  	&& x1.Uses == 1
  2800  	&& x2.Uses == 1
  2801  	&& s == nil
  2802  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2803  	&& isSamePtr(p1, p)
  2804  	&& clobber(x0)
  2805  	&& clobber(x1)
  2806  	&& clobber(x2)
  2807  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Fully indexed variant: w at idx+3 down to the top field at idx.
  2808  (MOVBstoreidx ptr (ADDconst [3] idx) w
  2809  	x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
  2810  	x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
  2811  	x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
  2812  	&& x0.Uses == 1
  2813  	&& x1.Uses == 1
  2814  	&& x2.Uses == 1
  2815  	&& clobber(x0)
  2816  	&& clobber(x1)
  2817  	&& clobber(x2)
  2818  	-> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
  // Exception to the pattern above: here w is stored at idx and the higher
  // fields at idx+1..idx+3 (ascending addresses), so the merged word store
  // writes w itself with no REVW.
  2819  (MOVBstoreidx ptr idx w
  2820  	x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
  2821  	x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
  2822  	x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
  2823  	&& x0.Uses == 1
  2824  	&& x1.Uses == 1
  2825  	&& x2.Uses == 1
  2826  	&& clobber(x0)
  2827  	&& clobber(x1)
  2828  	&& clobber(x2)
  2829  	-> (MOVWstoreidx ptr idx w mem)
  // Same descending-address merge with shifts of (MOVDreg w).
  2830  (MOVBstore [i] {s} ptr w
  2831  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
  2832  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
  2833  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
  2834  	&& x0.Uses == 1
  2835  	&& x1.Uses == 1
  2836  	&& x2.Uses == 1
  2837  	&& clobber(x0)
  2838  	&& clobber(x1)
  2839  	&& clobber(x2)
  2840  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  2841  (MOVBstore [3] {s} p w
  2842  	x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
  2843  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
  2844  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
  2845  	&& x0.Uses == 1
  2846  	&& x1.Uses == 1
  2847  	&& x2.Uses == 1
  2848  	&& s == nil
  2849  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2850  	&& isSamePtr(p1, p)
  2851  	&& clobber(x0)
  2852  	&& clobber(x1)
  2853  	&& clobber(x2)
  2854  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Same descending-address merge with plain SRLconst shifts of w.
  2855  (MOVBstore [i] {s} ptr w
  2856  	x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
  2857  	x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
  2858  	x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
  2859  	&& x0.Uses == 1
  2860  	&& x1.Uses == 1
  2861  	&& x2.Uses == 1
  2862  	&& clobber(x0)
  2863  	&& clobber(x1)
  2864  	&& clobber(x2)
  2865  	-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
  2866  (MOVBstore [3] {s} p w
  2867  	x0:(MOVBstore [2] {s} p (SRLconst [8] w)
  2868  	x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
  2869  	x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
  2870  	&& x0.Uses == 1
  2871  	&& x1.Uses == 1
  2872  	&& x2.Uses == 1
  2873  	&& s == nil
  2874  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2875  	&& isSamePtr(p1, p)
  2876  	&& clobber(x0)
  2877  	&& clobber(x1)
  2878  	&& clobber(x2)
  2879  	-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
  // Merge two byte stores into one halfword store.
  // In all but one rule below, w is stored at the higher address and w moved
  // down by 8 bits at the lower address; the pair becomes a single halfword
  // store of (REV16W w) at the lower address. The "moved down by 8" value
  // appears as SRLconst [8] w, UBFX [arm64BFAuxInt(8, 8)] w,
  // SRLconst [8] (MOVDreg w), or UBFX [arm64BFAuxInt(8, 24)] w.
  // Every rule requires x.Uses == 1 and ends its condition with clobber(x).
  // The (ADD ptr1 idx1) forms additionally require s == nil and accept
  // (ptr0, idx0) matching (ptr1, idx1) in either order.
  // Each pattern appears exactly once: a later rule with an identical pattern
  // and condition could never fire.
  2880  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
  2881  	&& x.Uses == 1
  2882  	&& clobber(x)
  2883  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2884  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
  2885  	&& x.Uses == 1
  2886  	&& s == nil
  2887  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2888  	&& clobber(x)
  2889  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2890  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
  2891  	&& x.Uses == 1
  2892  	&& clobber(x)
  2893  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2894  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
  2895  	&& x.Uses == 1
  2896  	&& s == nil
  2897  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2898  	&& clobber(x)
  2899  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2900  (MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
  2901  	&& x.Uses == 1
  2902  	&& clobber(x)
  2903  	-> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
  // Exception to the pattern above: here w is stored at idx and the upper
  // byte at idx+1 (ascending addresses), so the merged halfword store writes
  // w itself with no REV16W.
  2904  (MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
  2905  	&& x.Uses == 1
  2906  	&& clobber(x)
  2907  	-> (MOVHstoreidx ptr idx w mem)
  2908  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
  2909  	&& x.Uses == 1
  2910  	&& clobber(x)
  2911  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2912  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
  2913  	&& x.Uses == 1
  2914  	&& s == nil
  2915  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2916  	&& clobber(x)
  2917  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2918  (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
  2919  	&& x.Uses == 1
  2920  	&& clobber(x)
  2921  	-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
  2922  (MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
  2923  	&& x.Uses == 1
  2924  	&& s == nil
  2925  	&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  2926  	&& clobber(x)
  2927  	-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
  2938  
  2939  // FP simplification: fold negation into multiplies and fuse multiplies with add/subtract.
  // The S-suffixed ops are the 32-bit float forms and the D-suffixed ops the
  // 64-bit forms (Add32F lowers to FADDS and Add64F to FADDD at the top of
  // this file).
  // Negation of a product, or a product with a negated first operand, becomes
  // FNMUL; negating an FNMUL (either way) gives back FMUL.
  2940  (FNEGS (FMULS x y)) -> (FNMULS x y)
  2941  (FNEGD (FMULD x y)) -> (FNMULD x y)
  2942  (FMULS (FNEGS x) y) -> (FNMULS x y)
  2943  (FMULD (FNEGD x) y) -> (FNMULD x y)
  2944  (FNEGS (FNMULS x y)) -> (FMULS x y)
  2945  (FNEGD (FNMULD x y)) -> (FMULD x y)
  2946  (FNMULS (FNEGS x) y) -> (FMULS x y)
  2947  (FNMULD (FNEGD x) y) -> (FMULD x y)
  // Fused forms, as used by the rules below (argument order is a, x, y):
  //   a + x*y    -> FMADD      a - x*y    -> FMSUB
  //   x*y - a    -> FNMSUB     -(x*y) - a -> FNMADD
  2948  (FADDS a (FMULS x y)) -> (FMADDS a x y)
  2949  (FADDD a (FMULD x y)) -> (FMADDD a x y)
  2950  (FSUBS a (FMULS x y)) -> (FMSUBS a x y)
  2951  (FSUBD a (FMULD x y)) -> (FMSUBD a x y)
  2952  (FSUBS (FMULS x y) a) -> (FNMSUBS a x y)
  2953  (FSUBD (FMULD x y) a) -> (FNMSUBD a x y)
  // The same four shapes with an already-negated product (FNMUL) operand.
  2954  (FADDS a (FNMULS x y)) -> (FMSUBS a x y)
  2955  (FADDD a (FNMULD x y)) -> (FMSUBD a x y)
  2956  (FSUBS a (FNMULS x y)) -> (FMADDS a x y)
  2957  (FSUBD a (FNMULD x y)) -> (FMADDD a x y)
  2958  (FSUBS (FNMULS x y) a) -> (FNMADDS a x y)
  2959  (FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
  2960  
  // Fold loads from read-only symbols into constants.
  // A load addressed directly off (SB) with symbol sym, for which symIsRO(sym)
  // holds, is replaced by a MOVDconst whose value comes from
  // read8/read16/read32/read64 at offset off. The memory argument is ignored
  // (matched as _). The multi-byte reads are passed config.BigEndian.
  2961  (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read8(sym, off))])
  2962  (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read16(sym, off, config.BigEndian))])
  2963  (MOVWUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read32(sym, off, config.BigEndian))])
  2964  (MOVDload  [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read64(sym, off, config.BigEndian))])