github.com/corona10/go@v0.0.0-20180224231303-7a218942be57/src/cmd/compile/internal/ssa/gen/S390X.rules (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Lowering arithmetic
     6  (Add(64|Ptr)  x y) -> (ADD  x y)
     7  (Add(32|16|8)  x y) -> (ADDW  x y)
     8  (Add32F x y) -> (FADDS x y)
     9  (Add64F x y) -> (FADD x y)
    10  
    11  (Sub(64|Ptr)  x y) -> (SUB  x y)
    12  (Sub(32|16|8)  x y) -> (SUBW  x y)
    13  (Sub32F x y) -> (FSUBS x y)
    14  (Sub64F x y) -> (FSUB x y)
    15  
        // 16- and 8-bit multiplies use the 32-bit MULLW: the low 16/8 bits of
        // the product are correct regardless of the (ignored) high input bits.
    16  (Mul64  x y) -> (MULLD  x y)
    17  (Mul(32|16|8)  x y) -> (MULLW  x y)
    18  (Mul32F x y) -> (FMULS x y)
    19  (Mul64F x y) -> (FMUL x y)
    20  
    21  (Div32F x y) -> (FDIVS x y)
    22  (Div64F x y) -> (FDIV x y)
    23  
    24  (Div64  x y) -> (DIVD  x y)
    25  (Div64u x y) -> (DIVDU x y)
    26  // DIVW/DIVWU has a 64-bit dividend and a 32-bit divisor,
    27  // so a sign/zero extension of the dividend is required.
    28  (Div32  x y) -> (DIVW  (MOVWreg x) y)
    29  (Div32u x y) -> (DIVWU (MOVWZreg x) y)
    30  (Div16  x y) -> (DIVW  (MOVHreg x) (MOVHreg y))
    31  (Div16u x y) -> (DIVWU (MOVHZreg x) (MOVHZreg y))
    32  (Div8   x y) -> (DIVW  (MOVBreg x) (MOVBreg y))
    33  (Div8u  x y) -> (DIVWU (MOVBZreg x) (MOVBZreg y))
    34  
        // Hmul32: form the full 64-bit product of the sign/zero-extended
        // 32-bit inputs with MULLD, then extract the high word by shifting
        // right 32 bits.
    35  (Hmul(64|64u)  x y) -> (MULH(D|DU)  x y)
    36  (Hmul32  x y) -> (SRDconst [32] (MULLD (MOVWreg x) (MOVWreg y)))
    37  (Hmul32u x y) -> (SRDconst [32] (MULLD (MOVWZreg x) (MOVWZreg y)))
    38  
    39  (Mod(64|64u)  x y) -> (MOD(D|DU)  x y)
    40  // MODW/MODWU has a 64-bit dividend and a 32-bit divisor,
    41  // so a sign/zero extension of the dividend is required.
    42  (Mod32  x y) -> (MODW  (MOVWreg x) y)
    43  (Mod32u x y) -> (MODWU (MOVWZreg x) y)
    44  (Mod16  x y) -> (MODW  (MOVHreg x) (MOVHreg y))
    45  (Mod16u x y) -> (MODWU (MOVHZreg x) (MOVHZreg y))
    46  (Mod8   x y) -> (MODW  (MOVBreg x) (MOVBreg y))
    47  (Mod8u  x y) -> (MODWU (MOVBZreg x) (MOVBZreg y))
    48  
        // Avg64u is only generated with x >= y (see the generic ssa docs for
        // Avg64u), so (x-y) cannot underflow and (x-y)/2 + y avoids the
        // overflow that (x+y)/2 would risk.
    49  // (x + y) / 2 with x>=y -> (x - y) / 2 + y
    50  (Avg64u <t> x y) -> (ADD (SRDconst <t> (SUB <t> x y) [1]) y)
    51  
    52  (And64 x y) -> (AND x y)
    53  (And(32|16|8) x y) -> (ANDW x y)
    54  
    55  (Or64 x y) -> (OR x y)
    56  (Or(32|16|8) x y) -> (ORW x y)
    57  
    58  (Xor64 x y) -> (XOR x y)
    59  (Xor(32|16|8) x y) -> (XORW x y)
    60  
        // Neg of narrow values sign-extends first so the 32-bit negate sees
        // the correct value.
    61  (Neg64  x) -> (NEG x)
    62  (Neg32  x) -> (NEGW x)
    63  (Neg16  x) -> (NEGW (MOVHreg x))
    64  (Neg8   x) -> (NEGW (MOVBreg x))
    65  (Neg32F x) -> (FNEGS x)
    66  (Neg64F x) -> (FNEG x)
    67  
        // NOT/NOTW are pseudo-ops; they are expanded here into XORs with -1.
        // NOTE(review): the trivial "&& true" condition appears to be a
        // rulegen artifact (keeps the rule from being treated as an
        // unconditional rewrite of its own op) — confirm against rulegen.
    68  (Com64 x) -> (NOT x)
    69  (Com(32|16|8) x) -> (NOTW x)
    70  (NOT x) && true -> (XOR (MOVDconst [-1]) x)
    71  (NOTW x) && true -> (XORWconst [-1] x)
    72  
    73  // Lowering boolean ops
    74  (AndB x y) -> (ANDW x y)
    75  (OrB x y) -> (ORW x y)
        // Booleans are 0/1, so flipping the low bit negates them.
    76  (Not x) -> (XORWconst [1] x)
    77  
    78  // Lowering pointer arithmetic
        // SP-relative addresses fold into MOVDaddr; otherwise use an
        // immediate add when the offset fits in 32 bits.
    79  (OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr)
    80  (OffPtr [off] ptr) && is32Bit(off) -> (ADDconst [off] ptr)
    81  (OffPtr [off] ptr) -> (ADD (MOVDconst [off]) ptr)
    82  
        // FLOGR (find leftmost one) returns 64 - BitLen(x); (x-1)&^x turns
        // the lowest set bit and everything below it into ones and clears
        // the rest, converting a count-trailing-zeros into a bit-length.
    83  // Ctz(x) = 64 - findLeftmostOne((x-1)&^x)
    84  (Ctz64 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (AND <t> (SUBconst <t> [1] x) (NOT <t> x))))
    85  (Ctz32 <t> x) -> (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW <t> (SUBWconst <t> [1] x) (NOTW <t> x)))))
    86  
    87  (BitLen64 x) -> (SUB (MOVDconst [64]) (FLOGR x))
    88  
    89  (Bswap64 x) -> (MOVDBR x)
    90  (Bswap32 x) -> (MOVWBR x)
    91  
        // FIDBR's immediate is the rounding-mode mask (see z/Architecture
        // FIDBR): 7 = toward -inf, 6 = toward +inf, 5 = toward zero,
        // 4 = to nearest ties-to-even, 1 = to nearest ties-away-from-zero.
    92  // math package intrinsics
    93  (Sqrt        x) -> (FSQRT x)
    94  (Floor       x) -> (FIDBR [7] x)
    95  (Ceil        x) -> (FIDBR [6] x)
    96  (Trunc       x) -> (FIDBR [5] x)
    97  (RoundToEven x) -> (FIDBR [4] x)
    98  (Round       x) -> (FIDBR [1] x)
    99  
   100  // Atomic loads.
   101  (AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
   102  (AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
   103  (AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
   104  
   105  // Atomic stores.
   106  (AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
   107  (AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
   108  (AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
   109  
        // AtomicAdd must return the *new* value, but LAA/LAAG return the old
        // one. The AddTupleFirst pseudo-op carries the addend alongside the
        // tuple so the Select0 rules below can rewrite "old" into "old+val";
        // Select1 (the memory result) passes straight through.
   110  // Atomic adds.
   111  (AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))
   112  (AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (LAAG ptr val mem))
   113  (Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDW val (Select0 <t> tuple))
   114  (Select1     (AddTupleFirst32   _ tuple)) -> (Select1 tuple)
   115  (Select0 <t> (AddTupleFirst64 val tuple)) -> (ADD val (Select0 <t> tuple))
   116  (Select1     (AddTupleFirst64   _ tuple)) -> (Select1 tuple)
   117  
   118  // Atomic exchanges.
   119  (AtomicExchange32 ptr val mem) -> (LoweredAtomicExchange32 ptr val mem)
   120  (AtomicExchange64 ptr val mem) -> (LoweredAtomicExchange64 ptr val mem)
   121  
   122  // Atomic compare and swap.
   123  (AtomicCompareAndSwap32 ptr old new_ mem) -> (LoweredAtomicCas32 ptr old new_ mem)
   124  (AtomicCompareAndSwap64 ptr old new_ mem) -> (LoweredAtomicCas64 ptr old new_ mem)
   125  
   126  // Lowering extension
   127  // Note: we always extend to 64 bits even though some ops don't need that many result bits.
   128  (SignExt8to(16|32|64)  x) -> (MOVBreg x)
   129  (SignExt16to(32|64) x) -> (MOVHreg x)
   130  (SignExt32to64 x) -> (MOVWreg x)
   131  
   132  (ZeroExt8to(16|32|64)  x) -> (MOVBZreg x)
   133  (ZeroExt16to(32|64) x) -> (MOVHZreg x)
   134  (ZeroExt32to64 x) -> (MOVWZreg x)
   135  
        // Slicemask: 0 -> 0, nonzero -> all ones. Negate so bit 63 is set
        // for any nonzero input, then arithmetic-shift it across the word.
   136  (Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])
   137  
   138  // Lowering truncation
   139  // Because we ignore high parts of registers, truncates are just copies.
   140  (Trunc(16|32|64)to8  x) -> x
   141  (Trunc(32|64)to16 x) -> x
   142  (Trunc64to32 x) -> x
   143  
        // Convert-from/to-fixed instructions: C{E,D}{F,G}BRA and
        // C{F,G}{E,D}BRA cover the int32/int64 x float32/float64 matrix.
   144  // Lowering float <-> int
   145  (Cvt32to32F x) -> (CEFBRA x)
   146  (Cvt32to64F x) -> (CDFBRA x)
   147  (Cvt64to32F x) -> (CEGBRA x)
   148  (Cvt64to64F x) -> (CDGBRA x)
   149  
   150  (Cvt32Fto32 x) -> (CFEBRA x)
   151  (Cvt32Fto64 x) -> (CGEBRA x)
   152  (Cvt64Fto32 x) -> (CFDBRA x)
   153  (Cvt64Fto64 x) -> (CGDBRA x)
   154  
   155  (Cvt32Fto64F x) -> (LDEBR x)
   156  (Cvt64Fto32F x) -> (LEDBR x)
   157  
   158  (Round(32|64)F x) -> (LoweredRound(32|64)F x)
   159  
   160  // Lowering shifts
   161  // Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
   162  //   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
        // SUBE(W)carrymask produces all-ones when the unsigned compare sets
        // the carry (shift amount in range) and zero otherwise.
   163  (Lsh64x64 <t> x y) -> (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPUconst y [63])))
   164  (Lsh64x32 <t> x y) -> (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPWUconst y [63])))
   165  (Lsh64x16 <t> x y) -> (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVHZreg y) [63])))
   166  (Lsh64x8  <t> x y) -> (AND (SLD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
   167  
   168  (Lsh32x64 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPUconst y [31])))
   169  (Lsh32x32 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst y [31])))
   170  (Lsh32x16 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [31])))
   171  (Lsh32x8  <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
   172  
        // 16- and 8-bit left shifts compare against 31, not 15/7: only the
        // low 16/8 bits of the result are meaningful, and those are already
        // zero for any in-register shift of 16..31, so the mask is only
        // needed once the amount exceeds the 32-bit shifter's range.
   173  (Lsh16x64 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPUconst y [31])))
   174  (Lsh16x32 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst y [31])))
   175  (Lsh16x16 <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [31])))
   176  (Lsh16x8  <t> x y) -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
   177  
   178  (Lsh8x64 <t> x y)  -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPUconst y [31])))
   179  (Lsh8x32 <t> x y)  -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst y [31])))
   180  (Lsh8x16 <t> x y)  -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [31])))
   181  (Lsh8x8  <t> x y)  -> (ANDW (SLW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
   182  
   183  (Rsh64Ux64 <t> x y) -> (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPUconst y [63])))
   184  (Rsh64Ux32 <t> x y) -> (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPWUconst y [63])))
   185  (Rsh64Ux16 <t> x y) -> (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVHZreg y) [63])))
   186  (Rsh64Ux8  <t> x y) -> (AND (SRD <t> x y) (SUBEcarrymask <t> (CMPWUconst (MOVBZreg y) [63])))
   187  
   188  (Rsh32Ux64 <t> x y) -> (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPUconst y [31])))
   189  (Rsh32Ux32 <t> x y) -> (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPWUconst y [31])))
   190  (Rsh32Ux16 <t> x y) -> (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [31])))
   191  (Rsh32Ux8  <t> x y) -> (ANDW (SRW <t> x y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [31])))
   192  
        // Narrow unsigned right shifts must zero-extend the value first so
        // that bits shifted in from above are zero; the mask bound matches
        // the operand width here (15/7) since high garbage would otherwise
        // shift down into the result.
   193  (Rsh16Ux64 <t> x y) -> (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPUconst y [15])))
   194  (Rsh16Ux32 <t> x y) -> (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPWUconst y [15])))
   195  (Rsh16Ux16 <t> x y) -> (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [15])))
   196  (Rsh16Ux8  <t> x y) -> (ANDW (SRW <t> (MOVHZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [15])))
   197  
   198  (Rsh8Ux64 <t> x y)  -> (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPUconst y [7])))
   199  (Rsh8Ux32 <t> x y)  -> (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPWUconst y [7])))
   200  (Rsh8Ux16 <t> x y)  -> (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVHZreg y) [7])))
   201  (Rsh8Ux8  <t> x y)  -> (ANDW (SRW <t> (MOVBZreg x) y) (SUBEWcarrymask <t> (CMPWUconst (MOVBZreg y) [7])))
   202  
   203  // Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
   204  // We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
   205  (Rsh64x64 <t> x y) -> (SRAD <t> x (OR <y.Type> y (NOT <y.Type> (SUBEcarrymask <y.Type> (CMPUconst y [63])))))
   206  (Rsh64x32 <t> x y) -> (SRAD <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst y [63])))))
   207  (Rsh64x16 <t> x y) -> (SRAD <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVHZreg y) [63])))))
   208  (Rsh64x8  <t> x y) -> (SRAD <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [63])))))
   209  
   210  (Rsh32x64 <t> x y) -> (SRAW <t> x (OR <y.Type> y (NOT <y.Type> (SUBEcarrymask <y.Type> (CMPUconst y [31])))))
   211  (Rsh32x32 <t> x y) -> (SRAW <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst y [31])))))
   212  (Rsh32x16 <t> x y) -> (SRAW <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVHZreg y) [31])))))
   213  (Rsh32x8  <t> x y) -> (SRAW <t> x (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [31])))))
   214  
        // Narrow signed right shifts sign-extend the value first so the sign
        // bit is replicated correctly through the 32-bit shifter.
   215  (Rsh16x64 <t> x y) -> (SRAW <t> (MOVHreg x) (OR <y.Type> y (NOT <y.Type> (SUBEcarrymask <y.Type> (CMPUconst y [15])))))
   216  (Rsh16x32 <t> x y) -> (SRAW <t> (MOVHreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst y [15])))))
   217  (Rsh16x16 <t> x y) -> (SRAW <t> (MOVHreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVHZreg y) [15])))))
   218  (Rsh16x8  <t> x y) -> (SRAW <t> (MOVHreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [15])))))
   219  
   220  (Rsh8x64 <t> x y)  -> (SRAW <t> (MOVBreg x) (OR <y.Type> y (NOT <y.Type> (SUBEcarrymask <y.Type> (CMPUconst y [7])))))
   221  (Rsh8x32 <t> x y)  -> (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst y [7])))))
   222  (Rsh8x16 <t> x y)  -> (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVHZreg y) [7])))))
   223  (Rsh8x8  <t> x y)  -> (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [7])))))
   224  
   225  // Lowering comparisons
        // Comparisons materialize a 0/1 result via a conditional move:
        // MOVD<cond> selects between the two constants based on the flags
        // from the CMP/CMPU/FCMP. Narrow operands are extended first.
   226  (Less64  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   227  (Less32  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   228  (Less16  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   229  (Less8   x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   230  (Less64U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   231  (Less32U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   232  (Less16U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
   233  (Less8U  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
   234  // Use a GT test with reversed operands (y > x) to dodge the NaN case:
        // an unordered compare must yield false, which GT gives for free.
   235  (Less64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
   236  (Less32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
   237  
   238  (Leq64  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   239  (Leq32  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   240  (Leq16  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   241  (Leq8   x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   242  (Leq64U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   243  (Leq32U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   244  (Leq16U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
   245  (Leq8U  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
   246  // Use a GE test with reversed operands (y >= x) to dodge the NaN case.
   247  (Leq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
   248  (Leq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
   249  
   250  (Greater64  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   251  (Greater32  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   252  (Greater16  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   253  (Greater8   x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   254  (Greater64U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   255  (Greater32U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   256  (Greater16U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
   257  (Greater8U  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
   258  (Greater64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   259  (Greater32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   260  
   261  (Geq64  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   262  (Geq32  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   263  (Geq16  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   264  (Geq8   x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   265  (Geq64U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
   266  (Geq32U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
   267  (Geq16U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
   268  (Geq8U  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
   269  (Geq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   270  (Geq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   271  
   272  (Eq64  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   273  (Eq32  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   274  (Eq16  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   275  (Eq8   x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   276  (EqB   x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   277  (EqPtr x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   278  (Eq64F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   279  (Eq32F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   280  
   281  (Neq64  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   282  (Neq32  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
   283  (Neq16  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
   284  (Neq8   x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   285  (NeqB   x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
   286  (NeqPtr x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
   287  (Neq64F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
   288  (Neq32F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
   289  
   290  // Lowering loads
        // Pick the load by width and signedness of the type; signed narrow
        // loads sign-extend (MOV[BHW]load), unsigned ones zero-extend
        // (MOV[BHW]Zload).
   291  (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
   292  (Load <t> ptr mem) && is32BitInt(t) && isSigned(t) -> (MOVWload ptr mem)
   293  (Load <t> ptr mem) && is32BitInt(t) && !isSigned(t) -> (MOVWZload ptr mem)
   294  (Load <t> ptr mem) && is16BitInt(t) && isSigned(t) -> (MOVHload ptr mem)
   295  (Load <t> ptr mem) && is16BitInt(t) && !isSigned(t) -> (MOVHZload ptr mem)
   296  (Load <t> ptr mem) && is8BitInt(t) && isSigned(t) -> (MOVBload ptr mem)
   297  (Load <t> ptr mem) && (t.IsBoolean() || (is8BitInt(t) && !isSigned(t))) -> (MOVBZload ptr mem)
   298  (Load <t> ptr mem) && is32BitFloat(t) -> (FMOVSload ptr mem)
   299  (Load <t> ptr mem) && is64BitFloat(t) -> (FMOVDload ptr mem)
   300  
   301  // Lowering stores
   302  // These more-specific FP versions of Store pattern should come first.
   303  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem)
   304  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem)
   305  
   306  (Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVDstore ptr val mem)
   307  (Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVWstore ptr val mem)
   308  (Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
   309  (Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
   310  
   311  // Lowering moves
   312  
   313  // Load and store for small copies.
   314  (Move [0] _ _ mem) -> mem
   315  (Move [1] dst src mem) -> (MOVBstore dst (MOVBZload src mem) mem)
   316  (Move [2] dst src mem) -> (MOVHstore dst (MOVHZload src mem) mem)
   317  (Move [4] dst src mem) -> (MOVWstore dst (MOVWZload src mem) mem)
   318  (Move [8] dst src mem) -> (MOVDstore dst (MOVDload src mem) mem)
   319  (Move [16] dst src mem) ->
   320  	(MOVDstore [8] dst (MOVDload [8] src mem)
   321  		(MOVDstore dst (MOVDload src mem) mem))
   322  (Move [24] dst src mem) ->
   323          (MOVDstore [16] dst (MOVDload [16] src mem)
   324  	        (MOVDstore [8] dst (MOVDload [8] src mem)
   325                  (MOVDstore dst (MOVDload src mem) mem)))
        // Odd sizes are decomposed into non-overlapping power-of-two pieces,
        // e.g. 7 = 1 + 2 + 4.
   326  (Move [3] dst src mem) ->
   327  	(MOVBstore [2] dst (MOVBZload [2] src mem)
   328  		(MOVHstore dst (MOVHZload src mem) mem))
   329  (Move [5] dst src mem) ->
   330  	(MOVBstore [4] dst (MOVBZload [4] src mem)
   331  		(MOVWstore dst (MOVWZload src mem) mem))
   332  (Move [6] dst src mem) ->
   333  	(MOVHstore [4] dst (MOVHZload [4] src mem)
   334  		(MOVWstore dst (MOVWZload src mem) mem))
   335  (Move [7] dst src mem) ->
   336  	(MOVBstore [6] dst (MOVBZload [6] src mem)
   337  		(MOVHstore [4] dst (MOVHZload [4] src mem)
   338  			(MOVWstore dst (MOVWZload src mem) mem)))
   339  
   340  // MVC for other moves. Use up to 4 instructions (sizes up to 1024 bytes).
        // A single MVC copies at most 256 bytes, so larger sizes chain MVCs
        // at offsets 0, 256, 512, 768 with the remainder in the last one.
   341  (Move [s] dst src mem) && s > 0 && s <= 256 ->
   342  	(MVC [makeValAndOff(s, 0)] dst src mem)
   343  (Move [s] dst src mem) && s > 256 && s <= 512 ->
   344  	(MVC [makeValAndOff(s-256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))
   345  (Move [s] dst src mem) && s > 512 && s <= 768 ->
   346  	(MVC [makeValAndOff(s-512, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem)))
   347  (Move [s] dst src mem) && s > 768 && s <= 1024 ->
   348  	(MVC [makeValAndOff(s-768, 768)] dst src (MVC [makeValAndOff(256, 512)] dst src (MVC [makeValAndOff(256, 256)] dst src (MVC [makeValAndOff(256, 0)] dst src mem))))
   349  
   350  // Move more than 1024 bytes using a loop.
        // The loop handles s rounded down to a multiple of 256; the aux
        // value s%256 is the tail the lowered loop must finish off.
   351  (Move [s] dst src mem) && s > 1024 ->
   352  	(LoweredMove [s%256] dst src (ADDconst <src.Type> src [(s/256)*256]) mem)
   353  
   354  // Lowering Zero instructions
   355  (Zero [0] _ mem) -> mem
   356  (Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
   357  (Zero [2] destptr mem) -> (MOVHstoreconst [0] destptr mem)
   358  (Zero [4] destptr mem) -> (MOVWstoreconst [0] destptr mem)
   359  (Zero [8] destptr mem) -> (MOVDstoreconst [0] destptr mem)
   360  (Zero [3] destptr mem) ->
   361  	(MOVBstoreconst [makeValAndOff(0,2)] destptr
   362  		(MOVHstoreconst [0] destptr mem))
   363  (Zero [5] destptr mem) ->
   364  	(MOVBstoreconst [makeValAndOff(0,4)] destptr
   365  		(MOVWstoreconst [0] destptr mem))
   366  (Zero [6] destptr mem) ->
   367  	(MOVHstoreconst [makeValAndOff(0,4)] destptr
   368  		(MOVWstoreconst [0] destptr mem))
        // Zero [7] deliberately uses two overlapping 4-byte stores (offsets
        // 0..3 and 3..6); overlap is harmless when storing zeros and saves
        // an instruction over a 4+2+1 decomposition.
   369  (Zero [7] destptr mem) ->
   370  	(MOVWstoreconst [makeValAndOff(0,3)] destptr
   371  		(MOVWstoreconst [0] destptr mem))
   372  
   373  (Zero [s] destptr mem) && s > 0 && s <= 1024 ->
   374  	(CLEAR [makeValAndOff(s, 0)] destptr mem)
   375  
   376  // Move more than 1024 bytes using a loop.
   377  (Zero [s] destptr mem) && s > 1024 ->
   378  	(LoweredZero [s%256] destptr (ADDconst <destptr.Type> destptr [(s/256)*256]) mem)
   379  
   380  // Lowering constants
        // All integer constants (and bools, 0/1) become 64-bit MOVDconst.
   381  (Const(64|32|16|8)  [val]) -> (MOVDconst [val])
   382  (Const(32|64)F [val]) -> (FMOV(S|D)const [val])
   383  (ConstNil) -> (MOVDconst [0])
   384  (ConstBool [b]) -> (MOVDconst [b])
   385  
   386  // Lowering calls
   387  (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
   388  (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
   389  (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)
   390  
   391  // Miscellaneous
   392  (Convert <t> x mem) -> (MOVDconvert <t> x mem)
        // Bounds checks also materialize a 0/1 via conditional move, same
        // shape as the comparison lowerings above.
   393  (IsNonNil p) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPconst p [0]))
   394  (IsInBounds idx len) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
   395  (IsSliceInBounds idx len) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU idx len))
   396  (NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
   397  (GetG mem) -> (LoweredGetG mem)
   398  (GetClosurePtr) -> (LoweredGetClosurePtr)
   399  (GetCallerSP) -> (LoweredGetCallerSP)
   400  (Addr {sym} base) -> (MOVDaddr {sym} base)
   401  (ITab (Load ptr mem)) -> (MOVDload ptr mem)
   402  
   403  // block rewrites
        // If the condition is a 0/1 conditional move produced by the
        // comparison lowerings, branch directly on the underlying flags.
   404  (If (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LT cmp yes no)
   405  (If (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (LE cmp yes no)
   406  (If (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GT cmp yes no)
   407  (If (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GE cmp yes no)
   408  (If (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (EQ cmp yes no)
   409  (If (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (NE cmp yes no)
   410  
   411  // Special case for floating point - LF/LEF not generated.
   412  (If (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GTF cmp yes no)
   413  (If (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GEF cmp yes no)
   414  
        // Fallback: test an arbitrary boolean against zero and branch on NE.
   415  (If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg <typ.Bool> cond)) yes no)
   416  
   417  // Write barrier.
   418  (WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)
   419  
   420  // ***************************
   421  // Above: lowering rules
   422  // Below: optimizations
   423  // ***************************
   424  // TODO: Should the optimizations be a separate pass?
   425  
   426  // Fold unnecessary type conversions.
   427  (MOVDreg <t> x) && t.Compare(x.Type) == types.CMPeq -> x
   428  (MOVDnop <t> x) && t.Compare(x.Type) == types.CMPeq -> x
   429  
   430  // Propagate constants through type conversions.
   431  (MOVDreg (MOVDconst [c])) -> (MOVDconst [c])
   432  (MOVDnop (MOVDconst [c])) -> (MOVDconst [c])
   433  
   434  // If a register move has only 1 use, just use the same register without emitting instruction.
   435  // MOVDnop doesn't emit instruction, only for ensuring the type.
   436  (MOVDreg x) && x.Uses == 1 -> (MOVDnop x)
   437  
   438  // Fold type changes into loads.
        // "@x.Block" re-creates the load in the block of the original load
        // (not the block of the conversion); clobber(x) marks the old load
        // dead so deadcode can remove it. The x.Uses == 1 guard ensures the
        // original load has no other consumers.
   439  (MOVDreg <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
   440  (MOVDreg <t> x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <t> [off] {sym} ptr mem)
   441  (MOVDreg <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
   442  (MOVDreg <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <t> [off] {sym} ptr mem)
   443  (MOVDreg <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
   444  (MOVDreg <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <t> [off] {sym} ptr mem)
   445  (MOVDreg <t> x:(MOVDload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload  <t> [off] {sym} ptr mem)
   446  
   447  (MOVDnop <t> x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <t> [off] {sym} ptr mem)
   448  (MOVDnop <t> x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <t> [off] {sym} ptr mem)
   449  (MOVDnop <t> x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <t> [off] {sym} ptr mem)
   450  (MOVDnop <t> x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <t> [off] {sym} ptr mem)
   451  (MOVDnop <t> x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <t> [off] {sym} ptr mem)
   452  (MOVDnop <t> x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <t> [off] {sym} ptr mem)
   453  (MOVDnop <t> x:(MOVDload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload  <t> [off] {sym} ptr mem)
   454  
        // Same folds for the indexed-addressing load variants.
   455  (MOVDreg <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
   456  (MOVDreg <t> x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <t> [off] {sym} ptr idx mem)
   457  (MOVDreg <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
   458  (MOVDreg <t> x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <t> [off] {sym} ptr idx mem)
   459  (MOVDreg <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
   460  (MOVDreg <t> x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <t> [off] {sym} ptr idx mem)
   461  (MOVDreg <t> x:(MOVDloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
   462  
   463  (MOVDnop <t> x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <t> [off] {sym} ptr idx mem)
   464  (MOVDnop <t> x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <t> [off] {sym} ptr idx mem)
   465  (MOVDnop <t> x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <t> [off] {sym} ptr idx mem)
   466  (MOVDnop <t> x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <t> [off] {sym} ptr idx mem)
   467  (MOVDnop <t> x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <t> [off] {sym} ptr idx mem)
   468  (MOVDnop <t> x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <t> [off] {sym} ptr idx mem)
   469  (MOVDnop <t> x:(MOVDloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDloadidx  <t> [off] {sym} ptr idx mem)
   470  
   471  // Fold sign extensions into conditional moves of constants.
   472  // Designed to remove the MOVBZreg inserted by the If lowering.
        // When both selectable constants already fit in a byte, the
        // zero-extension of the conditional move's result is a no-op.
   473  (MOVBZreg x:(MOVDLT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   474  (MOVBZreg x:(MOVDLE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   475  (MOVBZreg x:(MOVDGT (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   476  (MOVBZreg x:(MOVDGE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   477  (MOVBZreg x:(MOVDEQ (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   478  (MOVBZreg x:(MOVDNE (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   479  (MOVBZreg x:(MOVDGTnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   480  (MOVBZreg x:(MOVDGEnoinv (MOVDconst [c]) (MOVDconst [d]) _)) && int64(uint8(c)) == c && int64(uint8(d)) == d -> (MOVDreg x)
   481  
   482  // Fold boolean tests into blocks.
        // Undo the generic If lowering: branching on "(0/1 condmove) != 0"
        // is the same as branching on the condmove's own condition.
   483  (NE (CMPWconst [0] (MOVDLT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LT cmp yes no)
   484  (NE (CMPWconst [0] (MOVDLE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (LE cmp yes no)
   485  (NE (CMPWconst [0] (MOVDGT (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GT cmp yes no)
   486  (NE (CMPWconst [0] (MOVDGE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GE cmp yes no)
   487  (NE (CMPWconst [0] (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (EQ cmp yes no)
   488  (NE (CMPWconst [0] (MOVDNE (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (NE cmp yes no)
   489  (NE (CMPWconst [0] (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GTF cmp yes no)
   490  (NE (CMPWconst [0] (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp)) yes no) -> (GEF cmp yes no)
   491  
   492  // Fold constants into instructions.
        // 64-bit (D) forms require the constant to fit in a signed 32-bit
        // immediate (is32Bit); 32-bit (W) forms truncate the constant to 32 bits
        // before storing it in the 64-bit auxint (int64(int32(c))).
   493  (ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
   494  (ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
   495  
        // Subtraction is not commutative: constant-minus-x is rewritten as the
        // negation of x-minus-constant.
   496  (SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
   497  (SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
   498  (SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
   499  (SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))
   500  
   501  (MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
   502  (MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)
   503  
   504  // NILF instructions leave the high 32 bits unchanged which is
   505  // equivalent to the leftmost 32 bits being set.
   506  // TODO(mundaym): modify the assembler to accept 64-bit values
   507  // and use isU32Bit(^c).
        // Hence the c < 0 guard: a negative 32-bit constant sign-extends to all
        // ones in the high 32 bits, matching what NILF preserves.
   508  (AND x (MOVDconst [c])) && is32Bit(c) && c < 0 -> (ANDconst [c] x)
   509  (ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)
   510  
        // Collapse chained constant masks into a single AND.
   511  (ANDWconst [c] (ANDWconst [d] x)) -> (ANDWconst [c & d] x)
   512  (ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c & d] x)
   513  
   514  (OR x (MOVDconst [c])) && isU32Bit(c) -> (ORconst [c] x)
   515  (ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)
   516  
   517  (XOR x (MOVDconst [c])) && isU32Bit(c) -> (XORconst [c] x)
   518  (XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)
   519  
        // Constant shift counts are reduced modulo 64 (c&63).
   520  (SLD x (MOVDconst [c])) -> (SLDconst [c&63] x)
   521  (SLW x (MOVDconst [c])) -> (SLWconst [c&63] x)
   522  (SRD x (MOVDconst [c])) -> (SRDconst [c&63] x)
   523  (SRW x (MOVDconst [c])) -> (SRWconst [c&63] x)
   524  (SRAD x (MOVDconst [c])) -> (SRADconst [c&63] x)
   525  (SRAW x (MOVDconst [c])) -> (SRAWconst [c&63] x)
   526  
        // A y&63 mask on a variable shift count is redundant, since the shift
        // uses only the low 6 bits of the count anyway; drop the AND.
   527  (SRAW x (ANDWconst [63] y)) -> (SRAW x y)
   528  (SRAD x (ANDconst [63] y)) -> (SRAD x y)
   529  (SLW x (ANDWconst [63] y)) -> (SLW x y)
   530  (SLD x (ANDconst [63] y)) -> (SLD x y)
   531  (SRW x (ANDWconst [63] y)) -> (SRW x y)
   532  (SRD x (ANDconst [63] y)) -> (SRD x y)
   533  
   534  // Rotate generation
        // (x<<c) op (x>>(64-c)) is a rotate-left by c for op in {+, |, ^}: the
        // shifted halves occupy disjoint bit positions, so all three operators
        // combine them identically.
   535  (ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   536  ( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   537  (XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (RLLGconst [c] x)
   538  
        // Same pattern for 32-bit rotates (complement taken against 32).
   539  (ADDW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
   540  ( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
   541  (XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (RLLconst [c] x)
   542  
        // Fold constants into comparisons. When the constant is the left
        // operand the compare's operands are swapped, so the result must be
        // wrapped in InvertFlags to flip the condition read by users.
   543  (CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
   544  (CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
   545  (CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
   546  (CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
   547  (CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
   548  (CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
   549  (CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
   550  (CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
   551  
   552  // Using MOV{W,H,B}Zreg instead of AND is cheaper.
   553  (AND x (MOVDconst [0xFF])) -> (MOVBZreg x)
   554  (AND x (MOVDconst [0xFFFF])) -> (MOVHZreg x)
   555  (AND x (MOVDconst [0xFFFFFFFF])) -> (MOVWZreg x)
   556  (ANDWconst [0xFF] x) -> (MOVBZreg x)
   557  (ANDWconst [0xFFFF] x) -> (MOVHZreg x)
   558  
   559  // strength reduction
        // Multiplications by special constants become negate/zero/identity/shift.
        // Powers-of-two +/- 1 become shift-then-subtract/add, but only for
        // multipliers above a threshold (c >= 15 / c >= 17) — smaller constants
        // are left to the multiply instruction.
   560  (MULLDconst [-1] x) -> (NEG x)
   561  (MULLDconst [0] _) -> (MOVDconst [0])
   562  (MULLDconst [1] x) -> x
   563  (MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
   564  (MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
   565  (MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
   566  
        // Same reductions for the 32-bit multiply.
   567  (MULLWconst [-1] x) -> (NEGW x)
   568  (MULLWconst [0] _) -> (MOVDconst [0])
   569  (MULLWconst [1] x) -> x
   570  (MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
   571  (MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
   572  (MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
   573  
   574  // Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
   575  (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
   576  (ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
   577  (ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB -> (MOVDaddridx [c] {s} ptr idx)
   578  
   579  // fold ADDconst into MOVDaddrx
        // The combined displacement must stay in the signed 20-bit range.
   580  (ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
   581  (MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
   582  (MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
   583  
   584  // reverse ordering of compare instruction
        // Swapping compare operands flips LT<->GT and LE<->GE; EQ and NE are
        // symmetric, so only the InvertFlags wrapper is removed for them.
   585  (MOVDLT x y (InvertFlags cmp)) -> (MOVDGT x y cmp)
   586  (MOVDGT x y (InvertFlags cmp)) -> (MOVDLT x y cmp)
   587  (MOVDLE x y (InvertFlags cmp)) -> (MOVDGE x y cmp)
   588  (MOVDGE x y (InvertFlags cmp)) -> (MOVDLE x y cmp)
   589  (MOVDEQ x y (InvertFlags cmp)) -> (MOVDEQ x y cmp)
   590  (MOVDNE x y (InvertFlags cmp)) -> (MOVDNE x y cmp)
   591  
   592  // don't extend after proper load
        // A load that already sign/zero-extends into the full register makes a
        // following extension of the same (or narrower) width a no-op; replace
        // it with a plain register move.
   593  (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
   594  (MOVBZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   595  (MOVHreg x:(MOVBload _ _)) -> (MOVDreg x)
   596  (MOVHreg x:(MOVBZload _ _)) -> (MOVDreg x)
   597  (MOVHreg x:(MOVHload _ _)) -> (MOVDreg x)
   598  (MOVHZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   599  (MOVHZreg x:(MOVHZload _ _)) -> (MOVDreg x)
   600  (MOVWreg x:(MOVBload _ _)) -> (MOVDreg x)
   601  (MOVWreg x:(MOVBZload _ _)) -> (MOVDreg x)
   602  (MOVWreg x:(MOVHload _ _)) -> (MOVDreg x)
   603  (MOVWreg x:(MOVHZload _ _)) -> (MOVDreg x)
   604  (MOVWreg x:(MOVWload _ _)) -> (MOVDreg x)
   605  (MOVWZreg x:(MOVBZload _ _)) -> (MOVDreg x)
   606  (MOVWZreg x:(MOVHZload _ _)) -> (MOVDreg x)
   607  (MOVWZreg x:(MOVWZload _ _)) -> (MOVDreg x)
   608  
   609  // don't extend if argument is already extended
        // Incoming arguments of narrow integer types are assumed to already be
        // extended to register width with matching signedness.
   610  (MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> (MOVDreg x)
   611  (MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> (MOVDreg x)
   612  (MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> (MOVDreg x)
   613  (MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> (MOVDreg x)
   614  (MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> (MOVDreg x)
   615  (MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> (MOVDreg x)
   616  
   617  // fold double extensions
        // An extension applied to a result that is already extended at least as
        // narrowly, with compatible signedness, is a no-op; replace it with a
        // plain register move.
   618  (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
   619  (MOVBZreg x:(MOVBZreg _)) -> (MOVDreg x)
   620  (MOVHreg x:(MOVBreg _)) -> (MOVDreg x)
   621  (MOVHreg x:(MOVBZreg _)) -> (MOVDreg x)
   622  (MOVHreg x:(MOVHreg _)) -> (MOVDreg x)
   623  (MOVHZreg x:(MOVBZreg _)) -> (MOVDreg x)
   624  (MOVHZreg x:(MOVHZreg _)) -> (MOVDreg x)
   625  (MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
   626  (MOVWreg x:(MOVBZreg _)) -> (MOVDreg x)
   627  (MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
        // Fixed: this rule previously duplicated the MOVHreg line above, so
        // MOVWreg of a MOVHZreg result was never folded. A zero-extended
        // halfword has zero bits 16-31, so sign-extending from 32 bits is also
        // a no-op.
   628  (MOVWreg x:(MOVHZreg _)) -> (MOVDreg x)
   629  (MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
   630  (MOVWZreg x:(MOVBZreg _)) -> (MOVDreg x)
   631  (MOVWZreg x:(MOVHZreg _)) -> (MOVDreg x)
   632  (MOVWZreg x:(MOVWZreg _)) -> (MOVDreg x)
   633  
   634  // fold extensions into constants
        // Extending a constant is evaluated at compile time by truncating and
        // re-extending the value with the appropriate signedness.
   635  (MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
   636  (MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
   637  (MOVHreg (MOVDconst [c])) -> (MOVDconst [int64(int16(c))])
   638  (MOVHZreg (MOVDconst [c])) -> (MOVDconst [int64(uint16(c))])
   639  (MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
   640  (MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
   641  
   642  // sign extended loads
   643  // Note: The combined instruction must end up in the same block
   644  // as the original load. If not, we end up making a value with
   645  // memory type live in two different blocks, which can lead to
   646  // multiple memory values alive simultaneously.
   647  // Make sure we don't combine these ops if the load has another use.
   648  // This prevents a single load from being split into multiple loads
   649  // which then might return different values.  See test/atomicload.go.
        // Hence the x.Uses == 1 && clobber(x) guards and the @x.Block target on
        // every rule below: the extension's signedness is absorbed into the
        // load itself, in the load's original block.
   650  (MOVBreg  x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <v.Type> [off] {sym} ptr mem)
   651  (MOVBreg  x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload  <v.Type> [off] {sym} ptr mem)
   652  (MOVBZreg x:(MOVBZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
   653  (MOVBZreg x:(MOVBload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZload <v.Type> [off] {sym} ptr mem)
   654  (MOVHreg  x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <v.Type> [off] {sym} ptr mem)
   655  (MOVHreg  x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHload  <v.Type> [off] {sym} ptr mem)
   656  (MOVHZreg x:(MOVHZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
   657  (MOVHZreg x:(MOVHload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZload <v.Type> [off] {sym} ptr mem)
   658  (MOVWreg  x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <v.Type> [off] {sym} ptr mem)
   659  (MOVWreg  x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload  <v.Type> [off] {sym} ptr mem)
   660  (MOVWZreg x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
   661  (MOVWZreg x:(MOVWload  [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
   662  
        // Same absorption for the indexed load forms.
   663  (MOVBreg  x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <v.Type> [off] {sym} ptr idx mem)
   664  (MOVBreg  x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx  <v.Type> [off] {sym} ptr idx mem)
   665  (MOVBZreg x:(MOVBZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
   666  (MOVBZreg x:(MOVBloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBZloadidx <v.Type> [off] {sym} ptr idx mem)
   667  (MOVHreg  x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <v.Type> [off] {sym} ptr idx mem)
   668  (MOVHreg  x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHloadidx  <v.Type> [off] {sym} ptr idx mem)
   669  (MOVHZreg x:(MOVHZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
   670  (MOVHZreg x:(MOVHloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
   671  (MOVWreg  x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <v.Type> [off] {sym} ptr idx mem)
   672  (MOVWreg  x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx  <v.Type> [off] {sym} ptr idx mem)
   673  (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
   674  (MOVWZreg x:(MOVWloadidx  [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
   675  
   676  // replace load from same location as preceding store with copy
        // The re-extension (MOV*reg/MOV*Zreg) keeps the loaded value's
        // truncation semantics even though the store's operand may hold more
        // significant bits.
   677  (MOVDload  [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x)
   678  (MOVWload  [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
   679  (MOVHload  [off] {sym} ptr1 (MOVHstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
   680  (MOVBload  [off] {sym} ptr1 (MOVBstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
   681  (MOVWZload [off] {sym} ptr1 (MOVWstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
   682  (MOVHZload [off] {sym} ptr1 (MOVHstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
   683  (MOVBZload [off] {sym} ptr1 (MOVBstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
        // Forwarding across register files: LGDR moves FPR->GPR, LDGR GPR->FPR,
        // avoiding the round trip through memory.
   684  (MOVDload  [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
   685  (FMOVDload [off] {sym} ptr1 (MOVDstore  [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
   686  (FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
   687  (FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
   688  
   689  // prefer FPR <-> GPR moves over combined load ops
        // When the loaded operand was just stored from an FPR, use LGDR and a
        // plain register-register op instead of the register-memory form.
   690  (MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
   691  (ADDload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD   x (LGDR <t> y))
   692  (SUBload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB   x (LGDR <t> y))
   693  (ORload    <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR    x (LGDR <t> y))
   694  (ANDload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND   x (LGDR <t> y))
   695  (XORload   <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR   x (LGDR <t> y))
   696  
   697  // detect attempts to set/clear the sign bit
   698  // may need to be reworked when NIHH/OIHH are added
        // Shifting left then right by 1 clears the top (sign) bit; OR with
        // -1<<63 sets it. On float data these are the LPDFR (load positive,
        // i.e. abs) and LNDFR (load negative) operations.
   699  (SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
   700  (LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
   701  (OR (MOVDconst [-1<<63]) (LGDR <t> x))     -> (LGDR <t> (LNDFR <x.Type> x))
   702  (LDGR <t> (OR (MOVDconst [-1<<63]) x))     -> (LNDFR (LDGR <t> x))
   703  
   704  // detect attempts to set the sign bit with load
   705  (LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))
   706  
   707  // detect copysign
        // CPSDR combines the sign of one operand with the magnitude of the
        // other; the c & -1<<63 guard checks the constant's sign bit.
   708  (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
   709  (OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
   710  (CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
   711  (CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
   712  
   713  // absorb negations into set/clear sign bit
   714  (FNEG  (LPDFR x)) -> (LNDFR x)
   715  (FNEG  (LNDFR x)) -> (LPDFR x)
   716  (FNEGS (LPDFR x)) -> (LNDFR x)
   717  (FNEGS (LNDFR x)) -> (LPDFR x)
   718  
   719  // no need to convert float32 to float64 to set/clear sign bit
   720  (LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
   721  (LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)
   722  
   723  // remove unnecessary FPR <-> GPR moves
   724  (LDGR (LGDR x)) -> x
   725  (LGDR (LDGR x)) -> (MOVDreg x)
   726  
   727  // Don't extend before storing
        // A narrow store only writes the low bits, so any sign/zero extension
        // of the stored value is dead and can be dropped.
   728  (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
   729  (MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
   730  (MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
   731  (MOVWstore [off] {sym} ptr (MOVWZreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
   732  (MOVHstore [off] {sym} ptr (MOVHZreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
   733  (MOVBstore [off] {sym} ptr (MOVBZreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
   734  
   735  // Fold constants into memory operations.
   736  // Note that this is not always a good idea because if not all the uses of
   737  // the ADDconst get eliminated, we still have to compute the ADDconst and we now
   738  // have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
   739  // Nevertheless, let's do it!
        // The merged displacement must fit the signed 20-bit range of the
        // long-displacement memory instruction forms.
   740  (MOVDload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload  [off1+off2] {sym} ptr mem)
   741  (MOVWload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload  [off1+off2] {sym} ptr mem)
   742  (MOVHload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload  [off1+off2] {sym} ptr mem)
   743  (MOVBload   [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload  [off1+off2] {sym} ptr mem)
   744  (MOVWZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
   745  (MOVHZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
   746  (MOVBZload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
   747  (FMOVSload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
   748  (FMOVDload  [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
   749  
   750  (MOVDstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore  [off1+off2] {sym} ptr val mem)
   751  (MOVWstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore  [off1+off2] {sym} ptr val mem)
   752  (MOVHstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore  [off1+off2] {sym} ptr val mem)
   753  (MOVBstore  [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore  [off1+off2] {sym} ptr val mem)
   754  (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
   755  (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
   756  
        // Same folding for the register-memory arithmetic ops; SB is excluded
        // because these forms cannot use PC-relative addressing.
   757  (ADDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload   [off1+off2] {sym} x ptr mem)
   758  (ADDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload  [off1+off2] {sym} x ptr mem)
   759  (MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
   760  (MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
   761  (SUBload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload   [off1+off2] {sym} x ptr mem)
   762  (SUBWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload  [off1+off2] {sym} x ptr mem)
   763  
   764  (ANDload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload   [off1+off2] {sym} x ptr mem)
   765  (ANDWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload  [off1+off2] {sym} x ptr mem)
   766  (ORload    [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload    [off1+off2] {sym} x ptr mem)
   767  (ORWload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload   [off1+off2] {sym} x ptr mem)
   768  (XORload   [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload   [off1+off2] {sym} x ptr mem)
   769  (XORWload  [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload  [off1+off2] {sym} x ptr mem)
   770  
   771  // Fold constants into stores.
        // The stored value and the offset are packed together into a single
        // ValAndOff auxint; the value is truncated to the store width.
   772  (MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
   773  	(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
   774  (MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
   775  	(MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
   776  (MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
   777  	(MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
   778  (MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
   779  	(MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
   780  
   781  // Fold address offsets into constant stores.
   782  (MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   783  	(MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   784  (MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   785  	(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   786  (MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
   787  	(MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   788  (MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
   789  	(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
   790  
   791  // Merge address calculations into loads and stores.
   792  // Offsets from SB must not be merged into unaligned memory accesses because
   793  // loads/stores using PC-relative addressing directly must be aligned to the
   794  // size of the target.
        // Hence the base.Op != OpSB escape hatch, or the alignment proof via the
        // pointee type's alignment plus a matching combined offset.
   795  (MOVDload   [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
   796  	(MOVDload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   797  (MOVWZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   798  	(MOVWZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   799  (MOVHZload  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   800  	(MOVHZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
        // Byte-sized and floating-point accesses need no alignment check.
   801  (MOVBZload  [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   802  	(MOVBZload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
   803  (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   804  	(FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   805  (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   806  	(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   807  
   808  (MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   809  	(MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   810  (MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   811  	(MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   812  (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   813  	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
   814  
        // Stores follow exactly the same pattern and constraints as the loads.
   815  (MOVDstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
   816  	(MOVDstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   817  (MOVWstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
   818  	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   819  (MOVHstore  [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.ElemType().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
   820  	(MOVHstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   821  (MOVBstore  [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   822  	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   823  (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   824  	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   825  (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   826  	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
   827  
        // Register-memory arithmetic ops: only the tighter 20-bit displacement
        // range is available, and SB bases are excluded entirely.
   828  (ADDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   829  (ADDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   830  (MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   831  (MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   832  (SUBload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   833  (SUBWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   834  
   835  (ANDload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   836  (ANDWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   837  (ORload    [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload    [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   838  (ORWload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   839  (XORload   [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload   [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   840  (XORWload  [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload  [o1+o2] {mergeSym(s1, s2)} x ptr mem)
   841  
   842  // Cannot store constant to SB directly (no 'move relative long immediate' instructions).
   843  (MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   844  	(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   845  (MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   846  	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   847  (MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   848  	(MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   849  (MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
   850  	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
   851  
   852  // generating indexed loads and stores
        // An address computed as base+index (MOVDaddridx) is absorbed into the
        // indexed form of the memory op, keeping the merged displacement.
   853  (MOVBZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   854  	(MOVBZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   855  (MOVBload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   856  	(MOVBloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   857  (MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   858  	(MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   859  (MOVHload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   860  	(MOVHloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   861  (MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   862  	(MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   863  (MOVWload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   864  	(MOVWloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   865  (MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   866  	(MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   867  (FMOVSload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   868  	(FMOVSloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   869  (FMOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   870  	(FMOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
   871  
   872  (MOVBstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   873  	(MOVBstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   874  (MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   875  	(MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   876  (MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   877  	(MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   878  (MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   879  	(MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   880  (FMOVSstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   881  	(FMOVSstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   882  (FMOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
   883  	(FMOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
   884  
        // A plain base+index ADD also becomes an indexed access; SB cannot be
        // used as the base register of an indexed form.
   885  (MOVBZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBZloadidx [off] {sym} ptr idx mem)
   886  (MOVBload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVBloadidx  [off] {sym} ptr idx mem)
   887  (MOVHZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHZloadidx [off] {sym} ptr idx mem)
   888  (MOVHload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVHloadidx  [off] {sym} ptr idx mem)
   889  (MOVWZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWZloadidx [off] {sym} ptr idx mem)
   890  (MOVWload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVWloadidx  [off] {sym} ptr idx mem)
   891  (MOVDload  [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (MOVDloadidx  [off] {sym} ptr idx mem)
   892  (FMOVSload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVSloadidx [off] {sym} ptr idx mem)
   893  (FMOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB -> (FMOVDloadidx [off] {sym} ptr idx mem)
   894  
   895  (MOVBstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVBstoreidx  [off] {sym} ptr idx val mem)
   896  (MOVHstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVHstoreidx  [off] {sym} ptr idx val mem)
   897  (MOVWstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVWstoreidx  [off] {sym} ptr idx val mem)
   898  (MOVDstore  [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (MOVDstoreidx  [off] {sym} ptr idx val mem)
   899  (FMOVSstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVSstoreidx [off] {sym} ptr idx val mem)
   900  (FMOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB -> (FMOVDstoreidx [off] {sym} ptr idx val mem)
   901  
   902  // combine ADD into indexed loads and stores
        // The combined displacement must still fit in a signed 20-bit field
        // (is20Bit) to be encodable. Here the ADDconst is absorbed from the
        // pointer operand.
   903  (MOVBZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
   904  (MOVBloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
   905  (MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
   906  (MOVHloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
   907  (MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
   908  (MOVWloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
   909  (MOVDloadidx  [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
   910  (FMOVSloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
   911  (FMOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
   912  
   913  (MOVBstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
   914  (MOVHstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
   915  (MOVWstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
   916  (MOVDstoreidx  [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
   917  (FMOVSstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
   918  (FMOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
   919  
        // As above, but the ADDconst is absorbed from the index operand; base
        // and index are interchangeable in the displacement calculation.
   920  (MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
   921  (MOVBloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBloadidx  [c+d] {sym} ptr idx mem)
   922  (MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
   923  (MOVHloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHloadidx  [c+d] {sym} ptr idx mem)
   924  (MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
   925  (MOVWloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWloadidx  [c+d] {sym} ptr idx mem)
   926  (MOVDloadidx  [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVDloadidx  [c+d] {sym} ptr idx mem)
   927  (FMOVSloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
   928  (FMOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
   929  
   930  (MOVBstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVBstoreidx  [c+d] {sym} ptr idx val mem)
   931  (MOVHstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVHstoreidx  [c+d] {sym} ptr idx val mem)
   932  (MOVWstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVWstoreidx  [c+d] {sym} ptr idx val mem)
   933  (MOVDstoreidx  [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVDstoreidx  [c+d] {sym} ptr idx val mem)
   934  (FMOVSstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
   935  (FMOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
   936  
   937  // MOVDaddr into MOVDaddridx
        // A symbolic address (MOVDaddr) can be absorbed from either operand of
        // the index pair; the non-SB check keeps SB out of the index slot.
   938  (MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
   939         (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
   940  (MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
   941         (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
   942  
   943  // Absorb InvertFlags into branches.
        // Inverting the flags is equivalent to swapping the comparison
        // direction (LT<->GT, LE<->GE); EQ and NE are symmetric.
   944  ((LT|GT|LE|GE|EQ|NE) (InvertFlags cmp) yes no) -> ((GT|LT|GE|LE|EQ|NE) cmp yes no)
   945  
   946  // Constant comparisons.
        // Comparisons of two constants are evaluated at compile time and
        // replaced by a flag-constant pseudo-op (FlagEQ/FlagLT/FlagGT).
   947  (CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
   948  (CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
   949  (CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
   950  (CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) -> (FlagEQ)
   951  (CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) -> (FlagLT)
   952  (CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) -> (FlagGT)
   953  
        // 32-bit comparisons only look at the low 32 bits of the constant.
   954  (CMPWconst (MOVDconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
   955  (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) -> (FlagLT)
   956  (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) -> (FlagGT)
   957  (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)==uint32(y) -> (FlagEQ)
   958  (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)<uint32(y) -> (FlagLT)
   959  (CMPWUconst (MOVDconst [x]) [y]) && uint32(x)>uint32(y) -> (FlagGT)
   960  
   961  // Other known comparisons.
        // A zero-extended value has a known upper bound (0xFF for a byte,
        // etc.), so comparing against a larger constant always yields LT.
        // Similarly, a right shift by c bounds the result below 1<<(width-c),
        // and an AND with mask m bounds the result by m.
   962  (CMPconst (MOVBZreg _) [c]) && 0xFF < c -> (FlagLT)
   963  (CMPconst (MOVHZreg _) [c]) && 0xFFFF < c -> (FlagLT)
   964  (CMPconst (MOVWZreg _) [c]) && 0xFFFFFFFF < c -> (FlagLT)
   965  (CMPWconst (SRWconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT)
   966  (CMPconst (SRDconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT)
   967  (CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT)
   968  (CMPWconst (ANDWconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT)
   969  
   970  // Absorb flag constants into SBB ops.
        // As the cases show: EQ and LT produce an all-ones mask, GT produces
        // zero.
   971  (SUBEcarrymask (FlagEQ)) -> (MOVDconst [-1])
   972  (SUBEcarrymask (FlagLT)) -> (MOVDconst [-1])
   973  (SUBEcarrymask (FlagGT)) -> (MOVDconst [0])
   974  (SUBEWcarrymask (FlagEQ)) -> (MOVDconst [-1])
   975  (SUBEWcarrymask (FlagLT)) -> (MOVDconst [-1])
   976  (SUBEWcarrymask (FlagGT)) -> (MOVDconst [0])
   977  
   978  // Absorb flag constants into branches.
        // A branch on a known flag value becomes an unconditional edge:
        // (First nil yes no) always takes yes; swapping the successors to
        // (First nil no yes) always takes no.
   979  (EQ (FlagEQ) yes no) -> (First nil yes no)
   980  (EQ (FlagLT) yes no) -> (First nil no yes)
   981  (EQ (FlagGT) yes no) -> (First nil no yes)
   982  
   983  (NE (FlagEQ) yes no) -> (First nil no yes)
   984  (NE (FlagLT) yes no) -> (First nil yes no)
   985  (NE (FlagGT) yes no) -> (First nil yes no)
   986  
   987  (LT (FlagEQ) yes no) -> (First nil no yes)
   988  (LT (FlagLT) yes no) -> (First nil yes no)
   989  (LT (FlagGT) yes no) -> (First nil no yes)
   990  
   991  (LE (FlagEQ) yes no) -> (First nil yes no)
   992  (LE (FlagLT) yes no) -> (First nil yes no)
   993  (LE (FlagGT) yes no) -> (First nil no yes)
   994  
   995  (GT (FlagEQ) yes no) -> (First nil no yes)
   996  (GT (FlagLT) yes no) -> (First nil no yes)
   997  (GT (FlagGT) yes no) -> (First nil yes no)
   998  
   999  (GE (FlagEQ) yes no) -> (First nil yes no)
  1000  (GE (FlagLT) yes no) -> (First nil no yes)
  1001  (GE (FlagGT) yes no) -> (First nil yes no)
  1002  
  1003  // Absorb flag constants into SETxx ops.
        // (MOVDxx y x cond) selects x when the condition holds and y otherwise,
        // as the flag cases below demonstrate; a known flag value collapses the
        // conditional move to one of its operands.
  1004  (MOVDEQ _ x (FlagEQ)) -> x
  1005  (MOVDEQ y _ (FlagLT)) -> y
  1006  (MOVDEQ y _ (FlagGT)) -> y
  1007  
  1008  (MOVDNE y _ (FlagEQ)) -> y
  1009  (MOVDNE _ x (FlagLT)) -> x
  1010  (MOVDNE _ x (FlagGT)) -> x
  1011  
  1012  (MOVDLT y _ (FlagEQ)) -> y
  1013  (MOVDLT _ x (FlagLT)) -> x
  1014  (MOVDLT y _ (FlagGT)) -> y
  1015  
  1016  (MOVDLE _ x (FlagEQ)) -> x
  1017  (MOVDLE _ x (FlagLT)) -> x
  1018  (MOVDLE y _ (FlagGT)) -> y
  1019  
  1020  (MOVDGT y _ (FlagEQ)) -> y
  1021  (MOVDGT y _ (FlagLT)) -> y
  1022  (MOVDGT _ x (FlagGT)) -> x
  1023  
  1024  (MOVDGE _ x (FlagEQ)) -> x
  1025  (MOVDGE y _ (FlagLT)) -> y
  1026  (MOVDGE _ x (FlagGT)) -> x
  1027  
  1028  // Remove redundant *const ops
        // The W (32-bit) variants compare through int32 because only the low
        // 32 bits of the auxint are meaningful for them.
  1029  (ADDconst [0] x) -> x
  1030  (ADDWconst [c] x) && int32(c)==0 -> x
  1031  (SUBconst [0] x) -> x
  1032  (SUBWconst [c] x) && int32(c) == 0 -> x
  1033  (ANDconst [0] _)                 -> (MOVDconst [0])
  1034  (ANDWconst [c] _) && int32(c)==0  -> (MOVDconst [0])
  1035  (ANDconst [-1] x)                -> x
  1036  (ANDWconst [c] x) && int32(c)==-1 -> x
  1037  (ORconst [0] x)                  -> x
  1038  (ORWconst [c] x) && int32(c)==0   -> x
  1039  (ORconst [-1] _)                 -> (MOVDconst [-1])
  1040  (ORWconst [c] _) && int32(c)==-1  -> (MOVDconst [-1])
  1041  (XORconst [0] x)                  -> x
  1042  (XORWconst [c] x) && int32(c)==0   -> x
  1043  
  1044  // Convert constant subtracts to constant adds.
        // The c != -(1<<31) guard avoids negating the most negative 32-bit
        // value, which has no 32-bit positive counterpart.
  1045  (SUBconst [c] x) && c != -(1<<31) -> (ADDconst [-c] x)
  1046  (SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
  1047  
  1048  // generic constant folding
  1049  // TODO: more of this
        // 32-bit results are wrapped in int64(int32(...)) so the auxint holds
        // the sign-extended low 32 bits.
  1050  (ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
  1051  (ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
  1052  (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
  1053  (ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
  1054  (SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
  1055  (SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
  1056  (SRADconst [c] (MOVDconst [d])) -> (MOVDconst [d>>uint64(c)])
  1057  (SRAWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(d))>>uint64(c)])
  1058  (NEG (MOVDconst [c])) -> (MOVDconst [-c])
  1059  (NEGW (MOVDconst [c])) -> (MOVDconst [int64(int32(-c))])
  1060  (MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
  1061  (MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
  1062  (AND (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c&d])
  1063  (ANDconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
  1064  (ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
  1065  (OR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c|d])
  1066  (ORconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
  1067  (ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
  1068  (XOR (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [c^d])
  1069  (XORconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
  1070  (XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
        // Rounding a floating-point constant is a no-op.
  1071  (LoweredRound32F x:(FMOVSconst)) -> x
  1072  (LoweredRound64F x:(FMOVDconst)) -> x
  1073  
  1074  // generic simplifications
  1075  // TODO: more of this
  1076  (ADD x (NEG y)) -> (SUB x y)
  1077  (ADDW x (NEGW y)) -> (SUBW x y)
  1078  (SUB x x) -> (MOVDconst [0])
  1079  (SUBW x x) -> (MOVDconst [0])
  1080  (AND x x) -> x
  1081  (ANDW x x) -> x
  1082  (OR x x) -> x
  1083  (ORW x x) -> x
  1084  (XOR x x) -> (MOVDconst [0])
  1085  (XORW x x) -> (MOVDconst [0])
        // -(c + -x) == x - c; guarded against negating the most negative
        // 32-bit constant.
  1086  (NEG (ADDconst [c] (NEG x))) && c != -(1<<31) -> (ADDconst [-c] x)
  1087  
  1088  // fused multiply-add
        // x + y*z and x - y*z are fused into single multiply-add/subtract
        // instructions.
  1089  (FADD (FMUL y z) x) -> (FMADD x y z)
  1090  (FADDS (FMULS y z) x) -> (FMADDS x y z)
  1091  (FSUB (FMUL y z) x) -> (FMSUB x y z)
  1092  (FSUBS (FMULS y z) x) -> (FMSUBS x y z)
  1093  
  1094  // Fold memory operations into operations.
  1095  // Exclude global data (SB) because these instructions cannot handle relative addresses.
  1096  // TODO(mundaym): use LARL in the assembler to handle SB?
  1097  // TODO(mundaym): indexed versions of these?
        // For each commutative op there is a rule for the load on either side.
        // canMergeLoad checks that folding the load into v is safe;
        // clobber(g) marks the absorbed load as dead so it gets removed.
        // MOVWload and MOVWZload can both feed the W (32-bit) ops since only
        // the low 32 bits of the loaded value are used.
  1098  (ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1099  	-> (ADDload <t> [off] {sym} x ptr mem)
  1100  (ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1101  	-> (ADDload <t> [off] {sym} x ptr mem)
  1102  (ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1103  	-> (ADDWload <t> [off] {sym} x ptr mem)
  1104  (ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1105  	-> (ADDWload <t> [off] {sym} x ptr mem)
  1106  (ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1107  	-> (ADDWload <t> [off] {sym} x ptr mem)
  1108  (ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1109  	-> (ADDWload <t> [off] {sym} x ptr mem)
  1110  (MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1111  	-> (MULLDload <t> [off] {sym} x ptr mem)
  1112  (MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1113  	-> (MULLDload <t> [off] {sym} x ptr mem)
  1114  (MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1115  	-> (MULLWload <t> [off] {sym} x ptr mem)
  1116  (MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1117  	-> (MULLWload <t> [off] {sym} x ptr mem)
  1118  (MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1119  	-> (MULLWload <t> [off] {sym} x ptr mem)
  1120  (MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1121  	-> (MULLWload <t> [off] {sym} x ptr mem)
        // SUB is not commutative: only the second (subtrahend) operand can be
        // a load.
  1122  (SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1123  	-> (SUBload <t> [off] {sym} x ptr mem)
  1124  (SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1125  	-> (SUBWload <t> [off] {sym} x ptr mem)
  1126  (SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1127  	-> (SUBWload <t> [off] {sym} x ptr mem)
  1128  (AND <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1129  	-> (ANDload <t> [off] {sym} x ptr mem)
  1130  (AND <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1131  	-> (ANDload <t> [off] {sym} x ptr mem)
  1132  (ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1133  	-> (ANDWload <t> [off] {sym} x ptr mem)
  1134  (ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1135  	-> (ANDWload <t> [off] {sym} x ptr mem)
  1136  (ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1137  	-> (ANDWload <t> [off] {sym} x ptr mem)
  1138  (ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1139  	-> (ANDWload <t> [off] {sym} x ptr mem)
  1140  (OR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1141  	-> (ORload <t> [off] {sym} x ptr mem)
  1142  (OR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1143  	-> (ORload <t> [off] {sym} x ptr mem)
  1144  (ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1145  	-> (ORWload <t> [off] {sym} x ptr mem)
  1146  (ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1147  	-> (ORWload <t> [off] {sym} x ptr mem)
  1148  (ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1149  	-> (ORWload <t> [off] {sym} x ptr mem)
  1150  (ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1151  	-> (ORWload <t> [off] {sym} x ptr mem)
  1152  (XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1153  	-> (XORload <t> [off] {sym} x ptr mem)
  1154  (XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1155  	-> (XORload <t> [off] {sym} x ptr mem)
  1156  (XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1157  	-> (XORWload <t> [off] {sym} x ptr mem)
  1158  (XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1159  	-> (XORWload <t> [off] {sym} x ptr mem)
  1160  (XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1161  	-> (XORWload <t> [off] {sym} x ptr mem)
  1162  (XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g, x) && clobber(g)
  1163  	-> (XORWload <t> [off] {sym} x ptr mem)
  1164  
  1165  // Combine constant stores into larger (unaligned) stores.
  1166  // Avoid SB because constant stores to relative offsets are
  1167  // emulated by the assembler and also can't handle unaligned offsets.
        // The auxint is a ValAndOff (value+offset pair). Two adjacent constant
        // stores (the earlier one at Off(), the later at Off()+width) are
        // merged into a single store of twice the width; the stored value is
        // assembled big-endian, so the lower-addressed value lands in the
        // high-order bits.
  1168  (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
  1169    && p.Op != OpSB
  1170    && x.Uses == 1
  1171    && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
  1172    && clobber(x)
  1173    -> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
  1174  (MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
  1175    && p.Op != OpSB
  1176    && x.Uses == 1
  1177    && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  1178    && clobber(x)
  1179    -> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
  1180  (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
  1181    && p.Op != OpSB
  1182    && x.Uses == 1
  1183    && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  1184    && clobber(x)
  1185    -> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
  1186  
  1187  // Combine stores into larger (unaligned) stores.
  1188  // It doesn't work on global data (based on SB) because stores with relative addressing
  1189  // require that the memory operand be aligned.
        // Pattern: a store of w at [i] following a store of (w >> width) at
        // [i-width] writes adjacent big-endian slices of the same value, so
        // the pair is replaced by a single store of twice the width at
        // [i-width]. The j/j+width variants handle stores of arbitrary
        // aligned slices of a shifted value.
  1190  (MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
  1191    && p.Op != OpSB
  1192    && x.Uses == 1
  1193    && clobber(x)
  1194    -> (MOVHstore [i-1] {s} p w mem)
  1195  (MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
  1196    && p.Op != OpSB
  1197    && x.Uses == 1
  1198    && clobber(x)
  1199    -> (MOVHstore [i-1] {s} p w0 mem)
  1200  (MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
  1201    && p.Op != OpSB
  1202    && x.Uses == 1
  1203    && clobber(x)
  1204    -> (MOVHstore [i-1] {s} p w mem)
  1205  (MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
  1206    && p.Op != OpSB
  1207    && x.Uses == 1
  1208    && clobber(x)
  1209    -> (MOVHstore [i-1] {s} p w0 mem)
  1210  (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
  1211    && p.Op != OpSB
  1212    && x.Uses == 1
  1213    && clobber(x)
  1214    -> (MOVWstore [i-2] {s} p w mem)
  1215  (MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
  1216    && p.Op != OpSB
  1217    && x.Uses == 1
  1218    && clobber(x)
  1219    -> (MOVWstore [i-2] {s} p w0 mem)
  1220  (MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
  1221    && p.Op != OpSB
  1222    && x.Uses == 1
  1223    && clobber(x)
  1224    -> (MOVWstore [i-2] {s} p w mem)
  1225  (MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
  1226    && p.Op != OpSB
  1227    && x.Uses == 1
  1228    && clobber(x)
  1229    -> (MOVWstore [i-2] {s} p w0 mem)
  1230  (MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
  1231    && p.Op != OpSB
  1232    && x.Uses == 1
  1233    && clobber(x)
  1234    -> (MOVDstore [i-4] {s} p w mem)
  1235  (MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
  1236    && p.Op != OpSB
  1237    && x.Uses == 1
  1238    && clobber(x)
  1239    -> (MOVDstore [i-4] {s} p w0 mem)
  1240  
        // Indexed variants of the store-merging rules above. No SB check is
        // needed here; NOTE(review): presumably because indexed stores are
        // never formed with an SB base (see the ptr.Op != OpSB guards on the
        // rules that create them).
  1241  (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
  1242    && x.Uses == 1
  1243    && clobber(x)
  1244    -> (MOVHstoreidx [i-1] {s} p idx w mem)
  1245  (MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
  1246    && x.Uses == 1
  1247    && clobber(x)
  1248    -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
  1249  (MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
  1250    && x.Uses == 1
  1251    && clobber(x)
  1252    -> (MOVHstoreidx [i-1] {s} p idx w mem)
  1253  (MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
  1254    && x.Uses == 1
  1255    && clobber(x)
  1256    -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
  1257  (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
  1258    && x.Uses == 1
  1259    && clobber(x)
  1260    -> (MOVWstoreidx [i-2] {s} p idx w mem)
  1261  (MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
  1262    && x.Uses == 1
  1263    && clobber(x)
  1264    -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
  1265  (MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
  1266    && x.Uses == 1
  1267    && clobber(x)
  1268    -> (MOVWstoreidx [i-2] {s} p idx w mem)
  1269  (MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
  1270    && x.Uses == 1
  1271    && clobber(x)
  1272    -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
  1273  (MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
  1274    && x.Uses == 1
  1275    && clobber(x)
  1276    -> (MOVDstoreidx [i-4] {s} p idx w mem)
  1277  (MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
  1278    && x.Uses == 1
  1279    && clobber(x)
  1280    -> (MOVDstoreidx [i-4] {s} p idx w0 mem)
  1281  
  1282  // Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
  1283  // Store-with-bytes-reversed instructions do not support relative memory addresses,
  1284  // so these stores can't operate on global data (SB).
        // Mirror image of the previous section: here the HIGH bits are stored
        // at the LOWER address first, i.e. the bytes are laid out little-
        // endian, so the pair becomes a byte-reversed (BR) store.
  1285  (MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
  1286    && p.Op != OpSB
  1287    && x.Uses == 1
  1288    && clobber(x)
  1289    -> (MOVHBRstore [i-1] {s} p w mem)
  1290  (MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
  1291    && p.Op != OpSB
  1292    && x.Uses == 1
  1293    && clobber(x)
  1294    -> (MOVHBRstore [i-1] {s} p w0 mem)
  1295  (MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
  1296    && p.Op != OpSB
  1297    && x.Uses == 1
  1298    && clobber(x)
  1299    -> (MOVHBRstore [i-1] {s} p w mem)
  1300  (MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
  1301    && p.Op != OpSB
  1302    && x.Uses == 1
  1303    && clobber(x)
  1304    -> (MOVHBRstore [i-1] {s} p w0 mem)
  1305  (MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
  1306    && x.Uses == 1
  1307    && clobber(x)
  1308    -> (MOVWBRstore [i-2] {s} p w mem)
  1309  (MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
  1310    && x.Uses == 1
  1311    && clobber(x)
  1312    -> (MOVWBRstore [i-2] {s} p w0 mem)
  1313  (MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
  1314    && x.Uses == 1
  1315    && clobber(x)
  1316    -> (MOVWBRstore [i-2] {s} p w mem)
  1317  (MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
  1318    && x.Uses == 1
  1319    && clobber(x)
  1320    -> (MOVWBRstore [i-2] {s} p w0 mem)
  1321  (MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
  1322    && x.Uses == 1
  1323    && clobber(x)
  1324    -> (MOVDBRstore [i-4] {s} p w mem)
  1325  (MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
  1326    && x.Uses == 1
  1327    && clobber(x)
  1328    -> (MOVDBRstore [i-4] {s} p w0 mem)
  1329  
        // Indexed variants of the byte-reversed store merging above.
  1330  (MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
  1331    && x.Uses == 1
  1332    && clobber(x)
  1333    -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
  1334  (MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
  1335    && x.Uses == 1
  1336    && clobber(x)
  1337    -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
  1338  (MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
  1339    && x.Uses == 1
  1340    && clobber(x)
  1341    -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
  1342  (MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
  1343    && x.Uses == 1
  1344    && clobber(x)
  1345    -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
  1346  (MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
  1347    && x.Uses == 1
  1348    && clobber(x)
  1349    -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
  1350  (MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
  1351    && x.Uses == 1
  1352    && clobber(x)
  1353    -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
  1354  (MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
  1355    && x.Uses == 1
  1356    && clobber(x)
  1357    -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
  1358  (MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
  1359    && x.Uses == 1
  1360    && clobber(x)
  1361    -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
  1362  (MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
  1363    && x.Uses == 1
  1364    && clobber(x)
  1365    -> (MOVDBRstoreidx [i-4] {s} p idx w mem)
  1366  (MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
  1367    && x.Uses == 1
  1368    && clobber(x)
  1369    -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
  1370  
  1371  // Combining byte loads into larger (unaligned) loads.
  1372  
  1373  // Big-endian loads
        // Two adjacent zero-extended loads OR'd together, with the lower-
        // addressed one shifted left by its width, reconstruct one wider
        // big-endian load. mergePoint(b,x0,x1) locates a block where both
        // loads are available for the replacement; nil means no such block
        // and the rule does not fire. All merged values must be single-use
        // so the originals can be clobbered.
  1374  
  1375  (ORW                 x1:(MOVBZload [i1] {s} p mem)
  1376      sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
  1377    && i1 == i0+1
  1378    && p.Op != OpSB
  1379    && x0.Uses == 1
  1380    && x1.Uses == 1
  1381    && sh.Uses == 1
  1382    && mergePoint(b,x0,x1) != nil
  1383    && clobber(x0)
  1384    && clobber(x1)
  1385    && clobber(sh)
  1386    -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
  1387  
  1388  (OR                  x1:(MOVBZload [i1] {s} p mem)
  1389      sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
  1390    && i1 == i0+1
  1391    && p.Op != OpSB
  1392    && x0.Uses == 1
  1393    && x1.Uses == 1
  1394    && sh.Uses == 1
  1395    && mergePoint(b,x0,x1) != nil
  1396    && clobber(x0)
  1397    && clobber(x1)
  1398    && clobber(sh)
  1399    -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
  1400  
  1401  (ORW                  x1:(MOVHZload [i1] {s} p mem)
  1402      sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
  1403    && i1 == i0+2
  1404    && p.Op != OpSB
  1405    && x0.Uses == 1
  1406    && x1.Uses == 1
  1407    && sh.Uses == 1
  1408    && mergePoint(b,x0,x1) != nil
  1409    && clobber(x0)
  1410    && clobber(x1)
  1411    && clobber(sh)
  1412    -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
  1413  
  1414  (OR                   x1:(MOVHZload [i1] {s} p mem)
  1415      sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
  1416    && i1 == i0+2
  1417    && p.Op != OpSB
  1418    && x0.Uses == 1
  1419    && x1.Uses == 1
  1420    && sh.Uses == 1
  1421    && mergePoint(b,x0,x1) != nil
  1422    && clobber(x0)
  1423    && clobber(x1)
  1424    && clobber(sh)
  1425    -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
  1426  
  1427  (OR                   x1:(MOVWZload [i1] {s} p mem)
  1428      sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
  1429    && i1 == i0+4
  1430    && p.Op != OpSB
  1431    && x0.Uses == 1
  1432    && x1.Uses == 1
  1433    && sh.Uses == 1
  1434    && mergePoint(b,x0,x1) != nil
  1435    && clobber(x0)
  1436    && clobber(x1)
  1437    && clobber(sh)
  1438    -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
  1439  
        // Partial merges inside a larger OR chain: two shifted adjacent loads
        // within an OR tree are replaced by one shifted wider load, leaving
        // the rest of the chain (y) intact. j1 % 16 == 0 keeps the merged
        // load aligned on its own width within the final value.
  1440  (ORW
  1441      s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1442      or:(ORW
  1443          s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1444  	y))
  1445    && i1 == i0+1
  1446    && j1 == j0-8
  1447    && j1 % 16 == 0
  1448    && x0.Uses == 1
  1449    && x1.Uses == 1
  1450    && s0.Uses == 1
  1451    && s1.Uses == 1
  1452    && or.Uses == 1
  1453    && mergePoint(b,x0,x1) != nil
  1454    && clobber(x0)
  1455    && clobber(x1)
  1456    && clobber(s0)
  1457    && clobber(s1)
  1458    && clobber(or)
  1459    -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
  1460  
  1461  (OR
  1462      s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1463      or:(OR
  1464          s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1465  	y))
  1466    && i1 == i0+1
  1467    && j1 == j0-8
  1468    && j1 % 16 == 0
  1469    && x0.Uses == 1
  1470    && x1.Uses == 1
  1471    && s0.Uses == 1
  1472    && s1.Uses == 1
  1473    && or.Uses == 1
  1474    && mergePoint(b,x0,x1) != nil
  1475    && clobber(x0)
  1476    && clobber(x1)
  1477    && clobber(s0)
  1478    && clobber(s1)
  1479    && clobber(or)
  1480    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
  1481  
  1482  (OR
  1483      s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
  1484      or:(OR
  1485          s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
  1486  	y))
  1487    && i1 == i0+2
  1488    && j1 == j0-16
  1489    && j1 % 32 == 0
  1490    && x0.Uses == 1
  1491    && x1.Uses == 1
  1492    && s0.Uses == 1
  1493    && s1.Uses == 1
  1494    && or.Uses == 1
  1495    && mergePoint(b,x0,x1) != nil
  1496    && clobber(x0)
  1497    && clobber(x1)
  1498    && clobber(s0)
  1499    && clobber(s1)
  1500    && clobber(or)
  1501    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
  1502  
  1503  // Big-endian indexed loads
// The following rules mirror the big-endian merges above, but for the
// register-indexed load forms (MOV*Zloadidx, addressed by p + idx + const):
// two adjacent narrow loads combined as x1 | (x0 << width) collapse into a
// single wider zero-extended indexed load at the lower offset i0.
  1504  
// Two adjacent byte loads (i1 == i0+1) -> one 16-bit indexed load (32-bit OR).
  1505  (ORW                 x1:(MOVBZloadidx [i1] {s} p idx mem)
  1506      sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
  1507    && i1 == i0+1
  1508    && p.Op != OpSB
  1509    && x0.Uses == 1
  1510    && x1.Uses == 1
  1511    && sh.Uses == 1
  1512    && mergePoint(b,x0,x1) != nil
  1513    && clobber(x0)
  1514    && clobber(x1)
  1515    && clobber(sh)
  1516    -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
  1517  
// Same byte-pair merge, 64-bit OR form.
  1518  (OR                  x1:(MOVBZloadidx [i1] {s} p idx mem)
  1519      sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
  1520    && i1 == i0+1
  1521    && p.Op != OpSB
  1522    && x0.Uses == 1
  1523    && x1.Uses == 1
  1524    && sh.Uses == 1
  1525    && mergePoint(b,x0,x1) != nil
  1526    && clobber(x0)
  1527    && clobber(x1)
  1528    && clobber(sh)
  1529    -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
  1530  
// Two adjacent halfword loads (i1 == i0+2) -> one 32-bit indexed load (32-bit OR).
  1531  (ORW                  x1:(MOVHZloadidx [i1] {s} p idx mem)
  1532      sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
  1533    && i1 == i0+2
  1534    && p.Op != OpSB
  1535    && x0.Uses == 1
  1536    && x1.Uses == 1
  1537    && sh.Uses == 1
  1538    && mergePoint(b,x0,x1) != nil
  1539    && clobber(x0)
  1540    && clobber(x1)
  1541    && clobber(sh)
  1542    -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
  1543  
// Same halfword-pair merge, 64-bit OR form.
  1544  (OR                   x1:(MOVHZloadidx [i1] {s} p idx mem)
  1545      sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
  1546    && i1 == i0+2
  1547    && p.Op != OpSB
  1548    && x0.Uses == 1
  1549    && x1.Uses == 1
  1550    && sh.Uses == 1
  1551    && mergePoint(b,x0,x1) != nil
  1552    && clobber(x0)
  1553    && clobber(x1)
  1554    && clobber(sh)
  1555    -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
  1556  
// Two adjacent word loads (i1 == i0+4) -> one 64-bit indexed load.
  1557  (OR                   x1:(MOVWZloadidx [i1] {s} p idx mem)
  1558      sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
  1559    && i1 == i0+4
  1560    && p.Op != OpSB
  1561    && x0.Uses == 1
  1562    && x1.Uses == 1
  1563    && sh.Uses == 1
  1564    && mergePoint(b,x0,x1) != nil
  1565    && clobber(x0)
  1566    && clobber(x1)
  1567    && clobber(sh)
  1568    -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
  1569  
// Within an ORW chain, merge two shifted adjacent byte loads
// (i1 == i0+1, j1 == j0-8, j1 % 16 == 0) into one shifted 16-bit
// indexed load; the chain tail y is kept.
  1570  (ORW
  1571      s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1572      or:(ORW
  1573          s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1574  	y))
  1575    && i1 == i0+1
  1576    && j1 == j0-8
  1577    && j1 % 16 == 0
  1578    && x0.Uses == 1
  1579    && x1.Uses == 1
  1580    && s0.Uses == 1
  1581    && s1.Uses == 1
  1582    && or.Uses == 1
  1583    && mergePoint(b,x0,x1) != nil
  1584    && clobber(x0)
  1585    && clobber(x1)
  1586    && clobber(s0)
  1587    && clobber(s1)
  1588    && clobber(or)
  1589    -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
  1590  
// Same chained byte-pair merge, 64-bit OR form.
  1591  (OR
  1592      s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1593      or:(OR
  1594          s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1595  	y))
  1596    && i1 == i0+1
  1597    && j1 == j0-8
  1598    && j1 % 16 == 0
  1599    && x0.Uses == 1
  1600    && x1.Uses == 1
  1601    && s0.Uses == 1
  1602    && s1.Uses == 1
  1603    && or.Uses == 1
  1604    && mergePoint(b,x0,x1) != nil
  1605    && clobber(x0)
  1606    && clobber(x1)
  1607    && clobber(s0)
  1608    && clobber(s1)
  1609    && clobber(or)
  1610    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
  1611  
// Within an OR chain, merge two shifted adjacent halfword loads
// (i1 == i0+2, j1 == j0-16, j1 % 32 == 0) into one shifted 32-bit
// indexed load.
  1612  (OR
  1613      s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
  1614      or:(OR
  1615          s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
  1616  	y))
  1617    && i1 == i0+2
  1618    && j1 == j0-16
  1619    && j1 % 32 == 0
  1620    && x0.Uses == 1
  1621    && x1.Uses == 1
  1622    && s0.Uses == 1
  1623    && s1.Uses == 1
  1624    && or.Uses == 1
  1625    && mergePoint(b,x0,x1) != nil
  1626    && clobber(x0)
  1627    && clobber(x1)
  1628    && clobber(s0)
  1629    && clobber(s1)
  1630    && clobber(or)
  1631    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
  1632  
  1633  // Little-endian loads
// Here the LOWER-address value x0 is the LEAST significant part
// (x0 | x1<<shift with i1 == i0+shift/8), i.e. the bytes are in
// little-endian order in memory. A single wider load must therefore use the
// byte-reversed load instructions (MOVHBRload / MOVWBRload / MOVDBRload).
  1634  
// Two adjacent byte loads in little-endian order -> byte-reversed 16-bit
// load, zero-extended (32-bit ORW form).
  1635  (ORW                 x0:(MOVBZload [i0] {s} p mem)
  1636      sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
  1637    && p.Op != OpSB
  1638    && i1 == i0+1
  1639    && x0.Uses == 1
  1640    && x1.Uses == 1
  1641    && sh.Uses == 1
  1642    && mergePoint(b,x0,x1) != nil
  1643    && clobber(x0)
  1644    && clobber(x1)
  1645    && clobber(sh)
  1646    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
  1647  
// Same byte-pair merge, 64-bit OR form.
  1648  (OR                  x0:(MOVBZload [i0] {s} p mem)
  1649      sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
  1650    && p.Op != OpSB
  1651    && i1 == i0+1
  1652    && x0.Uses == 1
  1653    && x1.Uses == 1
  1654    && sh.Uses == 1
  1655    && mergePoint(b,x0,x1) != nil
  1656    && clobber(x0)
  1657    && clobber(x1)
  1658    && clobber(sh)
  1659    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
  1660  
// Two adjacent byte-reversed halfword loads (each already wrapped in a
// zero-extension r0/r1) -> one byte-reversed 32-bit load.
  1661  (ORW                  r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
  1662      sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
  1663    && i1 == i0+2
  1664    && x0.Uses == 1
  1665    && x1.Uses == 1
  1666    && r0.Uses == 1
  1667    && r1.Uses == 1
  1668    && sh.Uses == 1
  1669    && mergePoint(b,x0,x1) != nil
  1670    && clobber(x0)
  1671    && clobber(x1)
  1672    && clobber(r0)
  1673    && clobber(r1)
  1674    && clobber(sh)
  1675    -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
  1676  
// Same halfword-pair merge, 64-bit OR form; the result keeps an explicit
// MOVWZreg zero-extension since the OR produces a 64-bit value.
  1677  (OR                   r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
  1678      sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
  1679    && i1 == i0+2
  1680    && x0.Uses == 1
  1681    && x1.Uses == 1
  1682    && r0.Uses == 1
  1683    && r1.Uses == 1
  1684    && sh.Uses == 1
  1685    && mergePoint(b,x0,x1) != nil
  1686    && clobber(x0)
  1687    && clobber(x1)
  1688    && clobber(r0)
  1689    && clobber(r1)
  1690    && clobber(sh)
  1691    -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
  1692  
// Two adjacent byte-reversed word loads -> one byte-reversed 64-bit load.
  1693  (OR                   r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
  1694      sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
  1695    && i1 == i0+4
  1696    && x0.Uses == 1
  1697    && x1.Uses == 1
  1698    && r0.Uses == 1
  1699    && r1.Uses == 1
  1700    && sh.Uses == 1
  1701    && mergePoint(b,x0,x1) != nil
  1702    && clobber(x0)
  1703    && clobber(x1)
  1704    && clobber(r0)
  1705    && clobber(r1)
  1706    && clobber(sh)
  1707    -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
  1708  
// Within an ORW chain, merge two shifted adjacent byte loads in
// little-endian order (i1 == i0+1, j1 == j0+8 — the HIGHER address gets the
// LARGER shift, j0 halfword-aligned) into a shifted byte-reversed 16-bit load.
  1709  (ORW
  1710      s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1711      or:(ORW
  1712          s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1713  	y))
  1714    && p.Op != OpSB
  1715    && i1 == i0+1
  1716    && j1 == j0+8
  1717    && j0 % 16 == 0
  1718    && x0.Uses == 1
  1719    && x1.Uses == 1
  1720    && s0.Uses == 1
  1721    && s1.Uses == 1
  1722    && or.Uses == 1
  1723    && mergePoint(b,x0,x1) != nil
  1724    && clobber(x0)
  1725    && clobber(x1)
  1726    && clobber(s0)
  1727    && clobber(s1)
  1728    && clobber(or)
  1729    -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
  1730  
// Same chained byte-pair merge, 64-bit OR form.
  1731  (OR
  1732      s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
  1733      or:(OR
  1734          s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
  1735  	y))
  1736    && p.Op != OpSB
  1737    && i1 == i0+1
  1738    && j1 == j0+8
  1739    && j0 % 16 == 0
  1740    && x0.Uses == 1
  1741    && x1.Uses == 1
  1742    && s0.Uses == 1
  1743    && s1.Uses == 1
  1744    && or.Uses == 1
  1745    && mergePoint(b,x0,x1) != nil
  1746    && clobber(x0)
  1747    && clobber(x1)
  1748    && clobber(s0)
  1749    && clobber(s1)
  1750    && clobber(or)
  1751    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
  1752  
// Within an OR chain, merge two shifted adjacent byte-reversed halfword
// loads (i1 == i0+2, j1 == j0+16, j0 word-aligned) into one shifted
// byte-reversed 32-bit load.
  1753  (OR
  1754      s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
  1755      or:(OR
  1756          s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
  1757  	y))
  1758    && i1 == i0+2
  1759    && j1 == j0+16
  1760    && j0 % 32 == 0
  1761    && x0.Uses == 1
  1762    && x1.Uses == 1
  1763    && r0.Uses == 1
  1764    && r1.Uses == 1
  1765    && s0.Uses == 1
  1766    && s1.Uses == 1
  1767    && or.Uses == 1
  1768    && mergePoint(b,x0,x1) != nil
  1769    && clobber(x0)
  1770    && clobber(x1)
  1771    && clobber(r0)
  1772    && clobber(r1)
  1773    && clobber(s0)
  1774    && clobber(s1)
  1775    && clobber(or)
  1776    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
  1777  
  1778  // Little-endian indexed loads
// Indexed (p + idx + const) counterparts of the little-endian merges above:
// adjacent narrow loads combined in little-endian byte order collapse into
// the byte-reversed indexed load instructions (MOV*BRloadidx).
  1779  
// Two adjacent byte loads in little-endian order -> byte-reversed 16-bit
// indexed load (32-bit ORW form).
  1780  (ORW                 x0:(MOVBZloadidx [i0] {s} p idx mem)
  1781      sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
  1782    && p.Op != OpSB
  1783    && i1 == i0+1
  1784    && x0.Uses == 1
  1785    && x1.Uses == 1
  1786    && sh.Uses == 1
  1787    && mergePoint(b,x0,x1) != nil
  1788    && clobber(x0)
  1789    && clobber(x1)
  1790    && clobber(sh)
  1791    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
  1792  
// Same byte-pair merge, 64-bit OR form.
  1793  (OR                  x0:(MOVBZloadidx [i0] {s} p idx mem)
  1794      sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
  1795    && p.Op != OpSB
  1796    && i1 == i0+1
  1797    && x0.Uses == 1
  1798    && x1.Uses == 1
  1799    && sh.Uses == 1
  1800    && mergePoint(b,x0,x1) != nil
  1801    && clobber(x0)
  1802    && clobber(x1)
  1803    && clobber(sh)
  1804    -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
  1805  
// Two adjacent byte-reversed halfword indexed loads -> one byte-reversed
// 32-bit indexed load.
  1806  (ORW                  r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
  1807      sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
  1808    && i1 == i0+2
  1809    && x0.Uses == 1
  1810    && x1.Uses == 1
  1811    && r0.Uses == 1
  1812    && r1.Uses == 1
  1813    && sh.Uses == 1
  1814    && mergePoint(b,x0,x1) != nil
  1815    && clobber(x0)
  1816    && clobber(x1)
  1817    && clobber(r0)
  1818    && clobber(r1)
  1819    && clobber(sh)
  1820    -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
  1821  
// Same halfword-pair merge, 64-bit OR form; keeps an explicit MOVWZreg
// zero-extension on the 32-bit result.
  1822  (OR                   r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
  1823      sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
  1824    && i1 == i0+2
  1825    && x0.Uses == 1
  1826    && x1.Uses == 1
  1827    && r0.Uses == 1
  1828    && r1.Uses == 1
  1829    && sh.Uses == 1
  1830    && mergePoint(b,x0,x1) != nil
  1831    && clobber(x0)
  1832    && clobber(x1)
  1833    && clobber(r0)
  1834    && clobber(r1)
  1835    && clobber(sh)
  1836    -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
  1837  
// Two adjacent byte-reversed word indexed loads -> one byte-reversed
// 64-bit indexed load.
  1838  (OR                   r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
  1839      sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
  1840    && i1 == i0+4
  1841    && x0.Uses == 1
  1842    && x1.Uses == 1
  1843    && r0.Uses == 1
  1844    && r1.Uses == 1
  1845    && sh.Uses == 1
  1846    && mergePoint(b,x0,x1) != nil
  1847    && clobber(x0)
  1848    && clobber(x1)
  1849    && clobber(r0)
  1850    && clobber(r1)
  1851    && clobber(sh)
  1852    -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
  1853  
// Within an ORW chain, merge two shifted adjacent byte loads in
// little-endian order (i1 == i0+1, j1 == j0+8, j0 halfword-aligned) into a
// shifted byte-reversed 16-bit indexed load; chain tail y is preserved.
  1854  (ORW
  1855      s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1856      or:(ORW
  1857          s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1858  	y))
  1859    && p.Op != OpSB
  1860    && i1 == i0+1
  1861    && j1 == j0+8
  1862    && j0 % 16 == 0
  1863    && x0.Uses == 1
  1864    && x1.Uses == 1
  1865    && s0.Uses == 1
  1866    && s1.Uses == 1
  1867    && or.Uses == 1
  1868    && mergePoint(b,x0,x1) != nil
  1869    && clobber(x0)
  1870    && clobber(x1)
  1871    && clobber(s0)
  1872    && clobber(s1)
  1873    && clobber(or)
  1874    -> @mergePoint(b,x0,x1) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
  1875  
// Same chained byte-pair merge, 64-bit OR form.
  1876  (OR
  1877      s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
  1878      or:(OR
  1879          s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
  1880  	y))
  1881    && p.Op != OpSB
  1882    && i1 == i0+1
  1883    && j1 == j0+8
  1884    && j0 % 16 == 0
  1885    && x0.Uses == 1
  1886    && x1.Uses == 1
  1887    && s0.Uses == 1
  1888    && s1.Uses == 1
  1889    && or.Uses == 1
  1890    && mergePoint(b,x0,x1) != nil
  1891    && clobber(x0)
  1892    && clobber(x1)
  1893    && clobber(s0)
  1894    && clobber(s1)
  1895    && clobber(or)
  1896    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
  1897  
// Within an OR chain, merge two shifted adjacent byte-reversed halfword
// indexed loads (i1 == i0+2, j1 == j0+16, j0 word-aligned) into one
// shifted byte-reversed 32-bit indexed load.
  1898  (OR
  1899      s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
  1900      or:(OR
  1901          s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
  1902  	y))
  1903    && i1 == i0+2
  1904    && j1 == j0+16
  1905    && j0 % 32 == 0
  1906    && x0.Uses == 1
  1907    && x1.Uses == 1
  1908    && r0.Uses == 1
  1909    && r1.Uses == 1
  1910    && s0.Uses == 1
  1911    && s1.Uses == 1
  1912    && or.Uses == 1
  1913    && mergePoint(b,x0,x1) != nil
  1914    && clobber(x0)
  1915    && clobber(x1)
  1916    && clobber(r0)
  1917    && clobber(r1)
  1918    && clobber(s0)
  1919    && clobber(s1)
  1920    && clobber(or)
  1921    -> @mergePoint(b,x0,x1) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
  1922  
  1923  // Combine stores into store multiples.
// Adjacent stores at descending offsets are folded incrementally into
// STM{2,3,4} (32-bit) / STMG{2,3,4} (64-bit) store-multiple ops. Each step
// requires the inner store to be single-use (clobbered) and the new base
// offset to fit in a 20-bit signed displacement (is20Bit).
  1924  // 32-bit
// Two adjacent word stores -> STM2; base must not be SB.
  1925  (MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
  1926    && p.Op != OpSB
  1927    && x.Uses == 1
  1928    && is20Bit(i-4)
  1929    && clobber(x)
  1930    -> (STM2 [i-4] {s} p w0 w1 mem)
// Grow an existing STM2/STM3 by one adjacent word store.
  1931  (MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
  1932    && x.Uses == 1
  1933    && is20Bit(i-8)
  1934    && clobber(x)
  1935    -> (STM3 [i-8] {s} p w0 w1 w2 mem)
  1936  (MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
  1937    && x.Uses == 1
  1938    && is20Bit(i-12)
  1939    && clobber(x)
  1940    -> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
// Two adjacent STM2s -> one STM4.
  1941  (STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
  1942    && x.Uses == 1
  1943    && is20Bit(i-8)
  1944    && clobber(x)
  1945    -> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
  1946  // 64-bit
// Same laddering for 64-bit stores, using STMG with 8-byte strides.
  1947  (MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
  1948    && p.Op != OpSB
  1949    && x.Uses == 1
  1950    && is20Bit(i-8)
  1951    && clobber(x)
  1952    -> (STMG2 [i-8] {s} p w0 w1 mem)
  1953  (MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
  1954    && x.Uses == 1
  1955    && is20Bit(i-16)
  1956    && clobber(x)
  1957    -> (STMG3 [i-16] {s} p w0 w1 w2 mem)
  1958  (MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
  1959    && x.Uses == 1
  1960    && is20Bit(i-24)
  1961    && clobber(x)
  1962    -> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
  1963  (STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
  1964    && x.Uses == 1
  1965    && is20Bit(i-16)
  1966    && clobber(x)
  1967    -> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
  1968  
  1969  // Convert 32-bit store multiples into 64-bit stores.
// An STM2 storing the high word of x (x >> 32) followed by x itself writes
// exactly the big-endian layout of the 64-bit value x, so use MOVDstore.
  1970  (STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)