// Listing origin: github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/compile/internal/ssa/gen/AMD64.rules

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Lowering arithmetic
// 8- and 16-bit integer ops reuse the 32-bit (L) instruction forms;
// pointer-sized ops use the 64-bit (Q) forms.
(Add(Ptr|64|32|16|8) x y) -> (ADD(Q|Q|L|L|L) x y)
(Add(32|64)F x y) -> (ADDS(S|D) x y)

(Sub(Ptr|64|32|16|8) x y) -> (SUB(Q|Q|L|L|L) x y)
(Sub(32|64)F x y) -> (SUBS(S|D) x y)

(Mul(64|32|16|8) x y) -> (MUL(Q|L|L|L) x y)
(Mul(32|64)F x y) -> (MULS(S|D) x y)

// Overflow-checked unsigned multiply: Select0 takes the product,
// Select1 becomes SETO applied to the flags half of the tuple.
(Select0 (Mul64uover x y)) -> (Select0 <typ.UInt64> (MULQU x y))
(Select0 (Mul32uover x y)) -> (Select0 <typ.UInt32> (MULLU x y))
(Select1 (Mul(64|32)uover x y)) -> (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y)))

(Hmul(64|32|64u|32u) x y) -> (HMUL(Q|L|QU|LU) x y)

// Division lowers to Select0 of the machine divide. 8-bit operands are
// extended to 16 bits first and use the W form.
(Div(64|32|16) [a] x y) -> (Select0 (DIV(Q|L|W) [a] x y))
(Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Div(64|32|16)u x y) -> (Select0 (DIV(Q|L|W)U x y))
(Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F x y) -> (DIVS(S|D) x y)

// 64-bit add-with-carry and subtract-with-borrow. The incoming carry/borrow
// value c is turned into flags with NEGLflags; the outgoing one is rebuilt
// as NEGQ of SBBQcarrymask over the resulting flags.
(Select0 (Add64carry x y c))
	-> (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c))
	-> (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c))
	-> (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c))
	-> (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))

// Optimize ADCQ and friends
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) -> (ADCQconst x [c] carry)
(ADCQ x y (FlagEQ)) -> (ADDQcarry x y)
(ADCQconst x [c] (FlagEQ)) -> (ADDQconstcarry x [c])
(ADDQcarry x (MOVQconst [c])) && is32Bit(c) -> (ADDQconstcarry x [c])
(SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) -> (SBBQconst x [c] borrow)
(SBBQ x y (FlagEQ)) -> (SUBQborrow x y)
(SBBQconst x [c] (FlagEQ)) -> (SUBQconstborrow x [c])
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) -> (SUBQconstborrow x [c])
(Select1 (NEGLflags (MOVQconst [0]))) -> (FlagEQ)
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) -> x


(Mul64uhilo x y) -> (MULQU2 x y)
(Div128u xhi xlo y) -> (DIVQU2 xhi xlo y)

(Avg64u x y) -> (AVGQU x y)

// Modulus reuses the same machine divide as Div but selects the second
// tuple element.
(Mod(64|32|16) [a] x y) -> (Select1 (DIV(Q|L|W) [a] x y))
(Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Mod(64|32|16)u x y) -> (Select1 (DIV(Q|L|W)U x y))
(Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))

// Bitwise ops. The boolean And/Or variants (B) share the 32-bit forms.
(And(64|32|16|8|B) x y) -> (AND(Q|L|L|L|L) x y)
(Or(64|32|16|8|B) x y) -> (OR(Q|L|L|L|L) x y)
(Xor(64|32|16|8) x y) -> (XOR(Q|L|L|L) x y)
(Com(64|32|16|8) x) -> (NOT(Q|L|L|L) x)

// Integer negation is a plain NEG; float negation XORs with a constant
// built from math.Copysign(0, -1), i.e. negative zero.
(Neg(64|32|16|8) x) -> (NEG(Q|L|L|L) x)
(Neg32F x) -> (PXOR x (MOVSSconst <typ.Float32> [auxFrom32F(float32(math.Copysign(0, -1)))]))
(Neg64F x) -> (PXOR x (MOVSDconst <typ.Float64> [auxFrom64F(math.Copysign(0, -1))]))

// Lowering boolean ops
// (AndB and OrB are lowered together with the integer And/Or rules above.)
(Not x) -> (XORLconst [1] x)

// Lowering pointer arithmetic
// The first rule handles offsets that fit in 32 bits; anything else falls
// through to the second rule, which materializes the offset as a constant.
(OffPtr [off] ptr) && is32Bit(off) -> (ADDQconst [off] ptr)
(OffPtr [off] ptr) -> (ADDQ (MOVQconst [off]) ptr)

// Lowering other arithmetic
// Ctz64 selects the constant 64 via CMOVQEQ on the flags produced by BSFQ.
// The narrower variants set a bit just above the operand width (BTS) before
// scanning.
(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) -> (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) -> (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) -> (BSFL (BTSLconst <typ.UInt32> [ 8] x))

(Ctz64NonZero x) -> (Select0 (BSFQ x))
(Ctz(32|16|8)NonZero x) -> (BSFL x)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
// places the index of the highest set bit where we want it.
(BitLen64 <t> x) -> (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
(BitLen32 x) -> (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
(BitLen16 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
(BitLen8 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))

(Bswap(64|32) x) -> (BSWAP(Q|L) x)

// 8- and 16-bit population counts zero-extend the operand and use the
// 32-bit instruction.
(PopCount(64|32) x) -> (POPCNT(Q|L) x)
(PopCount(16|8) x) -> (POPCNTL (MOV(W|B)QZX <typ.UInt32> x))

(Sqrt x) -> (SQRTSD x)

// The four rounding ops differ only in the ROUNDSD immediate.
(RoundToEven x) -> (ROUNDSD [0] x)
(Floor x) -> (ROUNDSD [1] x)
(Ceil x) -> (ROUNDSD [2] x)
(Trunc x) -> (ROUNDSD [3] x)
(FMA x y z) -> (VFMADD231SD z x y)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to(16|32|64) x) -> (MOVBQSX x)
(SignExt16to(32|64) x) -> (MOVWQSX x)
(SignExt32to64 x) -> (MOVLQSX x)

(ZeroExt8to(16|32|64) x) -> (MOVBQZX x)
(ZeroExt16to(32|64) x) -> (MOVWQZX x)
(ZeroExt32to64 x) -> (MOVLQZX x)

(Slicemask <t> x) -> (SARQconst (NEGQ <t> x) [63])

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc(16to8|32to8|32to16|64to8|64to16|64to32) x) -> x

// Lowering float <-> int
(Cvt(32to32F|32to64F|64to32F|64to64F) x) -> (CVTS(L2SS|L2SD|Q2SS|Q2SD) x)

(Cvt(32Fto32|32Fto64|64Fto32|64Fto64) x) -> (CVTT(SS2SL|SS2SQ|SD2SL|SD2SQ) x)

(Cvt(32Fto64F|64Fto32F) x) -> (CVT(SS2SD|SD2SS) x)

(Round(32|64)F x) -> x

// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
//   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
// Left shifts of 8/16/32-bit values all use SHLL and compare the count
// against 32.
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))

// When shiftIsBounded(v) holds, the mask is dropped and the bare shift is emitted.
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)

// Unsigned right shifts use the width-specific shift instruction and compare
// the count against the operand's own width.
(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))

(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRB x y)

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v)
	-> (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))

(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARB x y)

// Lowering comparisons
// Signed compares use SETL/LE/G/GE, unsigned ones SETB/BE/A/AE.
(Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y))
// Use SETGF with reversed operands to dodge NaN case
(Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x))

(Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
// Use SETGEF with reversed operands to dodge NaN case
(Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))

(Greater(64|32|16|8) x y) -> (SETG (CMP(Q|L|W|B) x y))
(Greater(64|32|16|8)U x y) -> (SETA (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Greater(32|64)F x y) -> (SETGF (UCOMIS(S|D) x y))

(Geq(64|32|16|8) x y) -> (SETGE (CMP(Q|L|W|B) x y))
(Geq(64|32|16|8)U x y) -> (SETAE (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Geq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) x y))

// Equality: pointers compare as 64-bit values, booleans as bytes.
(Eq(Ptr|64|32|16|8|B) x y) -> (SETEQ (CMP(Q|Q|L|W|B|B) x y))
(Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))

(Neq(Ptr|64|32|16|8|B) x y) -> (SETNE (CMP(Q|Q|L|W|B|B) x y))
(Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))

// Lowering loads
// The load instruction is chosen from the type of the loaded value.
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) -> (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)

// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)

// Remaining stores are selected purely by the size of the stored type.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)

// Lowering moves
// Sizes that match a register width become one load/store pair.
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE
	-> (MOVQstore [8] dst (MOVQload [8] src mem)
		(MOVQstore dst (MOVQload src mem) mem))

// 32, 48 and 64 byte moves are split into smaller Move ops that are
// lowered again by the rules above.
(Move [32] dst src mem)
	-> (Move [16]
		(OffPtr <dst.Type> dst [16])
		(OffPtr <src.Type> src [16])
		(Move [16] dst src mem))

(Move [48] dst src mem) && config.useSSE
	-> (Move [32]
		(OffPtr <dst.Type> dst [16])
		(OffPtr <src.Type> src [16])
		(Move [16] dst src mem))

(Move [64] dst src mem) && config.useSSE
	-> (Move [32]
		(OffPtr <dst.Type> dst [32])
		(OffPtr <src.Type> src [32])
		(Move [32] dst src mem))

// Odd sizes below 16 bytes use two load/store pairs. For sizes 7, 11 and
// 13..15 the second pair starts at size minus its width, so the two pairs
// overlap.
(Move [3] dst src mem)
	-> (MOVBstore [2] dst (MOVBload [2] src mem)
		(MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem)
	-> (MOVBstore [4] dst (MOVBload [4] src mem)
		(MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem)
	-> (MOVWstore [4] dst (MOVWload [4] src mem)
		(MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem)
	-> (MOVLstore [3] dst (MOVLload [3] src mem)
		(MOVLstore dst (MOVLload src mem) mem))
(Move [9] dst src mem)
	-> (MOVBstore [8] dst (MOVBload [8] src mem)
		(MOVQstore dst (MOVQload src mem) mem))
(Move [10] dst src mem)
	-> (MOVWstore [8] dst (MOVWload [8] src mem)
		(MOVQstore dst (MOVQload src mem) mem))
(Move [12] dst src mem)
	-> (MOVLstore [8] dst (MOVLload [8] src mem)
		(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15
	-> (MOVQstore [s-8] dst (MOVQload [s-8] src mem)
		(MOVQstore dst (MOVQload src mem) mem))

// Adjust moves to be a multiple of 16 bytes.
// The leading s%16 bytes are covered by one 8- or 16-byte copy from the
// start; the remaining multiple of 16 is moved starting at offset s%16.
(Move [s] dst src mem)
	&& s > 16 && s%16 != 0 && s%16 <= 8
	-> (Move [s-s%16]
		(OffPtr <dst.Type> dst [s%16])
		(OffPtr <src.Type> src [s%16])
		(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
	&& s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE
	-> (Move [s-s%16]
		(OffPtr <dst.Type> dst [s%16])
		(OffPtr <src.Type> src [s%16])
		(MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
	&& s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE
	-> (Move [s-s%16]
		(OffPtr <dst.Type> dst [s%16])
		(OffPtr <src.Type> src [s%16])
		(MOVQstore [8] dst (MOVQload [8] src mem)
			(MOVQstore dst (MOVQload src mem) mem)))

// Medium copying uses a duff device.
(Move [s] dst src mem)
	&& s > 64 && s <= 16*64 && s%16 == 0
	&& !config.noDuffDevice
	-> (DUFFCOPY [14*(64-s/16)] dst src mem)
// 14 and 64 are magic constants.  14 is the number of bytes to encode:
//	MOVUPS	(SI), X0
//	ADDQ	$16, SI
//	MOVUPS	X0, (DI)
//	ADDQ	$16, DI
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

// Large copying uses REP MOVSQ.
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0
	-> (REPMOVSQ dst src (MOVQconst [s/8]) mem)

// Lowering Zero instructions
// Sizes that match a register width become one constant store.
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)

// Odd sizes below 8 bytes use two constant stores; the second argument of
// makeValAndOff is the offset of the second store (for size 7 the two
// 4-byte stores overlap).
(Zero [3] destptr mem)
	-> (MOVBstoreconst [makeValAndOff(0,2)] destptr
		(MOVWstoreconst [0] destptr mem))
(Zero [5] destptr mem)
	-> (MOVBstoreconst [makeValAndOff(0,4)] destptr
		(MOVLstoreconst [0] destptr mem))
(Zero [6] destptr mem)
	-> (MOVWstoreconst [makeValAndOff(0,4)] destptr
		(MOVLstoreconst [0] destptr mem))
(Zero [7] destptr mem)
	-> (MOVLstoreconst [makeValAndOff(0,3)] destptr
		(MOVLstoreconst [0] destptr mem))

// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE
	-> (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
		(MOVQstoreconst [0] destptr mem))

// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE
	-> (MOVQstoreconst [makeValAndOff(0,8)] destptr
		(MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE
	-> (MOVQstoreconst [makeValAndOff(0,16)] destptr
		(MOVQstoreconst [makeValAndOff(0,8)] destptr
			(MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE
	-> (MOVQstoreconst [makeValAndOff(0,24)] destptr
		(MOVQstoreconst [makeValAndOff(0,16)] destptr
			(MOVQstoreconst [makeValAndOff(0,8)] destptr
				(MOVQstoreconst [0] destptr mem))))

// With SSE, 9..15 bytes are zeroed by two 8-byte stores, the second one
// placed at offset s-8.
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE
	-> (MOVQstoreconst [makeValAndOff(0,s-8)] destptr
		(MOVQstoreconst [0] destptr mem))

// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE
	-> (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
		(MOVOstore destptr (MOVOconst [0]) mem))

(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE
	-> (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
		(MOVQstoreconst [0] destptr mem))

// With SSE, 16/32/48/64 bytes are zeroed by a chain of 16-byte MOVOstore
// ops at increasing offsets.
(Zero [16] destptr mem) && config.useSSE
	-> (MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) && config.useSSE
	-> (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
		(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) && config.useSSE
	-> (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
		(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
			(MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) && config.useSSE
	-> (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
		(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
			(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
				(MOVOstore destptr (MOVOconst [0]) mem))))

// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
	&& s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice
	-> (DUFFZERO [s] destptr (MOVOconst [0]) mem)

// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
	&& (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
	&& s%8 == 0
	-> (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)

// Lowering constants
// 8-, 16- and 32-bit integer constants and booleans all become MOVLconst.
(Const(8|16|32) [val]) -> (MOVLconst [val])
(Const64 [val]) -> (MOVQconst [val])
(Const(32|64)F [val]) -> (MOVS(S|D)const [val])
(ConstNil) -> (MOVQconst [0])
(ConstBool [b]) -> (MOVLconst [b])

// Lowering calls
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
// Note that the CMOV takes its operands as (y x), swapped relative to CondSelect.
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t)) ->
	(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t) ->
	(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t) ->
	(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)

// If the condition does not set the flags, we need to generate a comparison.
// Conditions narrower than 8 bytes are zero-extended to 64 bits first, so
// that the 8-byte rules below can compare them against zero.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1 ->
	(CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2 ->
	(CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4 ->
	(CondSelect <t> x y (MOVLQZX <typ.UInt64> check))

(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) ->
	(CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) ->
	(CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) ->
	(CMOVWNE y x (CMPQconst [0] check))

// Absorb InvertFlags
// EQ and NE are unchanged; every ordered condition is replaced by its mirror
// (LT<->GT, LE<->GE, HI<->CS, CC<->LS).
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) ->
	(CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) ->
	(CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond)) ->
	(CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)

// Absorb constants generated during lower
// With a constant flags argument the CMOV reduces to one of its operands:
// the second operand when the condition holds for that flag state, the
// first otherwise.
(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y

// Miscellaneous
(IsNonNil p) -> (SETNE (TESTQ p p))
(IsInBounds idx len) -> (SETB (CMPQ idx len))
(IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
(GetG mem) -> (LoweredGetG mem)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(GetCallerPC) -> (LoweredGetCallerPC)
(GetCallerSP) -> (LoweredGetCallerSP)
(Addr {sym} base) -> (LEAQ {sym} base)
(LocalAddr {sym} base _) -> (LEAQ {sym} base)

// A byte store of a SETxx result that has no other use is fused into the
// matching SETxxstore op.
(MOVBstore [off] {sym} ptr y:(SET(L|LE|G|GE|EQ|NE|B|BE|A|AE) x) mem) && y.Uses == 1
	-> (SET(L|LE|G|GE|EQ|NE|B|BE|A|AE)store [off] {sym} ptr x mem)

// block rewrites
// An If controlled by a SETxx branches directly on the flags that fed it.
(If (SET(L|LE|G|GE|EQ|NE|B|BE|A|AE|O) cmp) yes no) -> ((LT|LE|GT|GE|EQ|NE|ULT|ULE|UGT|UGE|OS) cmp yes no)

// Special case for floating point - LF/LEF not generated
(If (SET(GF|GEF|EQF|NEF) cmp) yes no) -> ((UGT|UGE|EQF|NEF) cmp yes no)

// Fallback for any other boolean control value: test it against itself and
// branch on non-zero.
(If cond yes no) -> (NE (TESTB cond cond) yes no)

// Atomic loads.  Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad(8|32|64|Ptr) ptr mem) -> (MOV(B|L|Q|Q)atomicload ptr mem)

// Atomic stores.  We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property.  Use normal stores for those?
(AtomicStore8 ptr val mem) -> (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))

// Atomic exchanges.
(AtomicExchange(32|64) ptr val mem) -> (XCHG(L|Q) val ptr mem)

// Atomic adds.
// The add is lowered to XADDxlock wrapped in AddTupleFirst. Selecting the
// first element of that wrapper adds val to the first element of the XADD
// tuple; selecting the second passes the tuple's second element through.
(AtomicAdd(32|64) ptr val mem) -> (AddTupleFirst(32|64) val (XADD(L|Q)lock val ptr mem))
(Select0 <t> (AddTupleFirst(32|64) val tuple)) -> (ADD(L|Q) val (Select0 <t> tuple))
(Select1 (AddTupleFirst(32|64) _ tuple)) -> (Select1 tuple)

// Atomic compare and swap.
(AtomicCompareAndSwap(32|64) ptr old new_ mem) -> (CMPXCHG(L|Q)lock ptr old new_ mem)

// Atomic memory updates.
(Atomic(And|Or)8 ptr val mem) -> ((AND|OR)Block ptr val mem)

// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)

// Bounds-check panics pick one of three lowered ops according to boundsABI(kind).
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)

// ***************************
// Above: lowering rules
// Below: optimizations
// ***************************
// TODO: Should the optimizations be a separate pass?

// Fold boolean tests into blocks
// An NE block that tests a SETxx result against itself branches on the
// original flags instead.
(NE (TESTB (SET(L|LE|G|GE|EQ|NE|B|BE|A|AE|O) cmp) (SET(L|LE|G|GE|EQ|NE|B|BE|A|AE|O) cmp)) yes no)
	-> ((LT|LE|GT|GE|EQ|NE|ULT|ULE|UGT|UGE|OS) cmp yes no)

// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
// Block form.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) ->
	((ULT|UGE) (BTLconst [log2uint32(c)] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) ->
	((ULT|UGE) (BTQconst [log2(c)] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) ->
	((ULT|UGE) (BTQconst [log2(c)] x))
// SETcc form.
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) ->
	(SET(B|AE) (BTLconst [log2uint32(c)] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) ->
	(SET(B|AE) (BTQconst [log2(c)] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) ->
	(SET(B|AE) (BTQconst [log2(c)] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) ->
	(SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) ->
	(SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c) ->
	(SET(B|AE)store [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c) ->
	(SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) ->
	(SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)

// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
// A constant bit test of a constant-shifted value tests the correspondingly
// moved bit of the unshifted value; testing bit 0 of a variable right shift
// becomes a variable bit test.
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) -> (BTL y x)

// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _)))
	-> (SET(EQ|NE) (CMPLconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem)
	-> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _)))
	-> (SET(EQ|NE) (CMPQconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem)
	-> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)

// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTC(Q|L) x y)

// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $0x40,AX is 3 bytes, ORL $0x80,AX is 6 bytes).
// OR/XOR with a single-bit constant of at least 128 becomes BTS/BTC.
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 ->
	(BT(S|C)Qconst [log2(c)] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 ->
	(BT(S|C)Lconst [log2uint32(c)] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 ->
	(BT(S|C)Qconst [log2(c)] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 ->
	(BT(S|C)Lconst [log2uint32(c)] x)

// Recognize bit clearing: a &^= 1<<b
// The constant forms apply the same >= 128 threshold to the complement of
// the mask.
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) -> (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 ->
	(BTRQconst [log2(^c)] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 ->
	(BTRLconst [log2uint32(^c)] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 ->
	(BTRQconst [log2(^c)] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 ->
	(BTRLconst [log2uint32(^c)] x)

// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
// for instance:
//    x & 0xFFFF0000 -> (x >> 16) << 16
//    x & 0x80000000 -> (x >> 31) << 31
//
// In case the mask is just one bit (like second example above), it conflicts
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
// We thus special-case them, by detecting the shift patterns.
// Special case resetting first/last bit
// (shift out and back by one position == clear the bit that was shifted out).
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
    -> (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x))
    -> (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x))
    -> (BTRQconst [63] x)

// Special case testing first/last bit (with double-shift generated by generic.rules)
// The z1==z2 condition requires both TEST operands to be the same value.
// NOTE(review): the non-store TESTL rule below matches SHRQconst and emits
// BTQconst, while its store counterpart uses SHRLconst/BTLconst - confirm
// the asymmetry is intentional.
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)

// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
// The outer set/reset of bit c makes the inner op on the same bit irrelevant.
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)

// Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) -> (SETEQ x)
(XORLconst [1] (SETEQ x)) -> (SETNE x)
(XORLconst [1] (SETL x)) -> (SETGE x)
(XORLconst [1] (SETGE x)) -> (SETL x)
(XORLconst [1] (SETLE x)) -> (SETG x)
(XORLconst [1] (SETG x)) -> (SETLE x)
(XORLconst [1] (SETB x)) -> (SETAE x)
(XORLconst [1] (SETAE x)) -> (SETB x)
(XORLconst [1] (SETBE x)) -> (SETA x)
(XORLconst [1] (SETA x)) -> (SETBE x)

// Special case for floating point - LF/LEF not generated
(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)

// Disabled because it interferes with the pattern match above and makes worse code.
// (SETNEF x) -> (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
// (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))

// fold constants into instructions
// The Q forms require is32Bit(c); the L forms fold unconditionally.
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)

// A constant on the left of a subtraction is handled as -(x - c).
(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))

(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
(MULL x (MOVLconst [c])) -> (MULLconst [c] x)

(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)

// Merge stacked constant bit operations into a single immediate.
// When a BTR/BTC/BTS bit index is folded into an AND/XOR/OR immediate, the
// combined constant is checked with is32Bit first: setting, clearing or
// toggling a high bit (e.g. bit 40 via BTSQconst) can otherwise produce a
// value that the other *const rules in this file only create under is32Bit.
// When the check fails the two ops are simply left unmerged.
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) && is32Bit(d &^ (1<<uint32(c))) -> (AND(L|Q)const [d &^ (1<<uint32(c))] x)
(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) && is32Bit(c &^ (1<<uint32(d))) -> (AND(L|Q)const [c &^ (1<<uint32(d))] x)
(BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) && is32Bit(d ^ 1<<uint32(c)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) && is32Bit(c ^ 1<<uint32(d)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) && is32Bit(c | 1<<uint32(d)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) && is32Bit(d | 1<<uint32(c)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) && is32Bit(1<<uint32(d) | 1<<uint32(c)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x)

// Products of two constants: the L form truncates to 32 bits, the Q form
// only merges when the product still passes is32Bit.
(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)

// Fold constant operands into OR/XOR (Q forms only under is32Bit).
(ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
(ORL x (MOVLconst [c])) -> (ORLconst [c] x)

(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
(XORL x (MOVLconst [c])) -> (XORLconst [c] x)

// Shifts by a constant amount: the count is masked to 6 bits for Q ops
// and 5 bits for narrower ops before becoming the immediate.
(SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
(SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)

// Logical right shifts of 16/8-bit values by at least their width yield zero.
(SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
(SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 -> (MOVLconst [0])

// Arithmetic right shifts of 16/8-bit values clamp the count at width-1.
(SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
(SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
(SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
(SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)

// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
// Drop adds of multiples of 64/32 and ANDs that keep all low 6/5 bits from the
// shift-count operand, both directly and underneath a negation.
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))

((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))

((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))

((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))

// Constant rotate instructions
// A left shift by c combined with a right shift by width-c of the same value.
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])

// The 16- and 8-bit forms additionally require the result type to have that size.
((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 -> (ROLBconst x [c])

// Two constant rotates in a row add up modulo the operand width.
(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)

(RotateLeft8 a b) -> (ROLB a b)
(RotateLeft16 a b) -> (ROLW a b)
(RotateLeft32 a b) -> (ROLL a b)
(RotateLeft64 a b) -> (ROLQ a b)

// Non-constant rotates.
// We want to issue a rotate when the Go source contains code like
//     y &= 63
//     x << y | x >> (64-y)
// The shift rules above convert << to SHLx and >> to SHRx.
// SHRx converts its shift argument from 64-y to -y.
// A tricky situation occurs when y==0. Then the original code would be:
//     x << 0 | x >> 64
// But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
// to force the second term to 0. We don't need that mask, but we must match
// it in order to strip it out.
(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y)
(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y)

(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y)
(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y)

// Help with rotate detection
(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) -> (FlagLT_ULT)

// 16-bit variable rotates; only matched when the result type is 2 bytes wide.
(ORL (SHLL x (AND(Q|L)const y [15]))
     (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
  && v.Type.Size() == 2
  -> (ROLW x y)
(ORL (SHRW x (AND(Q|L)const y [15]))
     (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
  && v.Type.Size() == 2
  -> (RORW x y)

// 8-bit variable rotates; only matched when the result type is 1 byte wide.
(ORL (SHLL x (AND(Q|L)const y [ 7]))
     (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
  && v.Type.Size() == 1
  -> (ROLB x y)
(ORL (SHRB x (AND(Q|L)const y [ 7]))
     (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
  && v.Type.Size() == 1
  -> (RORB x y)

// rotate left negative = rotate right
(ROLQ x (NEG(Q|L) y)) -> (RORQ x y)
(ROLL x (NEG(Q|L) y)) -> (RORL x y)
(ROLW x (NEG(Q|L) y)) -> (RORW x y)
(ROLB x (NEG(Q|L) y)) -> (RORB x y)

// rotate right negative = rotate left
(RORQ x (NEG(Q|L) y)) -> (ROLQ x y)
(RORL x (NEG(Q|L) y)) -> (ROLL x y)
(RORW x (NEG(Q|L) y)) -> (ROLW x y)
(RORB x (NEG(Q|L) y)) -> (ROLB x y)

// rotate by constants
// Right rotates by c are expressed as left rotates by -c modulo the width.
(ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x)
(ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x)
(ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x)
(ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x)

(RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x)
(RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x)
(RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x)
(RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x)

// Constant shift simplifications
((SHLQ|SHRQ|SARQ)const x [0]) -> x
((SHLL|SHRL|SARL)const x [0]) -> x
((SHRW|SARW)const x [0]) -> x
((SHRB|SARB)const x [0]) -> x
((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x

// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
// for the small shifts. I don't think we'll ever generate a weird shift (e.g.
// (SHRW x (MOVLconst [24])), but just in case.

// Fold constants into compares. A constant on the left is moved to the right
// and the resulting flags are wrapped in InvertFlags.
(CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
(CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))

// Using MOVZX instead of AND is cheaper.
(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)

// strength reduction
// Assumes the following costs from https://gmplib.org/~tege/x86-timing.pdf:
//    1 - addq, shlq, leaq, negq, subq
//    3 - imulq
// This limits the rewrites to two instructions.
// Note that negq always operates in-place,
// which can require a register-register move
// to preserve the original value,
// so it must be used with care.
// Multiplications by small constants, expressed with at most two LEA/NEG ops.
(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
(MUL(Q|L)const [ 1] x) -> x
(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))

// Constants near a power of two (2^k-1, 2^k+1, 2^k+2, 2^k+4, 2^k+8) and
// 3/5/9 times a power of two become a shift combined with SUB/LEA.
(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))

// combine add/shift into LEAQ/LEAL
(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)

// combine ADDQ/ADDQconst into LEAQ1/LEAL1
(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)

// fold ADDQ/ADDL into LEAQ/LEAL
// The two-operand forms are skipped when either operand is the SB pseudo-value.
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)

// fold ADDQconst/ADDLconst into LEAQx/LEALx
// A constant added to the scaled index operand is multiplied by the scale
// before being merged into the displacement.
(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)

// fold shifts into LEAQx/LEALx
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)

// reverse ordering of compare instruction
// Ordered conditions swap direction under InvertFlags; EQ and NE are unchanged.
(SETL (InvertFlags x)) -> (SETG x)
(SETG (InvertFlags x)) -> (SETL x)
(SETB (InvertFlags x)) -> (SETA x)
(SETA (InvertFlags x)) -> (SETB x)
(SETLE (InvertFlags x)) -> (SETGE x)
(SETGE (InvertFlags x)) -> (SETLE x)
(SETBE (InvertFlags x)) -> (SETAE x)
(SETAE (InvertFlags x)) -> (SETBE x)
(SETEQ (InvertFlags x)) -> (SETEQ x)
(SETNE (InvertFlags x)) -> (SETNE x)

(SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem)
(SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem)
(SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem)
(SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem)
(SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem)
(SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem)
(SETBEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem)
(SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem)
(SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem)
(SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem)

// sign- and zero-extended loads
// Note: The combined instruction must end up in the same block
// as the original load. If not, we end up making a value with
// memory type live in two different blocks, which can lead to
// multiple memory values alive simultaneously.
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values.  See test/atomicload.go.
(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)

// Drop a zero extension when the helper reports the upper bits already zero.
(MOVLQZX x) && zeroUpper32Bits(x,3) -> x
(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
(MOVBQZX x) && zeroUpper56Bits(x,3) -> x

// Zero extension of a single-use indexed load of the same width is the load itself.
(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)

// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x)
(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x)
(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x)
(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x)

// Fold extensions and ANDs together.
// An extension applied to an ANDLconst folds into the mask. The sign-extending
// forms only fire when the mask clears the sign bit of the narrow value.
(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)

// Don't extend before storing
// (the extension has the same width as the store that consumes it).
(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)

// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
// Merge an ADDQconst on the address operand into the memory op's offset.
// Plain offsets are merged only when the sum passes is32Bit; ops carrying a
// combined value-and-offset aux use ValAndOff's canAdd/add instead.
(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    (CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// Merge an ADDQconst on the address operand into read-modify-write ops.
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)

// Fold constants into stores.
// The narrower stores truncate the constant to the stored width before
// packing it with the offset via makeValAndOff.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
    (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)

// Fold address offsets into constant stores.
(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
    (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)

// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
// Fold a LEAQ (constant offset + symbol) into the memory op that consumes
// its result. The combined offset must still fit in 32 bits and the two
// symbols must be mergeable.
(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
	(MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	(CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)

// generating indexed loads and stores
// A LEAQ<scale> feeding a load/store becomes the matching idx<scale> form.
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)

// A plain ADDQ of two registers is an index-1 address, unless the base is SB.
(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
	(MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
	(MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)

// Constant stores through LEAQ<scale>. The offset half of the ValAndOff must
// be able to absorb the LEAQ offset, so guard with canAdd before calling add,
// exactly as the non-indexed storeconst rule above and the storeconstidx
// ADDQconst rules below do.
(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)

// combine SHLQ into indexed loads and stores
// An index shifted left by 1/2/3 is the same as scaling it by 2/4/8.
(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)

(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)

// combine ADDQ into pointer of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)


// combine ADDQ into index of indexed loads and stores
// The constant added to the index is multiplied by the addressing scale.
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)

// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)

// LEAQ into LEAQ1
(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ1 into LEAQ
(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ into LEAQ[248]
(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ[248] into LEAQ
(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// Absorb InvertFlags into branches.
// Ordered conditions are mirrored; EQ and NE are unaffected.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)

// Constant comparisons.
// Both operands are constants, so the flags are known: the result names the
// signed ordering (LT/GT) and the unsigned ordering (ULT/UGT) at the
// operand width of the compare.
(CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
// 16-bit and 8-bit compares of two constants: the flags are fully determined
// by the signed and the unsigned ordering of the truncated operands.
(CMPWconst (MOVLconst [lhs]) [rhs]) && uint16(lhs)>uint16(rhs) && int16(lhs)<int16(rhs) -> (FlagLT_UGT)
(CMPWconst (MOVLconst [lhs]) [rhs]) && uint16(lhs)<uint16(rhs) && int16(lhs)>int16(rhs) -> (FlagGT_ULT)
(CMPWconst (MOVLconst [lhs]) [rhs]) && uint16(lhs)>uint16(rhs) && int16(lhs)>int16(rhs) -> (FlagGT_UGT)
(CMPBconst (MOVLconst [lhs]) [rhs]) && int8(lhs)==int8(rhs) -> (FlagEQ)
(CMPBconst (MOVLconst [lhs]) [rhs]) && uint8(lhs)<uint8(rhs) && int8(lhs)<int8(rhs) -> (FlagLT_ULT)
(CMPBconst (MOVLconst [lhs]) [rhs]) && uint8(lhs)>uint8(rhs) && int8(lhs)<int8(rhs) -> (FlagLT_UGT)
(CMPBconst (MOVLconst [lhs]) [rhs]) && uint8(lhs)<uint8(rhs) && int8(lhs)>int8(rhs) -> (FlagGT_ULT)
(CMPBconst (MOVLconst [lhs]) [rhs]) && uint8(lhs)>uint8(rhs) && int8(lhs)>int8(rhs) -> (FlagGT_UGT)

// Other known comparisons.
// The left operand is bounded (zero-extended, shifted right, or masked) and
// the bound is below the constant it is compared with, so the outcome is
// always "less than" in both the signed and the unsigned sense.
(CMPQconst (MOVBQZX _) [limit]) && limit > 0xFF -> (FlagLT_ULT)
(CMPQconst (MOVWQZX _) [limit]) && limit > 0xFFFF -> (FlagLT_ULT)
(CMPQconst (MOVLQZX _) [limit]) && limit > 0xFFFFFFFF -> (FlagLT_ULT)
(CMPLconst (SHRLconst _ [sh]) [limit]) && limit >= 0 && sh > 0 && sh <= 32 && (1<<uint64(32-sh)) <= uint64(limit) -> (FlagLT_ULT)
(CMPQconst (SHRQconst _ [sh]) [limit]) && limit >= 0 && sh > 0 && sh <= 64 && (1<<uint64(64-sh)) <= uint64(limit) -> (FlagLT_ULT)
(CMPQconst (ANDQconst _ [mask]) [limit]) && mask >= 0 && mask < limit -> (FlagLT_ULT)
(CMPQconst (ANDLconst _ [mask]) [limit]) && mask >= 0 && mask < limit -> (FlagLT_ULT)
(CMPLconst (ANDLconst _ [mask]) [limit]) && int32(mask) >= 0 && int32(mask) < int32(limit) -> (FlagLT_ULT)
(CMPWconst (ANDLconst _ [mask]) [limit]) && int16(mask) >= 0 && int16(mask) < int16(limit) -> (FlagLT_ULT)
(CMPBconst (ANDLconst _ [mask]) [limit]) && int8(mask) >= 0 && int8(mask) < int8(limit) -> (FlagLT_ULT)

// TODO: DIVxU also.

// Absorb flag constants into SBB ops.
// With known flags the carry mask is a constant: all ones (-1) for the
// *_ULT flag constants, zero for every other flag constant.
(SBB(Q|L)carrymask (FlagEQ)) -> (MOV(Q|L)const [0])
(SBB(Q|L)carrymask (FlagLT_ULT)) -> (MOV(Q|L)const [-1])
(SBB(Q|L)carrymask (FlagLT_UGT)) -> (MOV(Q|L)const [0])
(SBB(Q|L)carrymask (FlagGT_ULT)) -> (MOV(Q|L)const [-1])
(SBB(Q|L)carrymask (FlagGT_UGT)) -> (MOV(Q|L)const [0])

// Absorb flag constants into branches.
// Condition holds for the flag constant: rewrite to (First yes no).
((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) -> (First yes no)
((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) -> (First yes no)
((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) -> (First yes no)
((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) -> (First yes no)
((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) -> (First yes no)
// Condition fails for the flag constant: rewrite to (First no yes),
// i.e. with the two successors swapped.
((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) -> (First no yes)
((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) -> (First no yes)
((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) -> (First no yes)
((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) -> (First no yes)
((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) -> (First no yes)

// Absorb flag constants into SETxx ops.
// A SETxx of a known flag constant is the constant 1 when the condition
// holds for that flag constant and 0 when it does not.
(SET(EQ|LE|GE|BE|AE) (FlagEQ)) -> (MOVLconst [1])
(SET(NE|L|G|B|A) (FlagEQ)) -> (MOVLconst [0])
(SET(NE|L|LE|B|BE) (FlagLT_ULT)) -> (MOVLconst [1])
(SET(EQ|G|GE|A|AE) (FlagLT_ULT)) -> (MOVLconst [0])
(SET(NE|L|LE|A|AE) (FlagLT_UGT)) -> (MOVLconst [1])
(SET(EQ|G|GE|B|BE) (FlagLT_UGT)) -> (MOVLconst [0])
(SET(NE|G|GE|B|BE) (FlagGT_ULT)) -> (MOVLconst [1])
(SET(EQ|L|LE|A|AE) (FlagGT_ULT)) -> (MOVLconst [0])
(SET(NE|G|GE|A|AE) (FlagGT_UGT)) -> (MOVLconst [1])
(SET(EQ|L|LE|B|BE) (FlagGT_UGT)) -> (MOVLconst [0])

// Same idea for the store forms of the equality and signed conditions:
// the SETxxstore becomes a byte store of the constant 1 or 0.
// Rules are grouped by flag constant; the first rule of each pair lists the
// conditions that hold, the second the conditions that fail.
// (SETGEstore with FlagGT_UGT is handled by the rule following this group.)
(SET(EQ|LE|GE)store [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(NE|L|G)store [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(NE|L|LE)store [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(EQ|G|GE)store [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(NE|L|LE)store [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(EQ|G|GE)store [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(NE|G|GE)store [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(EQ|L|LE)store [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(NE|G)store [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(EQ|L|LE)store [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
(SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)

// Store forms of the unsigned conditions with a known flag constant:
// the SETxxstore becomes a byte store of the constant 1 or 0.
// Rules are grouped by flag constant; the first rule of each pair lists the
// conditions that hold, the second the conditions that fail.
(SET(BE|AE)store [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(B|A)store [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(B|BE)store [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(A|AE)store [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(A|AE)store [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(B|BE)store [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(B|BE)store [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(A|AE)store [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

(SET(A|AE)store [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
(SET(B|BE)store [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)

// Remove redundant *const ops
// Identity constants first: adding, subtracting, or-ing or xor-ing zero.
// The 32-bit forms compare only the low 32 bits of the constant.
((ADD|SUB|OR|XOR)Qconst [0] x) -> x
((ADD|SUB|OR|XOR)Lconst [c] x) && int32(c)==0 -> x
// AND with zero / OR with all ones yields the constant itself;
// AND with all ones is the identity.
(ANDQconst [0] _) -> (MOVQconst [0])
(ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
(ANDQconst [-1] x) -> x
(ANDLconst [c] x) && int32(c)==-1 -> x
(ORQconst [-1] _) -> (MOVQconst [-1])
(ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
// TODO: since we got rid of the W/B versions, we might miss
// things like (ANDLconst [0x100] x) which were formerly
// (ANDBconst [0] x).  Probably doesn't happen very often.
// If we cared, we might do:
//  (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0])

// Remove redundant ops
// Not in generic rules, because they may appear after lowering e. g. Slicemask
(NEG(Q|L) (NEG(Q|L) x)) -> x
(NEG(Q|L) s:(SUB(Q|L) x y)) && s.Uses == 1 -> (SUB(Q|L) y x)

// Convert constant subtracts to constant adds
// -(1<<31) is excluded for the 64-bit form because its negation is not a
// 32-bit constant.
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)

// generic constant folding
// TODO: more of this
(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
(ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
(ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x)
(ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
(SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c])
(SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x)
(SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
(SARLconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int32(d))>>uint64(c)])
(SARWconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int16(d))>>uint64(c)])
(SARBconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int8(d))>>uint64(c)])
(NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
(NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
(MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
(MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])
(AND(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [c&d])
(OR(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [c|d])
(XOR(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [c^d])
(NOT(Q|L) (MOV(Q|L)const [c])) -> (MOV(Q|L)const [^c])
(BTS(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [d|(1<<uint32(c))])
(BTR(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [d&^(1<<uint32(c))])
(BTC(Q|L)const [c] (MOV(Q|L)const [d])) -> (MOV(Q|L)const [d^(1<<uint32(c))])

// generic simplifications
// TODO: more of this
(ADD(Q|L) x (NEG(Q|L) y)) -> (SUB(Q|L) x y)
(SUB(Q|L) x x) -> (MOV(Q|L)const [0])
(AND(Q|L) x x) -> x
(OR(Q|L) x x) -> x
(XOR(Q|L) x x) -> (MOV(Q|L)const [0])

// Fold NEG into ADDconst/MULconst. Take care to keep c in 32 bit range.
(NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) -> (ADDQconst [-c] x)
(MULQconst [c] (NEGQ x)) && c != -(1<<31) -> (MULQconst [-c] x)

// checking AND against 0.
// Comparing an AND result with zero is a TEST of the AND's operands.
(CMPQconst (ANDQ x y) [0]) -> (TESTQ x y)
(CMP(L|W|B)const (ANDL x y) [0]) -> (TEST(L|W|B) x y)
(CMP(Q|L)const (AND(Q|L)const [c] x) [0]) -> (TEST(Q|L)const [c] x)
(CMPWconst (ANDLconst [c] x) [0]) -> (TESTWconst [int64(int16(c))] x)
(CMPBconst (ANDLconst [c] x) [0]) -> (TESTBconst [int64(int8(c))] x)

// Convert TESTx to TESTxconst if possible.
1502 (TESTQ (MOVQconst [c]) x) && is32Bit(c) -> (TESTQconst [c] x) 1503 (TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x) 1504 (TESTW (MOVLconst [c]) x) -> (TESTWconst [c] x) 1505 (TESTB (MOVLconst [c]) x) -> (TESTBconst [c] x) 1506 1507 // TEST %reg,%reg is shorter than CMP 1508 (CMPQconst x [0]) -> (TESTQ x x) 1509 (CMPLconst x [0]) -> (TESTL x x) 1510 (CMPWconst x [0]) -> (TESTW x x) 1511 (CMPBconst x [0]) -> (TESTB x x) 1512 (TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst -> (TESTQ x x) 1513 (TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTL x x) 1514 (TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTW x x) 1515 (TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTB x x) 1516 1517 // Combining byte loads into larger (unaligned) loads. 1518 // There are many ways these combinations could occur. This is 1519 // designed to match the way encoding/binary.LittleEndian does it. 1520 1521 // Little-endian loads 1522 1523 (ORL x0:(MOVBload [i0] {s} p mem) 1524 sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) 1525 && i1 == i0+1 1526 && x0.Uses == 1 1527 && x1.Uses == 1 1528 && sh.Uses == 1 1529 && mergePoint(b,x0,x1) != nil 1530 && clobber(x0) 1531 && clobber(x1) 1532 && clobber(sh) 1533 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) 1534 1535 (ORQ x0:(MOVBload [i0] {s} p mem) 1536 sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) 1537 && i1 == i0+1 1538 && x0.Uses == 1 1539 && x1.Uses == 1 1540 && sh.Uses == 1 1541 && mergePoint(b,x0,x1) != nil 1542 && clobber(x0) 1543 && clobber(x1) 1544 && clobber(sh) 1545 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) 1546 1547 (ORL x0:(MOVWload [i0] {s} p mem) 1548 sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) 1549 && i1 == i0+2 1550 && x0.Uses == 1 1551 && x1.Uses == 1 1552 && sh.Uses == 1 1553 && mergePoint(b,x0,x1) != nil 1554 && clobber(x0) 1555 && clobber(x1) 1556 && clobber(sh) 1557 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) 1558 1559 (ORQ x0:(MOVWload [i0] {s} p mem) 1560 sh:(SHLQconst [16] 
x1:(MOVWload [i1] {s} p mem))) 1561 && i1 == i0+2 1562 && x0.Uses == 1 1563 && x1.Uses == 1 1564 && sh.Uses == 1 1565 && mergePoint(b,x0,x1) != nil 1566 && clobber(x0) 1567 && clobber(x1) 1568 && clobber(sh) 1569 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) 1570 1571 (ORQ x0:(MOVLload [i0] {s} p mem) 1572 sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) 1573 && i1 == i0+4 1574 && x0.Uses == 1 1575 && x1.Uses == 1 1576 && sh.Uses == 1 1577 && mergePoint(b,x0,x1) != nil 1578 && clobber(x0) 1579 && clobber(x1) 1580 && clobber(sh) 1581 -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) 1582 1583 (ORL 1584 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) 1585 or:(ORL 1586 s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) 1587 y)) 1588 && i1 == i0+1 1589 && j1 == j0+8 1590 && j0 % 16 == 0 1591 && x0.Uses == 1 1592 && x1.Uses == 1 1593 && s0.Uses == 1 1594 && s1.Uses == 1 1595 && or.Uses == 1 1596 && mergePoint(b,x0,x1,y) != nil 1597 && clobber(x0) 1598 && clobber(x1) 1599 && clobber(s0) 1600 && clobber(s1) 1601 && clobber(or) 1602 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) 1603 1604 (ORQ 1605 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) 1606 or:(ORQ 1607 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) 1608 y)) 1609 && i1 == i0+1 1610 && j1 == j0+8 1611 && j0 % 16 == 0 1612 && x0.Uses == 1 1613 && x1.Uses == 1 1614 && s0.Uses == 1 1615 && s1.Uses == 1 1616 && or.Uses == 1 1617 && mergePoint(b,x0,x1,y) != nil 1618 && clobber(x0) 1619 && clobber(x1) 1620 && clobber(s0) 1621 && clobber(s1) 1622 && clobber(or) 1623 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) 1624 1625 (ORQ 1626 s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) 1627 or:(ORQ 1628 s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) 1629 y)) 1630 && i1 == i0+2 1631 && j1 == j0+16 1632 && j0 % 32 == 0 1633 && x0.Uses == 1 1634 && x1.Uses == 1 1635 && s0.Uses == 1 1636 && s1.Uses == 1 1637 && or.Uses == 1 
1638 && mergePoint(b,x0,x1,y) != nil 1639 && clobber(x0) 1640 && clobber(x1) 1641 && clobber(s0) 1642 && clobber(s1) 1643 && clobber(or) 1644 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y) 1645 1646 // Little-endian indexed loads 1647 1648 (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) 1649 sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) 1650 && i1 == i0+1 1651 && x0.Uses == 1 1652 && x1.Uses == 1 1653 && sh.Uses == 1 1654 && mergePoint(b,x0,x1) != nil 1655 && clobber(x0) 1656 && clobber(x1) 1657 && clobber(sh) 1658 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem) 1659 1660 (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) 1661 sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) 1662 && i1 == i0+1 1663 && x0.Uses == 1 1664 && x1.Uses == 1 1665 && sh.Uses == 1 1666 && mergePoint(b,x0,x1) != nil 1667 && clobber(x0) 1668 && clobber(x1) 1669 && clobber(sh) 1670 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem) 1671 1672 (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) 1673 sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 1674 && i1 == i0+2 1675 && x0.Uses == 1 1676 && x1.Uses == 1 1677 && sh.Uses == 1 1678 && mergePoint(b,x0,x1) != nil 1679 && clobber(x0) 1680 && clobber(x1) 1681 && clobber(sh) 1682 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) 1683 1684 (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) 1685 sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 1686 && i1 == i0+2 1687 && x0.Uses == 1 1688 && x1.Uses == 1 1689 && sh.Uses == 1 1690 && mergePoint(b,x0,x1) != nil 1691 && clobber(x0) 1692 && clobber(x1) 1693 && clobber(sh) 1694 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) 1695 1696 (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) 1697 sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem))) 1698 && i1 == i0+4 1699 && x0.Uses == 1 1700 && x1.Uses == 1 1701 && sh.Uses == 1 1702 && mergePoint(b,x0,x1) != nil 1703 && clobber(x0) 1704 && clobber(x1) 1705 && 
clobber(sh) 1706 -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem) 1707 1708 (ORL 1709 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 1710 or:(ORL 1711 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 1712 y)) 1713 && i1 == i0+1 1714 && j1 == j0+8 1715 && j0 % 16 == 0 1716 && x0.Uses == 1 1717 && x1.Uses == 1 1718 && s0.Uses == 1 1719 && s1.Uses == 1 1720 && or.Uses == 1 1721 && mergePoint(b,x0,x1,y) != nil 1722 && clobber(x0) 1723 && clobber(x1) 1724 && clobber(s0) 1725 && clobber(s1) 1726 && clobber(or) 1727 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) 1728 1729 (ORQ 1730 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 1731 or:(ORQ 1732 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 1733 y)) 1734 && i1 == i0+1 1735 && j1 == j0+8 1736 && j0 % 16 == 0 1737 && x0.Uses == 1 1738 && x1.Uses == 1 1739 && s0.Uses == 1 1740 && s1.Uses == 1 1741 && or.Uses == 1 1742 && mergePoint(b,x0,x1,y) != nil 1743 && clobber(x0) 1744 && clobber(x1) 1745 && clobber(s0) 1746 && clobber(s1) 1747 && clobber(or) 1748 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) 1749 1750 (ORQ 1751 s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1752 or:(ORQ 1753 s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) 1754 y)) 1755 && i1 == i0+2 1756 && j1 == j0+16 1757 && j0 % 32 == 0 1758 && x0.Uses == 1 1759 && x1.Uses == 1 1760 && s0.Uses == 1 1761 && s1.Uses == 1 1762 && or.Uses == 1 1763 && mergePoint(b,x0,x1,y) != nil 1764 && clobber(x0) 1765 && clobber(x1) 1766 && clobber(s0) 1767 && clobber(s1) 1768 && clobber(or) 1769 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y) 1770 1771 // Big-endian loads 1772 1773 (ORL 1774 x1:(MOVBload [i1] {s} p mem) 1775 sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) 1776 && i1 == i0+1 1777 && x0.Uses == 1 1778 && x1.Uses == 1 1779 && sh.Uses == 
1 1780 && mergePoint(b,x0,x1) != nil 1781 && clobber(x0) 1782 && clobber(x1) 1783 && clobber(sh) 1784 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) 1785 1786 (ORQ 1787 x1:(MOVBload [i1] {s} p mem) 1788 sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) 1789 && i1 == i0+1 1790 && x0.Uses == 1 1791 && x1.Uses == 1 1792 && sh.Uses == 1 1793 && mergePoint(b,x0,x1) != nil 1794 && clobber(x0) 1795 && clobber(x1) 1796 && clobber(sh) 1797 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) 1798 1799 (ORL 1800 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) 1801 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) 1802 && i1 == i0+2 1803 && x0.Uses == 1 1804 && x1.Uses == 1 1805 && r0.Uses == 1 1806 && r1.Uses == 1 1807 && sh.Uses == 1 1808 && mergePoint(b,x0,x1) != nil 1809 && clobber(x0) 1810 && clobber(x1) 1811 && clobber(r0) 1812 && clobber(r1) 1813 && clobber(sh) 1814 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) 1815 1816 (ORQ 1817 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) 1818 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) 1819 && i1 == i0+2 1820 && x0.Uses == 1 1821 && x1.Uses == 1 1822 && r0.Uses == 1 1823 && r1.Uses == 1 1824 && sh.Uses == 1 1825 && mergePoint(b,x0,x1) != nil 1826 && clobber(x0) 1827 && clobber(x1) 1828 && clobber(r0) 1829 && clobber(r1) 1830 && clobber(sh) 1831 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) 1832 1833 (ORQ 1834 r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) 1835 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) 1836 && i1 == i0+4 1837 && x0.Uses == 1 1838 && x1.Uses == 1 1839 && r0.Uses == 1 1840 && r1.Uses == 1 1841 && sh.Uses == 1 1842 && mergePoint(b,x0,x1) != nil 1843 && clobber(x0) 1844 && clobber(x1) 1845 && clobber(r0) 1846 && clobber(r1) 1847 && clobber(sh) 1848 -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem)) 1849 1850 (ORL 1851 s0:(SHLLconst [j0] x0:(MOVBload 
[i0] {s} p mem)) 1852 or:(ORL 1853 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) 1854 y)) 1855 && i1 == i0+1 1856 && j1 == j0-8 1857 && j1 % 16 == 0 1858 && x0.Uses == 1 1859 && x1.Uses == 1 1860 && s0.Uses == 1 1861 && s1.Uses == 1 1862 && or.Uses == 1 1863 && mergePoint(b,x0,x1,y) != nil 1864 && clobber(x0) 1865 && clobber(x1) 1866 && clobber(s0) 1867 && clobber(s1) 1868 && clobber(or) 1869 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) 1870 1871 (ORQ 1872 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) 1873 or:(ORQ 1874 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) 1875 y)) 1876 && i1 == i0+1 1877 && j1 == j0-8 1878 && j1 % 16 == 0 1879 && x0.Uses == 1 1880 && x1.Uses == 1 1881 && s0.Uses == 1 1882 && s1.Uses == 1 1883 && or.Uses == 1 1884 && mergePoint(b,x0,x1,y) != nil 1885 && clobber(x0) 1886 && clobber(x1) 1887 && clobber(s0) 1888 && clobber(s1) 1889 && clobber(or) 1890 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) 1891 1892 (ORQ 1893 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) 1894 or:(ORQ 1895 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) 1896 y)) 1897 && i1 == i0+2 1898 && j1 == j0-16 1899 && j1 % 32 == 0 1900 && x0.Uses == 1 1901 && x1.Uses == 1 1902 && r0.Uses == 1 1903 && r1.Uses == 1 1904 && s0.Uses == 1 1905 && s1.Uses == 1 1906 && or.Uses == 1 1907 && mergePoint(b,x0,x1,y) != nil 1908 && clobber(x0) 1909 && clobber(x1) 1910 && clobber(r0) 1911 && clobber(r1) 1912 && clobber(s0) 1913 && clobber(s1) 1914 && clobber(or) 1915 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y) 1916 1917 // Big-endian indexed loads 1918 1919 (ORL 1920 x1:(MOVBloadidx1 [i1] {s} p idx mem) 1921 sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) 1922 && i1 == i0+1 1923 && x0.Uses == 1 1924 && x1.Uses 
== 1 1925 && sh.Uses == 1 1926 && mergePoint(b,x0,x1) != nil 1927 && clobber(x0) 1928 && clobber(x1) 1929 && clobber(sh) 1930 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem)) 1931 1932 (ORQ 1933 x1:(MOVBloadidx1 [i1] {s} p idx mem) 1934 sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) 1935 && i1 == i0+1 1936 && x0.Uses == 1 1937 && x1.Uses == 1 1938 && sh.Uses == 1 1939 && mergePoint(b,x0,x1) != nil 1940 && clobber(x0) 1941 && clobber(x1) 1942 && clobber(sh) 1943 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem)) 1944 1945 (ORL 1946 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1947 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) 1948 && i1 == i0+2 1949 && x0.Uses == 1 1950 && x1.Uses == 1 1951 && r0.Uses == 1 1952 && r1.Uses == 1 1953 && sh.Uses == 1 1954 && mergePoint(b,x0,x1) != nil 1955 && clobber(x0) 1956 && clobber(x1) 1957 && clobber(r0) 1958 && clobber(r1) 1959 && clobber(sh) 1960 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem)) 1961 1962 (ORQ 1963 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1964 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) 1965 && i1 == i0+2 1966 && x0.Uses == 1 1967 && x1.Uses == 1 1968 && r0.Uses == 1 1969 && r1.Uses == 1 1970 && sh.Uses == 1 1971 && mergePoint(b,x0,x1) != nil 1972 && clobber(x0) 1973 && clobber(x1) 1974 && clobber(r0) 1975 && clobber(r1) 1976 && clobber(sh) 1977 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem)) 1978 1979 (ORQ 1980 r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) 1981 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem)))) 1982 && i1 == i0+4 1983 && x0.Uses == 1 1984 && x1.Uses == 1 1985 && r0.Uses == 1 1986 && r1.Uses == 1 1987 && sh.Uses == 1 1988 && mergePoint(b,x0,x1) != nil 1989 && clobber(x0) 1990 && clobber(x1) 1991 && clobber(r0) 1992 && clobber(r1) 1993 && clobber(sh) 1994 -> 
@mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem)) 1995 1996 (ORL 1997 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 1998 or:(ORL 1999 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 2000 y)) 2001 && i1 == i0+1 2002 && j1 == j0-8 2003 && j1 % 16 == 0 2004 && x0.Uses == 1 2005 && x1.Uses == 1 2006 && s0.Uses == 1 2007 && s1.Uses == 1 2008 && or.Uses == 1 2009 && mergePoint(b,x0,x1,y) != nil 2010 && clobber(x0) 2011 && clobber(x1) 2012 && clobber(s0) 2013 && clobber(s1) 2014 && clobber(or) 2015 -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) 2016 2017 (ORQ 2018 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 2019 or:(ORQ 2020 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 2021 y)) 2022 && i1 == i0+1 2023 && j1 == j0-8 2024 && j1 % 16 == 0 2025 && x0.Uses == 1 2026 && x1.Uses == 1 2027 && s0.Uses == 1 2028 && s1.Uses == 1 2029 && or.Uses == 1 2030 && mergePoint(b,x0,x1,y) != nil 2031 && clobber(x0) 2032 && clobber(x1) 2033 && clobber(s0) 2034 && clobber(s1) 2035 && clobber(or) 2036 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) 2037 2038 (ORQ 2039 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) 2040 or:(ORQ 2041 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 2042 y)) 2043 && i1 == i0+2 2044 && j1 == j0-16 2045 && j1 % 32 == 0 2046 && x0.Uses == 1 2047 && x1.Uses == 1 2048 && r0.Uses == 1 2049 && r1.Uses == 1 2050 && s0.Uses == 1 2051 && s1.Uses == 1 2052 && or.Uses == 1 2053 && mergePoint(b,x0,x1,y) != nil 2054 && clobber(x0) 2055 && clobber(x1) 2056 && clobber(r0) 2057 && clobber(r1) 2058 && clobber(s0) 2059 && clobber(s1) 2060 && clobber(or) 2061 -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y) 2062 2063 // 
Combine 2 byte stores + shift into rolw 8 + word store 2064 (MOVBstore [i] {s} p w 2065 x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) 2066 && x0.Uses == 1 2067 && clobber(x0) 2068 -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem) 2069 2070 (MOVBstoreidx1 [i] {s} p idx w 2071 x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem)) 2072 && x0.Uses == 1 2073 && clobber(x0) 2074 -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem) 2075 2076 // Combine stores + shifts into bswap and larger (unaligned) stores 2077 (MOVBstore [i] {s} p w 2078 x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) 2079 x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) 2080 x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) 2081 && x0.Uses == 1 2082 && x1.Uses == 1 2083 && x2.Uses == 1 2084 && clobber(x0) 2085 && clobber(x1) 2086 && clobber(x2) 2087 -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem) 2088 2089 (MOVBstoreidx1 [i] {s} p idx w 2090 x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) 2091 x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) 2092 x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem)))) 2093 && x0.Uses == 1 2094 && x1.Uses == 1 2095 && x2.Uses == 1 2096 && clobber(x0) 2097 && clobber(x1) 2098 && clobber(x2) 2099 -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem) 2100 2101 (MOVBstore [i] {s} p w 2102 x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) 2103 x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) 2104 x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) 2105 x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) 2106 x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) 2107 x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) 2108 x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) 2109 && x0.Uses == 1 2110 && x1.Uses == 1 2111 && x2.Uses == 1 2112 && x3.Uses == 1 2113 && x4.Uses == 1 2114 && x5.Uses == 1 2115 && x6.Uses == 1 2116 && clobber(x0) 2117 && clobber(x1) 2118 && clobber(x2) 2119 && clobber(x3) 2120 && clobber(x4) 2121 && clobber(x5) 2122 && clobber(x6) 2123 -> 
(MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem) 2124 2125 (MOVBstoreidx1 [i] {s} p idx w 2126 x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) 2127 x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) 2128 x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) 2129 x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) 2130 x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) 2131 x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) 2132 x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem)))))))) 2133 && x0.Uses == 1 2134 && x1.Uses == 1 2135 && x2.Uses == 1 2136 && x3.Uses == 1 2137 && x4.Uses == 1 2138 && x5.Uses == 1 2139 && x6.Uses == 1 2140 && clobber(x0) 2141 && clobber(x1) 2142 && clobber(x2) 2143 && clobber(x3) 2144 && clobber(x4) 2145 && clobber(x5) 2146 && clobber(x6) 2147 -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem) 2148 2149 // Combine constant stores into larger (unaligned) stores. 2150 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) 2151 && x.Uses == 1 2152 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2153 && clobber(x) 2154 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) 2155 (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) 2156 && x.Uses == 1 2157 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2158 && clobber(x) 2159 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) 2160 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) 2161 && x.Uses == 1 2162 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2163 && clobber(x) 2164 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) 2165 (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) 2166 && x.Uses == 1 2167 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2168 && clobber(x) 2169 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | 
ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) 2170 (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) 2171 && x.Uses == 1 2172 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2173 && clobber(x) 2174 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2175 (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) 2176 && x.Uses == 1 2177 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2178 && clobber(x) 2179 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2180 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) 2181 && config.useSSE 2182 && x.Uses == 1 2183 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() 2184 && ValAndOff(c).Val() == 0 2185 && ValAndOff(c2).Val() == 0 2186 && clobber(x) 2187 -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) 2188 2189 (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) 2190 && x.Uses == 1 2191 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2192 && clobber(x) 2193 -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) 2194 (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) 2195 && x.Uses == 1 2196 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2197 && clobber(x) 2198 -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) 2199 (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) 2200 && x.Uses == 1 2201 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2202 && clobber(x) 2203 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2204 2205 (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) 2206 && x.Uses == 1 2207 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2208 && clobber(x) 2209 -> 
(MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem) 2210 (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) 2211 && x.Uses == 1 2212 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2213 && clobber(x) 2214 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2215 2216 // Combine stores into larger (unaligned) stores. 2217 (MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) 2218 && x.Uses == 1 2219 && clobber(x) 2220 -> (MOVWstore [i-1] {s} p w mem) 2221 (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem)) 2222 && x.Uses == 1 2223 && clobber(x) 2224 -> (MOVWstore [i] {s} p w mem) 2225 (MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem)) 2226 && x.Uses == 1 2227 && clobber(x) 2228 -> (MOVWstore [i-1] {s} p w0 mem) 2229 (MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem)) 2230 && x.Uses == 1 2231 && clobber(x) 2232 -> (MOVLstore [i-2] {s} p w mem) 2233 (MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem)) 2234 && x.Uses == 1 2235 && clobber(x) 2236 -> (MOVLstore [i-2] {s} p w0 mem) 2237 (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) 2238 && x.Uses == 1 2239 && clobber(x) 2240 -> (MOVQstore [i-4] {s} p w mem) 2241 (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) 2242 && x.Uses == 1 2243 && clobber(x) 2244 -> (MOVQstore [i-4] {s} p w0 mem) 2245 2246 (MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) 2247 && x.Uses == 1 2248 && clobber(x) 2249 -> (MOVWstoreidx1 [i-1] {s} p idx w mem) 2250 (MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const 
[j-8] w) mem)) 2251 && x.Uses == 1 2252 && clobber(x) 2253 -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) 2254 (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) 2255 && x.Uses == 1 2256 && clobber(x) 2257 -> (MOVLstoreidx1 [i-2] {s} p idx w mem) 2258 (MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem)) 2259 && x.Uses == 1 2260 && clobber(x) 2261 -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) 2262 (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem)) 2263 && x.Uses == 1 2264 && clobber(x) 2265 -> (MOVQstoreidx1 [i-4] {s} p idx w mem) 2266 (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) 2267 && x.Uses == 1 2268 && clobber(x) 2269 -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem) 2270 // Scaled-index variants: the merged store is emitted as an idx1 op with the index shifted left explicitly. 2271 (MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) 2272 && x.Uses == 1 2273 && clobber(x) 2274 -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem) 2275 (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) 2276 && x.Uses == 1 2277 && clobber(x) 2278 -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem) 2279 (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem)) 2280 && x.Uses == 1 2281 && clobber(x) 2282 -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem) 2283 (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) 2284 && x.Uses == 1 2285 && clobber(x) 2286 -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem) 2287 // Combine two adjacent narrow load+store copies into a single load+store of twice the width. 
2288 (MOVBstore [i] {s} p 2289 x1:(MOVBload [j] {s2} p2 mem) 2290 mem2:(MOVBstore [i-1] {s} p 2291 x2:(MOVBload [j-1] {s2} p2 mem) mem)) 2292 && x1.Uses == 1 2293 && x2.Uses == 1 2294 && mem2.Uses == 1 2295 && clobber(x1) 2296 && clobber(x2) 
2297 && clobber(mem2) 2298 -> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem) 2299 2300 (MOVWstore [i] {s} p 2301 x1:(MOVWload [j] {s2} p2 mem) 2302 mem2:(MOVWstore [i-2] {s} p 2303 x2:(MOVWload [j-2] {s2} p2 mem) mem)) 2304 && x1.Uses == 1 2305 && x2.Uses == 1 2306 && mem2.Uses == 1 2307 && clobber(x1) 2308 && clobber(x2) 2309 && clobber(mem2) 2310 -> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem) 2311 2312 (MOVLstore [i] {s} p 2313 x1:(MOVLload [j] {s2} p2 mem) 2314 mem2:(MOVLstore [i-4] {s} p 2315 x2:(MOVLload [j-4] {s2} p2 mem) mem)) 2316 && x1.Uses == 1 2317 && x2.Uses == 1 2318 && mem2.Uses == 1 2319 && clobber(x1) 2320 && clobber(x2) 2321 && clobber(mem2) 2322 -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem) 2323 // Fold an LEAL address computation (offset and symbol) into the load or store that consumes it. 2324 (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2325 (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) 2326 (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2327 (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) 2328 (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2329 (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) 2330 (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2331 (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) 2332 2333 (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2334 (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) 2335 (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2336 (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) 2337 (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2338 (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) 2339 
(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) -> 2340 (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) 2341 // Fold an LEAL offset and symbol into constant stores; the offset is added into the ValAndOff aux when canAdd allows it. 2342 (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> 2343 (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) 2344 (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> 2345 (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) 2346 (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> 2347 (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) 2348 (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> 2349 (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) 2350 // Fold an ADDLconst on the address into the load/store offset when the combined offset still fits in 32 bits. 2351 (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {sym} ptr mem) 2352 (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload [off1+off2] {sym} ptr mem) 2353 (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem) 2354 (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem) 2355 (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore [off1+off2] {sym} ptr val mem) 2356 (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem) 2357 (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem) 2358 (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem) 2359 (MOVQstoreconst [sc] {s} 
(ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> 2360 (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) 2361 (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> 2362 (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) 2363 (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> 2364 (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) 2365 (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> 2366 (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) 2367 2368 // Merge load and op 2369 // TODO: add indexed variants? 2370 ((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem) 2371 ((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem) 2372 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem) 2373 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem) 2374 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) 2375 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) -> 2376 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem) 2377 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) 2378 (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && 
clobber(l) -> 2379 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem) 2380 2381 // Merge ADDQconst and LEAQ into atomic loads. 2382 (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> 2383 (MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem) 2384 (MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> 2385 (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) 2386 2387 // Merge ADDQconst and LEAQ into atomic stores. 2388 (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> 2389 (XCHGQ [off1+off2] {sym} val ptr mem) 2390 (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB -> 2391 (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) 2392 (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> 2393 (XCHGL [off1+off2] {sym} val ptr mem) 2394 (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB -> 2395 (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) 2396 2397 // Merge ADDQconst into atomic adds. 2398 // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions. 2399 (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> 2400 (XADDQlock [off1+off2] {sym} val ptr mem) 2401 (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> 2402 (XADDLlock [off1+off2] {sym} val ptr mem) 2403 2404 // Merge ADDQconst into atomic compare and swaps. 2405 // TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions. 
2406 (CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) -> 2407 (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem) 2408 (CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) -> 2409 (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem) 2410 2411 // We don't need the conditional move if we know the arg of BSF is not zero. 2412 (CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x 2413 // Extension is unnecessary for trailing zeros. 2414 (BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x)) 2415 (BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x)) 2416 2417 // Simplify indexed loads/stores: a constant index is folded into the offset (multiplied by the op's index scale) when the result fits in 32 bits. 2418 (MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem) 2419 (MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem) 2420 (MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem) 2421 (MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem) 2422 (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem) 2423 (MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem) 2424 (MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem) 2425 (MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem) 2426 (MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem) 2427 (MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem) 2428 (MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem) 2429 (MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem) 2430 (MOVBloadidx1 [i] {s} p 
(MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem) 2431 (MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem) 2432 (MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem) 2433 (MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem) 2434 (MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem) 2435 (MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem) 2436 (MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem) 2437 (MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem) 2438 (MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem) 2439 (MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem) 2440 (MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem) 2441 (MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem) 2442 2443 // Redundant sign/zero extensions 2444 // Note: see issue 21963. We have to make sure we use the right type on 2445 // the resulting extension (the outer type, not the inner type). 
2446 (MOVLQSX (MOVLQSX x)) -> (MOVLQSX x) 2447 (MOVLQSX (MOVWQSX x)) -> (MOVWQSX x) 2448 (MOVLQSX (MOVBQSX x)) -> (MOVBQSX x) 2449 (MOVWQSX (MOVWQSX x)) -> (MOVWQSX x) 2450 (MOVWQSX (MOVBQSX x)) -> (MOVBQSX x) 2451 (MOVBQSX (MOVBQSX x)) -> (MOVBQSX x) 2452 (MOVLQZX (MOVLQZX x)) -> (MOVLQZX x) 2453 (MOVLQZX (MOVWQZX x)) -> (MOVWQZX x) 2454 (MOVLQZX (MOVBQZX x)) -> (MOVBQZX x) 2455 (MOVWQZX (MOVWQZX x)) -> (MOVWQZX x) 2456 (MOVWQZX (MOVBQZX x)) -> (MOVBQZX x) 2457 (MOVBQZX (MOVBQZX x)) -> (MOVBQZX x) 2458 // Fold a load, constant op, and store on the same address (same off/sym, isSamePtr) into a single ...constmodify op. 2459 (MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) 2460 && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) -> 2461 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem) 2462 (MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) 2463 && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) -> 2464 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem) 2465 2466 // float <-> int register moves, with no conversion. 2467 // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}. 2468 (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) -> (MOVQf2i val) 2469 (MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) -> (MOVLf2i val) 2470 (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) -> (MOVQi2f val) 2471 (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) -> (MOVLi2f val) 2472 2473 // Other load-like ops. 
2474 (ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ADDQ x (MOVQf2i y)) 2475 (ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ADDL x (MOVLf2i y)) 2476 (SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (SUBQ x (MOVQf2i y)) 2477 (SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (SUBL x (MOVLf2i y)) 2478 (ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ANDQ x (MOVQf2i y)) 2479 (ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ANDL x (MOVLf2i y)) 2480 ( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ( ORQ x (MOVQf2i y)) 2481 ( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ( ORL x (MOVLf2i y)) 2482 (XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (XORQ x (MOVQf2i y)) 2483 (XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (XORL x (MOVLf2i y)) 2484 // Float ops whose memory operand was just written by an integer store: use the stored value directly via an int->float register move. 2485 (ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (ADDSD x (MOVQi2f y)) 2486 (ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (ADDSS x (MOVLi2f y)) 2487 (SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (SUBSD x (MOVQi2f y)) 2488 (SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (SUBSS x (MOVLi2f y)) 2489 (MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (MULSD x (MOVQi2f y)) 2490 (MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (MULSS x (MOVLi2f y)) 2491 2492 // Redirect stores to use the other register set. 2493 (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) -> (MOVSDstore [off] {sym} ptr val mem) 2494 (MOVLstore [off] {sym} ptr (MOVLf2i val) mem) -> (MOVSSstore [off] {sym} ptr val mem) 2495 (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) -> (MOVQstore [off] {sym} ptr val mem) 2496 (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) -> (MOVLstore [off] {sym} ptr val mem) 2497 2498 // Load args directly into the register class where it will be used. 
2499 // We do this by just modifying the type of the Arg. 2500 (MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym}) 2501 (MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym}) 2502 (MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym}) 2503 (MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym}) 2504 2505 // LEAQ is rematerializeable, so this helps to avoid register spill. 2506 // See issue 22947 for details 2507 (ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x) 2508 2509 // HMULx is commutative, but its first argument must go in AX. 2510 // If possible, put a rematerializeable value in the first argument slot, 2511 // to reduce the odds that another value will have to be spilled 2512 // specifically to free up AX. 2513 (HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L) y x) 2514 (HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L)U y x) 2515 2516 // Fold loads into compares 2517 // Note: these may be undone by the flagalloc pass. 
2518 (CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) -> (CMP(Q|L|W|B)load {sym} [off] ptr x mem) 2519 (CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem)) 2520 2521 (CMP(Q|L|W|B)const l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) [c]) 2522 && l.Uses == 1 2523 && validValAndOff(c, off) 2524 && clobber(l) -> 2525 @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem) 2526 2527 (CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem) 2528 (CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem) 2529 (CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem) 2530 (CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem) 2531 // TEST of a loaded value against itself (the load's only two uses) becomes a compare of the memory operand with constant 0. 2532 (TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2) 2533 && l == l2 2534 && l.Uses == 2 2535 && validValAndOff(0,off) 2536 && clobber(l) -> 2537 @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem) 2538 // Loads at a fixed offset from a read-only symbol (symIsRO) are replaced by the constant read from the symbol's data. 2539 (MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))]) 2540 (MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))]) 2541 (MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.BigEndian))]) 2542 (MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.BigEndian))])