github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/cmd/compile/internal/ssa/gen/AMD64.rules

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Lowering arithmetic
(Add(64|32|16|8) x y) -> (ADD(Q|L|L|L) x y)
(AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y)
(AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)
(Add(32|64)F x y) -> (ADDS(S|D) x y)

(Sub(64|32|16|8) x y) -> (SUB(Q|L|L|L) x y)
(SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y)
(SubPtr x y) && config.PtrSize == 4 -> (SUBL x y)
(Sub(32|64)F x y) -> (SUBS(S|D) x y)

(Mul(64|32|16|8) x y) -> (MUL(Q|L|L|L) x y)
(Mul(32|64)F x y) -> (MULS(S|D) x y)

(Select0 (Mul64uover x y)) -> (Select0 <typ.UInt64> (MULQU x y))
(Select0 (Mul32uover x y)) -> (Select0 <typ.UInt32> (MULLU x y))
(Select1 (Mul(64|32)uover x y)) -> (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y)))

(Hmul(64|32) x y) -> (HMUL(Q|L) x y)
(Hmul(64|32)u x y) -> (HMUL(Q|L)U x y)

(Div(64|32|16) [a] x y) -> (Select0 (DIV(Q|L|W) [a] x y))
(Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Div(64|32|16)u x y) -> (Select0 (DIV(Q|L|W)U x y))
(Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F x y) -> (DIVS(S|D) x y)

(Mul64uhilo x y) -> (MULQU2 x y)
(Div128u xhi xlo y) -> (DIVQU2 xhi xlo y)

(Avg64u x y) -> (AVGQU x y)

(Mod(64|32|16) [a] x y) -> (Select1 (DIV(Q|L|W) [a] x y))
(Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Mod(64|32|16)u x y) -> (Select1 (DIV(Q|L|W)U x y))
(Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))

(And(64|32|16|8) x y) -> (AND(Q|L|L|L) x y)
(Or(64|32|16|8) x y) -> (OR(Q|L|L|L) x y)
(Xor(64|32|16|8) x y) -> (XOR(Q|L|L|L) x y)
(Com(64|32|16|8) x) -> (NOT(Q|L|L|L) x)

(Neg(64|32|16|8) x) -> (NEG(Q|L|L|L) x)
(Neg32F x) -> (PXOR x (MOVSSconst <typ.Float32> [auxFrom32F(float32(math.Copysign(0, -1)))]))
(Neg64F x) -> (PXOR x (MOVSDconst <typ.Float64> [auxFrom64F(math.Copysign(0, -1))]))

// Lowering boolean ops
(AndB x y) -> (ANDL x y)
(OrB x y) -> (ORL x y)
(Not x) -> (XORLconst [1] x)

// Lowering pointer arithmetic
(OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr)
(OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr)
(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)

// Lowering other arithmetic
(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) -> (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) -> (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) -> (BSFL (BTSLconst <typ.UInt32> [ 8] x))

(Ctz64NonZero x) -> (Select0 (BSFQ x))
(Ctz32NonZero x) -> (BSFL x)
(Ctz16NonZero x) -> (BSFL x)
(Ctz8NonZero x) -> (BSFL x)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
// places the index of the highest set bit where we want it.
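// A quick check of the trick (illustrative, not a rewrite rule): for x = 0b101,
// BitLen should be 3, and BSR(x<<1 + 1) = BSR(0b1011) = 3; for x = 0 it gives
// BSR(1) = 0, so the zero case needs no special handling.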
(BitLen64 <t> x) -> (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
(BitLen32 x) -> (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
(BitLen16 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
(BitLen8 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))

(Bswap(64|32) x) -> (BSWAP(Q|L) x)

(PopCount64 x) -> (POPCNTQ x)
(PopCount32 x) -> (POPCNTL x)
(PopCount16 x) -> (POPCNTL (MOVWQZX <typ.UInt32> x))
(PopCount8 x) -> (POPCNTL (MOVBQZX <typ.UInt32> x))

(Sqrt x) -> (SQRTSD x)

(RoundToEven x) -> (ROUNDSD [0] x)
(Floor x) -> (ROUNDSD [1] x)
(Ceil x) -> (ROUNDSD [2] x)
(Trunc x) -> (ROUNDSD [3] x)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 x) -> (MOVBQSX x)
(SignExt8to32 x) -> (MOVBQSX x)
(SignExt8to64 x) -> (MOVBQSX x)
(SignExt16to32 x) -> (MOVWQSX x)
(SignExt16to64 x) -> (MOVWQSX x)
(SignExt32to64 x) -> (MOVLQSX x)

(ZeroExt8to16 x) -> (MOVBQZX x)
(ZeroExt8to32 x) -> (MOVBQZX x)
(ZeroExt8to64 x) -> (MOVBQZX x)
(ZeroExt16to32 x) -> (MOVWQZX x)
(ZeroExt16to64 x) -> (MOVWQZX x)
(ZeroExt32to64 x) -> (MOVLQZX x)

(Slicemask <t> x) -> (SARQconst (NEGQ <t> x) [63])

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 x) -> x
(Trunc32to8 x) -> x
(Trunc32to16 x) -> x
(Trunc64to8 x) -> x
(Trunc64to16 x) -> x
(Trunc64to32 x) -> x

// Lowering float <-> int
(Cvt32to32F x) -> (CVTSL2SS x)
(Cvt32to64F x) -> (CVTSL2SD x)
(Cvt64to32F x) -> (CVTSQ2SS x)
(Cvt64to64F x) -> (CVTSQ2SD x)

(Cvt32Fto32 x) -> (CVTTSS2SL x)
(Cvt32Fto64 x) -> (CVTTSS2SQ x)
(Cvt64Fto32 x) -> (CVTTSD2SL x)
(Cvt64Fto64 x) -> (CVTTSD2SQ x)

(Cvt32Fto64F x) -> (CVTSS2SD x)
(Cvt64Fto32F x) -> (CVTSD2SS x)

(Round(32|64)F x) -> x

// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
//   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
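// The mask is materialized with a compare and SBBcarrymask (illustrative note):
// for the 64-bit case below, CMPQconst y [64] sets the carry flag iff y < 64
// (unsigned), and SBBQcarrymask turns that flag into all ones (y < 64) or zero
// (y >= 64), which is then ANDed with the raw SHLQ/SHRQ result.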
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))

(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)

(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))

(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRB x y)

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))

(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARB x y)

// Lowering comparisons
(Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y))
// Use SETGF with reversed operands to dodge NaN case
(Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x))

(Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
// Use SETGEF with reversed operands to dodge NaN case
(Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))

(Greater(64|32|16|8) x y) -> (SETG (CMP(Q|L|W|B) x y))
(Greater(64|32|16|8)U x y) -> (SETA (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Greater(32|64)F x y) -> (SETGF (UCOMIS(S|D) x y))

(Geq(64|32|16|8) x y) -> (SETGE (CMP(Q|L|W|B) x y))
(Geq(64|32|16|8)U x y) -> (SETAE (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Geq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) x y))

(Eq(64|32|16|8|B) x y) -> (SETEQ (CMP(Q|L|W|B|B) x y))
(EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y))
(EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y))
(Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))

(Neq(64|32|16|8|B) x y) -> (SETNE (CMP(Q|L|W|B|B) x y))
(NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y))
(NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y))
(Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))

(Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32

// Lowering loads
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)

// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)

(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)

// Lowering moves
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE ->
    (MOVQstore [8] dst (MOVQload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

(Move [32] dst src mem) ->
    (Move [16]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

(Move [48] dst src mem) && config.useSSE ->
    (Move [32]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

(Move [64] dst src mem) && config.useSSE ->
    (Move [32]
        (OffPtr <dst.Type> dst [32])
        (OffPtr <src.Type> src [32])
        (Move [32] dst src mem))

(Move [3] dst src mem) ->
    (MOVBstore [2] dst (MOVBload [2] src mem)
        (MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem) ->
    (MOVBstore [4] dst (MOVBload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem) ->
    (MOVWstore [4] dst (MOVWload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem) ->
    (MOVLstore [3] dst (MOVLload [3] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [s] dst src mem) && s > 8 && s < 16 ->
    (MOVQstore [s-8] dst (MOVQload [s-8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 <= 8 ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore [8] dst (MOVQload [8] src mem)
            (MOVQstore dst (MOVQload src mem) mem)))

// Medium copying uses a duff device.
(Move [s] dst src mem)
    && s > 64 && s <= 16*64 && s%16 == 0
    && !config.noDuffDevice ->
    (DUFFCOPY [14*(64-s/16)] dst src mem)
// 14 and 64 are magic constants. 14 is the number of bytes to encode:
//    MOVUPS (SI), X0
//    ADDQ   $16, SI
//    MOVUPS X0, (DI)
//    ADDQ   $16, DI
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
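// Worked example (illustrative): a 96-byte move needs s/16 = 6 blocks, so the
// rule above jumps to offset 14*(64-6) = 812 in duffcopy, running exactly the
// last 6 of the 64 copy blocks.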
// Large copying uses REP MOVSQ.
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 ->
    (REPMOVSQ dst src (MOVQconst [s/8]) mem)

// Lowering Zero instructions
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)

(Zero [3] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,2)] destptr
        (MOVWstoreconst [0] destptr mem))
(Zero [5] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [0] destptr mem))
(Zero [6] destptr mem) ->
    (MOVWstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [0] destptr mem))
(Zero [7] destptr mem) ->
    (MOVLstoreconst [makeValAndOff(0,3)] destptr
        (MOVLstoreconst [0] destptr mem))

// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
    (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
        (MOVQstoreconst [0] destptr mem))

// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,8)] destptr
        (MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,16)] destptr
        (MOVQstoreconst [makeValAndOff(0,8)] destptr
            (MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,24)] destptr
        (MOVQstoreconst [makeValAndOff(0,16)] destptr
            (MOVQstoreconst [makeValAndOff(0,8)] destptr
                (MOVQstoreconst [0] destptr mem))))

(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,s-8)] destptr
        (MOVQstoreconst [0] destptr mem))

// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
    (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
        (MOVOstore destptr (MOVOconst [0]) mem))

(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
    (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
        (MOVQstoreconst [0] destptr mem))

(Zero [16] destptr mem) && config.useSSE ->
    (MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
        (MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
        (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
            (MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
        (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
            (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
                (MOVOstore destptr (MOVOconst [0]) mem))))

// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
    && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice ->
    (DUFFZERO [s] destptr (MOVOconst [0]) mem)

// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
    && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
    && s%8 == 0 ->
    (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)

// Lowering constants
(Const8 [val]) -> (MOVLconst [val])
(Const16 [val]) -> (MOVLconst [val])
(Const32 [val]) -> (MOVLconst [val])
(Const64 [val]) -> (MOVQconst [val])
(Const32F [val]) -> (MOVSSconst [val])
(Const64F [val]) -> (MOVSDconst [val])
(ConstNil) && config.PtrSize == 8 -> (MOVQconst [0])
(ConstNil) && config.PtrSize == 4 -> (MOVLconst [0])
(ConstBool [b]) -> (MOVLconst [b])

// Lowering calls
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
    -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
    -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
    -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)

// If the condition does not set the flags, we need to generate a comparison.
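// For example (illustrative): when check is a 1-byte boolean, the rules below
// first widen it with MOVBQZX, then compare it against zero, so a 64-bit
// select ends up as (CMOVQNE y x (CMPQconst [0] check)).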
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
    -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
    -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
    -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))

(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
    -> (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
    -> (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
    -> (CMOVWNE y x (CMPQconst [0] check))

// Absorb InvertFlags
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)

// Absorb constants generated during lower
(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y

// Miscellaneous
(IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p))
(IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p))
(IsInBounds idx len) && config.PtrSize == 8 -> (SETB (CMPQ idx len))
(IsInBounds idx len) && config.PtrSize == 4 -> (SETB (CMPL idx len))
(IsSliceInBounds idx len) && config.PtrSize == 8 -> (SETBE (CMPQ idx len))
(IsSliceInBounds idx len) && config.PtrSize == 4 -> (SETBE (CMPL idx len))
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
(GetG mem) -> (LoweredGetG mem)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(GetCallerPC) -> (LoweredGetCallerPC)
(GetCallerSP) -> (LoweredGetCallerSP)
(Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base)
(Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base)
(LocalAddr {sym} base _) && config.PtrSize == 8 -> (LEAQ {sym} base)
(LocalAddr {sym} base _) && config.PtrSize == 4 -> (LEAL {sym} base)

(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 -> (SETLstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 -> (SETLEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 -> (SETGstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 -> (SETGEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 -> (SETEQstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 -> (SETNEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 -> (SETBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 -> (SETBEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 -> (SETAstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 -> (SETAEstore [off] {sym} ptr x mem)

// block rewrites
(If (SETL cmp) yes no) -> (LT cmp yes no)
(If (SETLE cmp) yes no) -> (LE cmp yes no)
(If (SETG cmp) yes no) -> (GT cmp yes no)
(If (SETGE cmp) yes no) -> (GE cmp yes no)
(If (SETEQ cmp) yes no) -> (EQ cmp yes no)
(If (SETNE cmp) yes no) -> (NE cmp yes no)
(If (SETB cmp) yes no) -> (ULT cmp yes no)
(If (SETBE cmp) yes no) -> (ULE cmp yes no)
(If (SETA cmp) yes no) -> (UGT cmp yes no)
(If (SETAE cmp) yes no) -> (UGE cmp yes no)
(If (SETO cmp) yes no) -> (OS cmp yes no)

// Special case for floating point - LF/LEF not generated
(If (SETGF cmp) yes no) -> (UGT cmp yes no)
(If (SETGEF cmp) yes no) -> (UGE cmp yes no)
(If (SETEQF cmp) yes no) -> (EQF cmp yes no)
(If (SETNEF cmp) yes no) -> (NEF cmp yes no)

(If cond yes no) -> (NE (TESTB cond cond) yes no)

// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)

// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))

// Atomic exchanges.
(AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem)
(AtomicExchange64 ptr val mem) -> (XCHGQ val ptr mem)

// Atomic adds.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (XADDLlock val ptr mem))
(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (XADDQlock val ptr mem))
(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDL val (Select0 <t> tuple))
(Select1 (AddTupleFirst32 _ tuple)) -> (Select1 tuple)
(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADDQ val (Select0 <t> tuple))
(Select1 (AddTupleFirst64 _ tuple)) -> (Select1 tuple)

// Atomic compare and swap.
(AtomicCompareAndSwap32 ptr old new_ mem) -> (CMPXCHGLlock ptr old new_ mem)
(AtomicCompareAndSwap64 ptr old new_ mem) -> (CMPXCHGQlock ptr old new_ mem)

// Atomic memory updates.
(AtomicAnd8 ptr val mem) -> (ANDBlock ptr val mem)
(AtomicOr8 ptr val mem) -> (ORBlock ptr val mem)

// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)

// ***************************
// Above: lowering rules
// Below: optimizations
// ***************************
// TODO: Should the optimizations be a separate pass?

// Fold boolean tests into blocks
(NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
(NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
(NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
(NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)

// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTLconst [log2uint32(c)] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTQconst [log2(c)] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTQconst [log2(c)] x))
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTLconst [log2uint32(c)] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTQconst [log2(c)] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTQconst [log2(c)] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)

// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
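// Worked example (illustrative): (NE (TESTQ (SHLQ (MOVQconst [1]) b) a))
// becomes (ULT (BTQ b a)): BT copies the selected bit of a into the carry
// flag, so the zero-flag test NE turns into the carry test ULT, and EQ into UGE.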
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) -> (BTL y x)

// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) -> (SET(EQ|NE) (CMPLconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) -> (SET(EQ|NE) (CMPQconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)

// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTC(Q|L) x y)

// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Lconst [log2uint32(c)] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Lconst [log2uint32(c)] x)

// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) && !config.nacl -> (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRQconst [log2(^c)] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRLconst [log2uint32(^c)] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRQconst [log2(^c)] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRLconst [log2uint32(^c)] x)

// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
// for instance:
//    x & 0xFFFF0000 -> (x >> 16) << 16
//    x & 0x80000000 -> (x >> 31) << 31
//
// In case the mask is just one bit (like second example above), it conflicts
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
// We thus special-case them, by detecting the shift patterns.
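// Worked example (illustrative): a test like x & (1<<63) != 0 arrives here as
// a TEST of z:(SHLQconst [63] (SHRQconst [63] x)) against itself, which the
// rules below collapse into a single (BTQconst [63] x).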

// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) && !config.nacl
    -> (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x)) && !config.nacl
    -> (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x)) && !config.nacl
    -> (BTRQconst [63] x)

// Special case testing first/last bit (with double-shift generated by generic.rules)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)

// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)

// Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) -> (SETEQ x)
(XORLconst [1] (SETEQ x)) -> (SETNE x)
(XORLconst [1] (SETL x)) -> (SETGE x)
(XORLconst [1] (SETGE x)) -> (SETL x)
(XORLconst [1] (SETLE x)) -> (SETG x)
(XORLconst [1] (SETG x)) -> (SETLE x)
(XORLconst [1] (SETB x)) -> (SETAE x)
(XORLconst [1] (SETAE x)) -> (SETB x)
(XORLconst [1] (SETBE x)) -> (SETA x)
(XORLconst [1] (SETA x)) -> (SETBE x)

// Special case for floating point - LF/LEF not generated
(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)

// Disabled because it interferes with the pattern match above and makes worse code.
// (SETNEF x) -> (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
// (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))

// fold constants into instructions
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)

(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))

(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
(MULL x (MOVLconst [c])) -> (MULLconst [c] x)

(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)

(AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ (1<<uint32(c))] x)
(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ (1<<uint32(d))] x)
(BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x)

(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)

(ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
(ORL x (MOVLconst [c])) -> (ORLconst [c] x)

(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
(XORL x (MOVLconst [c])) -> (XORLconst [c] x)

(SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
(SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)

(SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
(SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 -> (MOVLconst [0])

(SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
(SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
(SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
(SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)

// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))

((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))

((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))

((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))

// Constant rotate instructions
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])

((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 -> (ROLBconst x [c])

(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)

(RotateLeft8 a b) -> (ROLB a b)
(RotateLeft16 a b) -> (ROLW a b)
(RotateLeft32 a b) -> (ROLL a b)
(RotateLeft64 a b) -> (ROLQ a b)

// Non-constant rotates.
// We want to issue a rotate when the Go source contains code like
//     y &= 63
//     x << y | x >> (64-y)
// The shift rules above convert << to SHLx and >> to SHRx.
// SHRx converts its shift argument from 64-y to -y.
// A tricky situation occurs when y==0. Then the original code would be:
//     x << 0 | x >> 64
// But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
// to force the second term to 0. We don't need that mask, but we must match
// it in order to strip it out.
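// Concretely (illustrative): with y already masked to 0..63, the x >> (64-y)
// term arrives as (SHRQ x (NEGQ y)) ANDed with an SBBQcarrymask that is zero
// exactly when y&63 == 0 (the case where the original shift count would be 64)
// and all ones otherwise; a rotate by y gives the same result either way, so
// the rules below drop the mask and emit (ROLQ x y).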
(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y)
(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y)

(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y)
(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y)

// Help with rotate detection
(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) -> (FlagLT_ULT)

(ORL (SHLL x (AND(Q|L)const y [15]))
     (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
    && v.Type.Size() == 2
    -> (ROLW x y)
(ORL (SHRW x (AND(Q|L)const y [15]))
     (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
    && v.Type.Size() == 2
    -> (RORW x y)

(ORL (SHLL x (AND(Q|L)const y [ 7]))
     (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
           (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
    && v.Type.Size() == 1
    -> (ROLB x y)
(ORL (SHRB x (AND(Q|L)const y [ 7]))
     (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
    && v.Type.Size() == 1
    -> (RORB x y)

// rotate left negative = rotate right
(ROLQ x (NEG(Q|L) y)) -> (RORQ x y)
(ROLL x (NEG(Q|L) y)) -> (RORL x y)
(ROLW x (NEG(Q|L) y)) -> (RORW x y)
(ROLB x (NEG(Q|L) y)) -> (RORB x y)

// rotate right negative = rotate left
(RORQ x (NEG(Q|L) y)) -> (ROLQ x y)
(RORL x (NEG(Q|L) y)) -> (ROLL x y)
(RORW x (NEG(Q|L) y)) -> (ROLW x y)
(RORB x (NEG(Q|L) y)) -> (ROLB x y)

// rotate by constants
(ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x)
(ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x)
(ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x)
(ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x)

(RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x)
(RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x)
(RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x)
(RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x)

// Constant shift simplifications
((SHLQ|SHRQ|SARQ)const x [0]) -> x
((SHLL|SHRL|SARL)const x [0]) -> x
((SHRW|SARW)const x [0]) -> x
((SHRB|SARB)const x [0]) -> x
((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x

// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
// for the small shifts. I don't think we'll ever generate a weird shift,
// e.g. (SHRW x (MOVLconst [24])), but just in case.
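// Worked example (illustrative): (SHRW x (MOVLconst [17])) masks to 17&31 = 17,
// which is >= 16, so it folds to (MOVLconst [0]); (SHRW x (MOVLconst [40]))
// masks to 40&31 = 8 and becomes (SHRWconst [8] x).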

(CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
(CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))

// Using MOVZX instead of AND is cheaper.
(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)

// strength reduction
// Assumes the following costs, from https://gmplib.org/~tege/x86-timing.pdf:
//    1 - addq, shlq, leaq, negq, subq
//    3 - imulq
// This limits the rewrites to two instructions.
// Note that negq always operates in-place,
// which can require a register-register move
// to preserve the original value,
// so it must be used with care.
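// Worked example (illustrative): (MULQconst [11] x) below becomes
// (LEAQ2 x (LEAQ4 x x)), computing x + 2*(x + 4*x) = 11*x with two
// one-cycle LEAs instead of a three-cycle imulq.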
(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
(MUL(Q|L)const [ 1] x) -> x
(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))

(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))

// combine add/shift into LEAQ/LEAL
(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)

// combine ADDQ/ADDQconst into LEAQ1/LEAL1
(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)

// fold ADDQ/ADDL into LEAQ/LEAL
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)

// fold ADDQconst/ADDLconst into LEAQx/LEALx
(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)

// fold shifts into LEAQx/LEALx
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)

// reverse ordering of compare instruction
(SETL (InvertFlags x)) -> (SETG x)
(SETG (InvertFlags x)) -> (SETL x)
(SETB (InvertFlags x)) -> (SETA x)
(SETA (InvertFlags x)) -> (SETB x)
(SETLE (InvertFlags x)) -> (SETGE x)
(SETGE (InvertFlags x)) -> (SETLE x)
(SETBE (InvertFlags x)) -> (SETAE x)
(SETAE (InvertFlags x)) -> (SETBE x)
(SETEQ (InvertFlags x)) -> (SETEQ x)
(SETNE (InvertFlags x)) -> (SETNE x)

(SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem)
(SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem)
(SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem)
(SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem)
(SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem)
(SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem)
(SETBEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem)
(SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem)
(SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem)
(SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem)

// sign extended loads
// Note: The combined instruction must end up in the same block
// as the original load. If not, we end up making a value with
// memory type live in two different blocks, which can lead to
// multiple memory values alive simultaneously.
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)

(MOVLQZX x) && zeroUpper32Bits(x,3) -> x
(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
(MOVBQZX x) && zeroUpper56Bits(x,3) -> x

(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem)
(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem)

// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x)
(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x)
(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x)
(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x)

// Fold extensions and ANDs together.
(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)

// Don't extend before storing
(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)

// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
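// For instance (illustrative): (MOVQload [8] {sym} (ADDQconst [16] ptr) mem)
// folds to (MOVQload [24] {sym} ptr mem), provided the combined offset still
// fits in a 32-bit displacement (the is32Bit(off1+off2) condition below).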
// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    (CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)

// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
    (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
    (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)

// Fold address offsets into constant stores.
(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
    (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
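// Illustration (not a rewrite rule): folding an ADDQconst into the memory
// op's offset is what turns a struct field access into one instruction.
// Sketch, with a hypothetical type:
//
//	type pair struct{ a, b int64 }
//
//	func getB(p *pair) int64 {
//		return p.b // one load with displacement 8, not ADDQ $8 then a load
//	}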
// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
    (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
    (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
  && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
  && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
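// Illustration (not a rewrite rule): folding the LEAQ keeps the symbol on
// the memory op itself, so a read of a package-level variable compiles to a
// single symbol-relative load. Sketch; counter is a hypothetical global:
//
//	var counter int64
//
//	func read() int64 {
//		return counter // one symbol-relative MOVQ, no separate LEAQ
//	}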
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)

(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
    (MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
    (MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)

(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
    (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
    (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
    (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) ->
    (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)

// combine SHLQ into indexed loads and stores
(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)

(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)
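// Illustration (not a rewrite rule): the indexed forms cover slice and array
// indexing, where the element address is base + scale*index. Sketch:
//
//	func get(s []int64, i int) int64 {
//		return s[i] // one scaled-index load (scale 8), no separate shift/add
//	}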
// combine ADDQ into pointer of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)

// combine ADDQ into index of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)
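// Note on the index scaling above (explanatory, not a rule): an idx2/idx4/idx8
// op addresses off + ptr + scale*idx, so adding a constant d to the index is
// the same as adding scale*d to the displacement. For MOVWloadidx2:
//
//	addr = c + ptr + 2*(idx+d) = (c + 2*d) + ptr + 2*idx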
// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)

// LEAQ into LEAQ1
(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
    (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ1 into LEAQ
(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ into LEAQ[248]
(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
    (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
    (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
    (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ[248] into LEAQ
(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
    (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
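// Illustration (not a rewrite rule): chained address computations collapse,
// so a fixed offset on an indexed address needs no separate add. Sketch
// (ignoring any bounds checks the frontend may insert):
//
//	func addr(a *[8]int64, i int) *int64 {
//		return &a[i] // a single LEAQ8; a constant +1 on i would fold into
//	}                    // the displacement rather than a separate ADDQ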
// Constant comparisons.
(CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)

// Other known comparisons.
(CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT)
(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT)
(CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT)
(CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT)
(CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT)
(CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
(CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
(CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT)

// TODO: DIVxU also.

// Absorb flag constants into SBB ops.
(SBBQcarrymask (FlagEQ)) -> (MOVQconst [0])
(SBBQcarrymask (FlagLT_ULT)) -> (MOVQconst [-1])
(SBBQcarrymask (FlagLT_UGT)) -> (MOVQconst [0])
(SBBQcarrymask (FlagGT_ULT)) -> (MOVQconst [-1])
(SBBQcarrymask (FlagGT_UGT)) -> (MOVQconst [0])
(SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
(SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
(SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
(SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
(SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])
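// Illustration (not a rewrite rule): the "known comparisons" above let the
// compiler decide some compares statically. A zero-extended byte is always
// below any constant larger than 0xFF, so this compare folds to a flag
// constant and then (via the rules below) to a constant result:
//
//	func alwaysTrue(b byte) bool {
//		return uint64(b) < 1000 // CMPQconst (MOVBQZX b) [1000] -> FlagLT_ULT
//	}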
// Absorb flag constants into branches.
((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) -> (First nil yes no)
((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) -> (First nil no yes)
((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) -> (First nil yes no)
((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) -> (First nil no yes)
((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) -> (First nil yes no)
((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) -> (First nil no yes)
((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) -> (First nil yes no)
((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) -> (First nil no yes)
((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) -> (First nil yes no)
((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) -> (First nil no yes)

// Absorb flag constants into SETxx ops.
((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) -> (MOVLconst [1])
((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) -> (MOVLconst [0])
((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) -> (MOVLconst [1])
((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) -> (MOVLconst [0])
((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) -> (MOVLconst [1])
((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) -> (MOVLconst [0])
((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) -> (MOVLconst [1])
((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) -> (MOVLconst [0])
((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) -> (MOVLconst [1])
((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) -> (MOVLconst [0])

(SETEQstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETEQstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETEQstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETEQstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETEQstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)

(SETNEstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETNEstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETNEstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETNEstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETNEstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)

(SETLstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETLstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETLstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETLstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETLstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETLEstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETLEstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETLEstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETLEstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETLEstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)

(SETGstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETGstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETGstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETGstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETGstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)

(SETGEstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETGEstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETGEstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETGEstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETGEstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)

(SETBstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETBstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETBstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETBstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETBstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)

(SETBEstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETBEstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETBEstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETBEstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETBEstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)

(SETAstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETAstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)

(SETAEstore [off] {sym} ptr x:(FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETAEstore [off] {sym} ptr x:(FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAEstore [off] {sym} ptr x:(FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETAEstore [off] {sym} ptr x:(FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAEstore [off] {sym} ptr x:(FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
// Remove redundant *const ops
(ADDQconst [0] x) -> x
(ADDLconst [c] x) && int32(c)==0 -> x
(SUBQconst [0] x) -> x
(SUBLconst [c] x) && int32(c) == 0 -> x
(ANDQconst [0] _) -> (MOVQconst [0])
(ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
(ANDQconst [-1] x) -> x
(ANDLconst [c] x) && int32(c)==-1 -> x
(ORQconst [0] x) -> x
(ORLconst [c] x) && int32(c)==0 -> x
(ORQconst [-1] _) -> (MOVQconst [-1])
(ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
(XORQconst [0] x) -> x
(XORLconst [c] x) && int32(c)==0 -> x
// TODO: since we got rid of the W/B versions, we might miss
// things like (ANDLconst [0x100] x) which were formerly
// (ANDBconst [0] x). Probably doesn't happen very often.
// If we cared, we might do:
// (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0])

// Remove redundant ops
// Not in generic rules, because they may appear after lowering, e.g. of Slicemask
(NEG(Q|L) (NEG(Q|L) x)) -> x

// Convert constant subtracts to constant adds
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)

// generic constant folding
// TODO: more of this
(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
(ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
(ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x)
(ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
(SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c])
(SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x)
(SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
(SARLconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int32(d))>>uint64(c)])
(SARWconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int16(d))>>uint64(c)])
(SARBconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int8(d))>>uint64(c)])
(NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
(NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
(MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
(MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])
(ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d])
(ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
(ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d])
(ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
(XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d])
(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
(NOTL (MOVLconst [c])) -> (MOVLconst [^c])
(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))])

// generic simplifications
// TODO: more of this
(ADDQ x (NEGQ y)) -> (SUBQ x y)
(ADDL x (NEGL y)) -> (SUBL x y)
(SUBQ x x) -> (MOVQconst [0])
(SUBL x x) -> (MOVLconst [0])
(ANDQ x x) -> x
(ANDL x x) -> x
(ORQ x x) -> x
(ORL x x) -> x
(XORQ x x) -> (MOVQconst [0])
(XORL x x) -> (MOVLconst [0])
(NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) -> (ADDQconst [-c] x)
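// Illustration (not a rewrite rule): the folds and simplifications above
// evaluate fully constant subexpressions and algebraic identities at compile
// time. Sketch (whether a given shape survives to this pass depends on the
// earlier generic rules):
//
//	func f(x uint64) uint64 {
//		return (x ^ x) + 42 // XORQ x x -> 0; the add then folds to 42
//	}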
// checking AND against 0.
(CMPQconst (ANDQ x y) [0]) -> (TESTQ x y)
(CMPLconst (ANDL x y) [0]) -> (TESTL x y)
(CMPWconst (ANDL x y) [0]) -> (TESTW x y)
(CMPBconst (ANDL x y) [0]) -> (TESTB x y)
(CMPQconst (ANDQconst [c] x) [0]) -> (TESTQconst [c] x)
(CMPLconst (ANDLconst [c] x) [0]) -> (TESTLconst [c] x)
(CMPWconst (ANDLconst [c] x) [0]) -> (TESTWconst [int64(int16(c))] x)
(CMPBconst (ANDLconst [c] x) [0]) -> (TESTBconst [int64(int8(c))] x)

// Convert TESTx to TESTxconst if possible.
(TESTQ (MOVQconst [c]) x) && is32Bit(c) -> (TESTQconst [c] x)
(TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)
(TESTW (MOVLconst [c]) x) -> (TESTWconst [c] x)
(TESTB (MOVLconst [c]) x) -> (TESTBconst [c] x)

// TEST %reg,%reg is shorter than CMP
(CMPQconst x [0]) -> (TESTQ x x)
(CMPLconst x [0]) -> (TESTL x x)
(CMPWconst x [0]) -> (TESTW x x)
(CMPBconst x [0]) -> (TESTB x x)
(TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst -> (TESTQ x x)
(TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTL x x)
(TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTW x x)
(TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTB x x)

// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.

// Little-endian loads

(ORL x0:(MOVBload [i0] {s} p mem)
    sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)

(ORQ x0:(MOVBload [i0] {s} p mem)
    sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)

(ORL x0:(MOVWload [i0] {s} p mem)
    sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)

(ORQ x0:(MOVWload [i0] {s} p mem)
    sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)

(ORQ x0:(MOVLload [i0] {s} p mem)
    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)

(ORL
    s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
    or:(ORL
        s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
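// Illustration (not a rewrite rule): the merges above (and the ORQ variants
// that follow) recognize the byte-assembly pattern used by encoding/binary,
// so, assuming the loads stay in one block, code like this becomes a single
// wide load with no shifts or ORs:
//
//	import "encoding/binary"
//
//	func load32(b []byte) uint32 {
//		return binary.LittleEndian.Uint32(b) // one MOVL from memory
//	}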
(ORQ
    s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)

(ORQ
    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
        y))
  && i1 == i0+2
  && j1 == j0+16
  && j0 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)

// Little-endian indexed loads

(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
    sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)

(ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem)
    sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem)

(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem)
    sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)

(ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem)
    sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)

(ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem)
    sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)

(ORL
    s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
    or:(ORL
        s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
(ORQ
    s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)

(ORQ
    s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
        y))
  && i1 == i0+2
  && j1 == j0+16
  && j0 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)

// Big-endian loads

(ORL
    x1:(MOVBload [i1] {s} p mem)
    sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))

(ORQ
    x1:(MOVBload [i1] {s} p mem)
    sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))

(ORL
    r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
    sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))

(ORQ
    r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
    sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))

(ORQ
    r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
    sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))

(ORL
    s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem))
    or:(ORL
        s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)

(ORQ
    s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem))
    or:(ORQ
        s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)

(ORQ
    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
    or:(ORQ
        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
        y))
  && i1 == i0+2
  && j1 == j0-16
  && j1 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)

// Big-endian indexed loads

(ORL
    x1:(MOVBloadidx1 [i1] {s} p idx mem)
    sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))

(ORQ
    x1:(MOVBloadidx1 [i1] {s} p idx mem)
    sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem))

(ORL
    r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
    sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))

(ORQ
    r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))
    sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem))

(ORQ
    r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem))
    sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(sh)
  -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem))

(ORL
    s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
    or:(ORL
        s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)

(ORQ
    s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
    or:(ORQ
        s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)

(ORQ
    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))
    or:(ORQ
        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
        y))
  && i1 == i0+2
  && j1 == j0-16
  && j1 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0)
  && clobber(x1)
  && clobber(r0)
  && clobber(r1)
  && clobber(s0)
  && clobber(s1)
  && clobber(or)
  -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y)

// Combine 2 byte stores + shift into rolw 8 + word store
(MOVBstore [i] {s} p w
    x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
  && x0.Uses == 1
  && clobber(x0)
  -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)

(MOVBstoreidx1 [i] {s} p idx w
    x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem))
  && x0.Uses == 1
  && clobber(x0)
  -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem)

// Combine stores + shifts into bswap and larger (unaligned) stores
(MOVBstore [i] {s} p w
    x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
    x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
    x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && clobber(x0)
  && clobber(x1)
  && clobber(x2)
  -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)

(MOVBstoreidx1 [i] {s} p idx w
    x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w)
    x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w)
    x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && clobber(x0)
  && clobber(x1)
  && clobber(x2)
  -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem)
(MOVBstore [i] {s} p w
    x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
    x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
    x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
    x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
    x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
    x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
    x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && clobber(x0)
  && clobber(x1)
  && clobber(x2)
  && clobber(x3)
  && clobber(x4)
  && clobber(x5)
  && clobber(x6)
  -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)

(MOVBstoreidx1 [i] {s} p idx w
    x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w)
    x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w)
    x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w)
    x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w)
    x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w)
    x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w)
    x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && clobber(x0)
  && clobber(x1)
  && clobber(x2)
  && clobber(x3)
  && clobber(x4)
  && clobber(x5)
  && clobber(x6)
  -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem)
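// Illustration (not a rewrite rule): the store/shift merges above recognize
// manual big-endian byte stores, so a sequence like the following (mirroring
// what encoding/binary.BigEndian.PutUint32 does) becomes a byte swap plus
// one wide store, assuming the stores stay adjacent:
//
//	func put32(b []byte, v uint32) {
//		_ = b[3] // bounds hint
//		b[0] = byte(v >> 24)
//		b[1] = byte(v >> 16)
//		b[2] = byte(v >> 8)
//		b[3] = byte(v) // all four fuse into BSWAPL + a 4-byte store
//	}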
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
  && config.useSSE
  && x.Uses == 1
  && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
  && ValAndOff(c).Val() == 0
  && ValAndOff(c2).Val() == 0
  && clobber(x)
  -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)

(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem)
(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem)
(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)

(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem)
(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
  && clobber(x)
  -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)

// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstore [i-2] {s} p w0 mem)
(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstore [i-4] {s} p w0 mem)

(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p idx w mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)

(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
  -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)

(MOVBstore [i] {s} p
    x1:(MOVBload [j] {s2} p2 mem)
    mem2:(MOVBstore [i-1] {s} p
        x2:(MOVBload [j-1] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1)
  && clobber(x2)
  && clobber(mem2)
  -> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)

(MOVWstore [i] {s} p
    x1:(MOVWload [j] {s2} p2 mem)
    mem2:(MOVWstore [i-2] {s} p
        x2:(MOVWload [j-2] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1)
  && clobber(x2)
  && clobber(mem2)
  -> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)

(MOVLstore [i] {s} p
    x1:(MOVLload [j] {s2} p2 mem)
    mem2:(MOVLstore [i-4] {s} p
        x2:(MOVLload [j-4] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1)
  && clobber(x2)
  && clobber(mem2)
  -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)

(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
    (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
    (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
    (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
    (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
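// Illustration (not a rewrite rule): the load+store pair merges above turn
// adjacent narrow copies between two objects into one wider copy. Sketch:
//
//	func copy2(dst, src *[2]byte) {
//		dst[0] = src[0]
//		dst[1] = src[1] // both byte moves fuse into one 2-byte load + store
//	}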
(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)

(MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
  (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)

(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
  (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
  (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
  (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
  (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)

(MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {sym} ptr mem)
(MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload [off1+off2] {sym} ptr mem)
(MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
(MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
(MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore [off1+off2] {sym} ptr val mem)
(MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem)
(MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
(MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
  (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
  (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
  (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
  (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
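
// The LEAL/ADDLconst rules above fold a constant address computation into
// the displacement of the memory operation itself. Illustration only
// (hypothetical Go source): a field access such as
//
//	type pair struct{ a, b int64 }
//	func second(p *pair) int64 { return p.b }
//
// conceptually computes p+8 and then loads; after folding, the load carries
// the 8 as its own offset, provided the combined offset still fits in 32 bits.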
// Merge load and op
// TODO: add indexed variants?
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)

// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (MOVQatomicload [off1+off2] {sym} ptr mem)
(MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (MOVLatomicload [off1+off2] {sym} ptr mem)
(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
  (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
  (MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)

// Merge ADDQconst and LEAQ into atomic stores.
(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (XCHGQ [off1+off2] {sym} val ptr mem)
(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
  (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (XCHGL [off1+off2] {sym} val ptr mem)
(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
  (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)

// Merge ADDQconst into atomic adds.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (XADDQlock [off1+off2] {sym} val ptr mem)
(XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
  (XADDLlock [off1+off2] {sym} val ptr mem)
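
// Illustration only (hypothetical Go source): an atomic add to a struct
// field,
//
//	import "sync/atomic"
//
//	type stats struct{ hits, misses int64 }
//	func miss(s *stats) { atomic.AddInt64(&s.misses, 1) }
//
// computes &s.misses as s+8 before the locked XADD; the rules above fold
// that constant offset into the XADDQlock's displacement.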
// Merge ADDQconst into atomic compare and swaps.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
  (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
(CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
  (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)

// We don't need the conditional move if we know the arg of BSF is not zero.
(CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x
// Extension is unnecessary for trailing zeros.
(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))

// Simplify indexed loads/stores
(MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem)
(MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem)
(MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem)
(MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem)
(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem)
(MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem)
(MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem)
(MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem)
(MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem)
(MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem)
(MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem)
(MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem)
(MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem)
(MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem)
(MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem)
(MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem)
(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem)
(MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem)
(MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem)
(MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem)
(MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem)
(MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem)
(MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem)
(MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem)
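
// When the index of an indexed load/store turns out to be a constant c, the
// rules above retire the scaled-index form and fold c into the displacement.
// Worked instance (an example, not a rule from this file):
// (MOVLloadidx4 [8] {s} p (MOVQconst [3]) mem) becomes
// (MOVLload [20] {s} p mem), since 8 + 4*3 = 20.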
// Redundant sign/zero extensions
// Note: see issue 21963. We have to make sure we use the right type on
// the resulting extension (the outer type, not the inner type).
(MOVLQSX (MOVLQSX x)) -> (MOVLQSX x)
(MOVLQSX (MOVWQSX x)) -> (MOVWQSX x)
(MOVLQSX (MOVBQSX x)) -> (MOVBQSX x)
(MOVWQSX (MOVWQSX x)) -> (MOVWQSX x)
(MOVWQSX (MOVBQSX x)) -> (MOVBQSX x)
(MOVBQSX (MOVBQSX x)) -> (MOVBQSX x)
(MOVLQZX (MOVLQZX x)) -> (MOVLQZX x)
(MOVLQZX (MOVWQZX x)) -> (MOVWQZX x)
(MOVLQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVWQZX (MOVWQZX x)) -> (MOVWQZX x)
(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)

(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)

// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
(MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) -> (MOVQf2i val)
(MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) -> (MOVLf2i val)
(MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) -> (MOVQi2f val)
(MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) -> (MOVLi2f val)

// Other load-like ops.
(ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ADDQ x (MOVQf2i y))
(ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ADDL x (MOVLf2i y))
(SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (SUBQ x (MOVQf2i y))
(SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (SUBL x (MOVLf2i y))
(ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ANDQ x (MOVQf2i y))
(ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ANDL x (MOVLf2i y))
( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ( ORQ x (MOVQf2i y))
( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ( ORL x (MOVLf2i y))
(XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (XORQ x (MOVQf2i y))
(XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (XORL x (MOVLf2i y))

(ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (ADDSD x (MOVQi2f y))
(ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (ADDSS x (MOVLi2f y))
(SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (SUBSD x (MOVQi2f y))
(SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (SUBSS x (MOVLi2f y))
(MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (MULSD x (MOVQi2f y))
(MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (MULSS x (MOVLi2f y))
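
// Illustration: math.Float64bits is essentially
//
//	func Float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
//
// which lowers to a floating-point store followed by an integer load of the
// same address; the rules above collapse that round trip through memory into
// a direct MOVQf2i register-to-register move.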
// Redirect stores to use the other register set.
(MOVQstore [off] {sym} ptr (MOVQf2i val) mem) -> (MOVSDstore [off] {sym} ptr val mem)
(MOVLstore [off] {sym} ptr (MOVLf2i val) mem) -> (MOVSSstore [off] {sym} ptr val mem)
(MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) -> (MOVQstore [off] {sym} ptr val mem)
(MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) -> (MOVLstore [off] {sym} ptr val mem)

// Load args directly into the register class where they will be used.
// We do this by just modifying the type of the Arg.
(MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
(MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
(MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
(MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})

// LEAQ is rematerializeable, so this helps to avoid register spill.
// See issue 22947 for details.
(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)

// HMULx is commutative, but its first argument must go in AX.
// If possible, put a rematerializeable value in the first argument slot,
// to reduce the odds that another value will have to be spilled
// specifically to free up AX.
(HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L) y x)
(HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L)U y x)

// Fold loads into compares.
// Note: these may be undone by the flagalloc pass.
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))

(CMP(Q|L|W|B)const l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
  && validValAndOff(c, off)
  && clobber(l) ->
  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem)

(CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem)
(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)

(TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
  && l == l2
  && l.Uses == 2
  && validValAndOff(0,off)
  && clobber(l) ->
  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem)

// Constant-fold loads from read-only data.
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.BigEndian))])
(MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.BigEndian))])
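
// Worked instance of the read-only fold (illustrative, not a rule from this
// file): if sym is the read-only symbol holding the bytes of the string
// constant "hello", then
// (MOVBload [1] {sym} (SB) _) folds to (MOVLconst [101]),
// since byte 1 of "hello" is 'e' (0x65 == 101), known at compile time.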