// Origin: github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/cmd/compile/internal/ssa/gen/AMD64.rules
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Lowering arithmetic
(Add(64|32|16|8) x y) -> (ADD(Q|L|L|L) x y)
(AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y)
(AddPtr x y) && config.PtrSize == 4 -> (ADDL x y)
(Add(32|64)F x y) -> (ADDS(S|D) x y)

(Sub(64|32|16|8) x y) -> (SUB(Q|L|L|L) x y)
(SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y)
(SubPtr x y) && config.PtrSize == 4 -> (SUBL x y)
(Sub(32|64)F x y) -> (SUBS(S|D) x y)

(Mul(64|32|16|8) x y) -> (MUL(Q|L|L|L) x y)
(Mul(32|64)F x y) -> (MULS(S|D) x y)

// Overflow-checked unsigned multiply: Select0 yields the product and
// Select1 becomes SETO applied to the flags result of MUL(Q|L)U.
(Select0 (Mul64uover x y)) -> (Select0 <typ.UInt64> (MULQU x y))
(Select0 (Mul32uover x y)) -> (Select0 <typ.UInt32> (MULLU x y))
(Select1 (Mul(64|32)uover x y)) -> (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y)))

(Hmul(64|32) x y) -> (HMUL(Q|L) x y)
(Hmul(64|32)u x y) -> (HMUL(Q|L)U x y)

// Division takes Select0 of the DIVx tuple (Mod, further below, takes Select1).
// 8-bit operands are first extended to 16 bits and divided with DIVW/DIVWU.
(Div(64|32|16) [a] x y) -> (Select0 (DIV(Q|L|W) [a] x y))
(Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Div(64|32|16)u x y) -> (Select0 (DIV(Q|L|W)U x y))
(Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F x y) -> (DIVS(S|D) x y)

// Add with carry-in / subtract with borrow-in. The carry operand c is turned
// into flags via NEGLflags; the carry-out is rebuilt from the resulting flags
// with SBBQcarrymask followed by NEGQ.
(Select0 (Add64carry x y c)) ->
    (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c)) ->
    (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c)) ->
    (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c)) ->
    (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))

// Optimize ADCQ and friends
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) -> (ADCQconst x [c] carry)
(ADCQ x y (FlagEQ)) -> (ADDQcarry x y)
(ADCQconst x [c] (FlagEQ)) -> (ADDQconstcarry x [c])
(ADDQcarry x (MOVQconst [c])) && is32Bit(c) -> (ADDQconstcarry x [c])
(SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) -> (SBBQconst x [c] borrow)
(SBBQ x y (FlagEQ)) -> (SUBQborrow x y)
(SBBQconst x [c] (FlagEQ)) -> (SUBQconstborrow x [c])
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) -> (SUBQconstborrow x [c])
(Select1 (NEGLflags (MOVQconst [0]))) -> (FlagEQ)
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) -> x


(Mul64uhilo x y) -> (MULQU2 x y)
(Div128u xhi xlo y) -> (DIVQU2 xhi xlo y)

(Avg64u x y) -> (AVGQU x y)

(Mod(64|32|16) [a] x y) -> (Select1 (DIV(Q|L|W) [a] x y))
(Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Mod(64|32|16)u x y) -> (Select1 (DIV(Q|L|W)U x y))
(Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))

(And(64|32|16|8) x y) -> (AND(Q|L|L|L) x y)
(Or(64|32|16|8) x y) -> (OR(Q|L|L|L) x y)
(Xor(64|32|16|8) x y) -> (XOR(Q|L|L|L) x y)
(Com(64|32|16|8) x) -> (NOT(Q|L|L|L) x)

(Neg(64|32|16|8) x) -> (NEG(Q|L|L|L) x)
// Float negation XORs the operand with the constant math.Copysign(0, -1).
(Neg32F x) -> (PXOR x (MOVSSconst <typ.Float32> [auxFrom32F(float32(math.Copysign(0, -1)))]))
(Neg64F x) -> (PXOR x (MOVSDconst <typ.Float64> [auxFrom64F(math.Copysign(0, -1))]))

// Lowering boolean ops
(AndB x y) -> (ANDL x y)
(OrB x y) -> (ORL x y)
(Not x) -> (XORLconst [1] x)

// Lowering pointer arithmetic
// Rule order matters: the MOVQconst form is only reached when the offset
// fails the is32Bit check of the preceding rule.
(OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr)
(OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr)
(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr)

// Lowering other arithmetic
(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) -> (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) -> (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) -> (BSFL (BTSLconst <typ.UInt32> [ 8] x))

(Ctz64NonZero x) -> (Select0 (BSFQ x))
(Ctz32NonZero x) -> (BSFL x)
(Ctz16NonZero x) -> (BSFL x)
(Ctz8NonZero x) -> (BSFL x)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
// places the index of the highest set bit where we want it.
(BitLen64 <t> x) -> (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
(BitLen32 x) -> (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
(BitLen16 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
(BitLen8 x) -> (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))

(Bswap(64|32) x) -> (BSWAP(Q|L) x)

(PopCount64 x) -> (POPCNTQ x)
(PopCount32 x) -> (POPCNTL x)
(PopCount16 x) -> (POPCNTL (MOVWQZX <typ.UInt32> x))
(PopCount8 x) -> (POPCNTL (MOVBQZX <typ.UInt32> x))

(Sqrt x) -> (SQRTSD x)

// All four rounding ops lower to ROUNDSD; the aux value 0..3 selects the mode.
(RoundToEven x) -> (ROUNDSD [0] x)
(Floor x) -> (ROUNDSD [1] x)
(Ceil x) -> (ROUNDSD [2] x)
(Trunc x) -> (ROUNDSD [3] x)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 x) -> (MOVBQSX x)
(SignExt8to32 x) -> (MOVBQSX x)
(SignExt8to64 x) -> (MOVBQSX x)
(SignExt16to32 x) -> (MOVWQSX x)
(SignExt16to64 x) -> (MOVWQSX x)
(SignExt32to64 x) -> (MOVLQSX x)

(ZeroExt8to16 x) -> (MOVBQZX x)
(ZeroExt8to32 x) -> (MOVBQZX x)
(ZeroExt8to64 x) -> (MOVBQZX x)
(ZeroExt16to32 x) -> (MOVWQZX x)
(ZeroExt16to64 x) -> (MOVWQZX x)
(ZeroExt32to64 x) -> (MOVLQZX x)

(Slicemask <t> x) -> (SARQconst (NEGQ <t> x) [63])

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 x) -> x
(Trunc32to8 x) -> x
(Trunc32to16 x) -> x
(Trunc64to8 x) -> x
(Trunc64to16 x) -> x
(Trunc64to32 x) -> x

// Lowering float <-> int
(Cvt32to32F x) -> (CVTSL2SS x)
(Cvt32to64F x) -> (CVTSL2SD x)
(Cvt64to32F x) -> (CVTSQ2SS x)
(Cvt64to64F x) -> (CVTSQ2SD x)

(Cvt32Fto32 x) -> (CVTTSS2SL x)
(Cvt32Fto64 x) -> (CVTTSS2SQ x)
(Cvt64Fto32 x) -> (CVTTSD2SL x)
(Cvt64Fto64 x) -> (CVTTSD2SQ x)

(Cvt32Fto64F x) -> (CVTSS2SD x)
(Cvt64Fto32F x) -> (CVTSD2SS x)

(Round(32|64)F x) -> x

// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
// result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
// Note that 16- and 8-bit left shifts are done with the 32-bit SHLL, so their
// mask compares the shift amount against 32 rather than the operand width.
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))

// When shiftIsBounded(v) holds, the shift is emitted without the mask.
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SHLL x y)

// Unsigned right shifts use width-specific SHR(Q|L|W|B) and compare against
// the operand width (64, 32, 16, 8).
(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))

(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) -> (SHRB x y)

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
// Unbounded signed right shifts: the shift amount y is ORed with the
// complement of the carry mask produced by comparing y against the width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) -> (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))

// When shiftIsBounded(v) holds, the shift amount is passed through unchanged.
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) -> (SARB x y)

// Lowering comparisons
(Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y))
// Use SETGF with reversed operands to dodge NaN case
(Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x))

(Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
// Use SETGEF with reversed operands to dodge NaN case
(Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))

(Greater(64|32|16|8) x y) -> (SETG (CMP(Q|L|W|B) x y))
(Greater(64|32|16|8)U x y) -> (SETA (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Greater(32|64)F x y) -> (SETGF (UCOMIS(S|D) x y))

(Geq(64|32|16|8) x y) -> (SETGE (CMP(Q|L|W|B) x y))
(Geq(64|32|16|8)U x y) -> (SETAE (CMP(Q|L|W|B) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language.
(Geq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) x y))

// Boolean equality (EqB/NeqB) shares the byte compare CMPB with Eq8/Neq8.
(Eq(64|32|16|8|B) x y) -> (SETEQ (CMP(Q|L|W|B|B) x y))
(EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y))
(EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y))
(Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))

(Neq(64|32|16|8|B) x y) -> (SETNE (CMP(Q|L|W|B|B) x y))
(NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y))
(NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y))
(Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))

(Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32

// Lowering loads
// The load op is chosen from the type t; pointer loads follow config.PtrSize.
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem)
(Load <t> ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) -> (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) -> (MOVSDload ptr mem)

// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVSSstore ptr val mem)

// Remaining stores are selected purely by the size of the stored type.
(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 -> (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 -> (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)

// Lowering moves
(Move [0] _ _ mem) -> mem
(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE ->
    (MOVQstore [8] dst (MOVQload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

// 32, 48 and 64 byte moves are decomposed into smaller Move ops.
(Move [32] dst src mem) ->
    (Move [16]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

(Move [48] dst src mem) && config.useSSE ->
    (Move [32]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

(Move [64] dst src mem) && config.useSSE ->
    (Move [32]
        (OffPtr <dst.Type> dst [32])
        (OffPtr <src.Type> src [32])
        (Move [32] dst src mem))

// Sizes that are not a single access width are copied with two load/store
// pairs. For 7, 11 and 13..15 bytes the two accesses overlap (the second one
// is placed at offset size-4 or size-8).
(Move [3] dst src mem) ->
    (MOVBstore [2] dst (MOVBload [2] src mem)
        (MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem) ->
    (MOVBstore [4] dst (MOVBload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem) ->
    (MOVWstore [4] dst (MOVWload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem) ->
    (MOVLstore [3] dst (MOVLload [3] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [9] dst src mem) ->
    (MOVBstore [8] dst (MOVBload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [10] dst src mem) ->
    (MOVWstore [8] dst (MOVWload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [12] dst src mem) ->
    (MOVLstore [8] dst (MOVLload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 ->
    (MOVQstore [s-8] dst (MOVQload [s-8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 <= 8 ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE ->
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore [8] dst (MOVQload [8] src mem)
            (MOVQstore dst (MOVQload src mem) mem)))

// Medium copying uses a duff device.
(Move [s] dst src mem)
    && s > 64 && s <= 16*64 && s%16 == 0
    && !config.noDuffDevice ->
    (DUFFCOPY [14*(64-s/16)] dst src mem)
// 14 and 64 are magic constants. 14 is the number of bytes to encode:
//	MOVUPS	(SI), X0
//	ADDQ	$16, SI
//	MOVUPS	X0, (DI)
//	ADDQ	$16, DI
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.

// Large copying uses REP MOVSQ.
// REPMOVSQ takes the count in 8-byte words (s/8), hence the s%8 == 0 guard.
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 ->
    (REPMOVSQ dst src (MOVQconst [s/8]) mem)

// Lowering Zero instructions
(Zero [0] _ mem) -> mem
(Zero [1] destptr mem) -> (MOVBstoreconst [0] destptr mem)
(Zero [2] destptr mem) -> (MOVWstoreconst [0] destptr mem)
(Zero [4] destptr mem) -> (MOVLstoreconst [0] destptr mem)
(Zero [8] destptr mem) -> (MOVQstoreconst [0] destptr mem)

// Sizes 3, 5, 6 and 7 are zeroed with two constant stores; for 7 the two
// 4-byte stores overlap (offsets 0 and 3).
(Zero [3] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,2)] destptr
        (MOVWstoreconst [0] destptr mem))
(Zero [5] destptr mem) ->
    (MOVBstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [0] destptr mem))
(Zero [6] destptr mem) ->
    (MOVWstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [0] destptr mem))
(Zero [7] destptr mem) ->
    (MOVLstoreconst [makeValAndOff(0,3)] destptr
        (MOVLstoreconst [0] destptr mem))

// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
    (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
        (MOVQstoreconst [0] destptr mem))

// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,8)] destptr
        (MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,16)] destptr
        (MOVQstoreconst [makeValAndOff(0,8)] destptr
            (MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,24)] destptr
        (MOVQstoreconst [makeValAndOff(0,16)] destptr
            (MOVQstoreconst [makeValAndOff(0,8)] destptr
                (MOVQstoreconst [0] destptr mem))))

// With SSE, 9..15 bytes are zeroed with two 8-byte stores, the second one
// placed at offset s-8.
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
    (MOVQstoreconst [makeValAndOff(0,s-8)] destptr
        (MOVQstoreconst [0] destptr mem))

// Adjust zeros to be a multiple of 16 bytes.
// The leading s%16 bytes are cleared with one MOVOstore (remainder > 8) or one
// MOVQstoreconst (remainder <= 8); the rest becomes a Zero of s-s%16 bytes
// starting at offset s%16.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
    (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
        (MOVOstore destptr (MOVOconst [0]) mem))

(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
    (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
        (MOVQstoreconst [0] destptr mem))

// Multiples of 16 up to 64 bytes are zeroed with unrolled MOVOstore ops.
(Zero [16] destptr mem) && config.useSSE ->
    (MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
        (MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
        (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
            (MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) && config.useSSE ->
    (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
        (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
            (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
                (MOVOstore destptr (MOVOconst [0]) mem))))

// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
    && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice ->
    (DUFFZERO [s] destptr (MOVOconst [0]) mem)

// Large zeroing uses REP STOSQ.
// REPSTOSQ takes the count in 8-byte words (s/8), hence the s%8 == 0 guard.
(Zero [s] destptr mem)
    && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
    && s%8 == 0 ->
    (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)

// Lowering constants
// 8-, 16- and 32-bit constants, and booleans, all lower to MOVLconst.
(Const8 [val]) -> (MOVLconst [val])
(Const16 [val]) -> (MOVLconst [val])
(Const32 [val]) -> (MOVLconst [val])
(Const64 [val]) -> (MOVQconst [val])
(Const32F [val]) -> (MOVSSconst [val])
(Const64F [val]) -> (MOVSDconst [val])
(ConstNil) && config.PtrSize == 8 -> (MOVQconst [0])
(ConstNil) && config.PtrSize == 4 -> (MOVLconst [0])
(ConstBool [b]) -> (MOVLconst [b])

// Lowering calls
(StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem)
(ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem)
(InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem)

// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
// Note that the CMOV takes its value operands in the order y x, the reverse of CondSelect.
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
    -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
    -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
    -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)

// If the condition does not set the flags, we need to generate a comparison.
// Conditions narrower than 8 bytes are first zero-extended to 64 bits; the
// 8-byte rules below then compare the condition against zero.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
    -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
    -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
    -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))

(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
    -> (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
    -> (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
    -> (CMOVWNE y x (CMPQconst [0] check))

// Absorb InvertFlags
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)

// Absorb constants generated during lower
// For each constant flag value, the first rule of the pair lists the
// conditions that select the second value operand and the second rule lists
// the conditions that select the first.
(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y

// Miscellaneous
(IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p))
(IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p))
(IsInBounds idx len) && config.PtrSize == 8 -> (SETB (CMPQ idx len))
(IsInBounds idx len) && config.PtrSize == 4 -> (SETB (CMPL idx len))
(IsSliceInBounds idx len) && config.PtrSize == 8 -> (SETBE (CMPQ idx len))
(IsSliceInBounds idx len) && config.PtrSize == 4 -> (SETBE (CMPL idx len))
(NilCheck ptr mem) -> (LoweredNilCheck ptr mem)
(GetG mem) -> (LoweredGetG mem)
(GetClosurePtr) -> (LoweredGetClosurePtr)
(GetCallerPC) -> (LoweredGetCallerPC)
(GetCallerSP) -> (LoweredGetCallerSP)
(Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base)
(Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base)
(LocalAddr {sym} base _) && config.PtrSize == 8 -> (LEAQ {sym} base)
(LocalAddr {sym} base _) && config.PtrSize == 4 -> (LEAL {sym} base)

// A byte store of a SETxx result that has no other use (y.Uses == 1) is
// merged into the corresponding SETxxstore.
(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 -> (SETLstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 -> (SETLEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 -> (SETGstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 -> (SETGEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 -> (SETEQstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 -> (SETNEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 -> (SETBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 -> (SETBEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 -> (SETAstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 -> (SETAEstore [off] {sym} ptr x mem)

// block rewrites
(If (SETL cmp) yes no) -> (LT cmp yes no)
(If (SETLE cmp) yes no) -> (LE cmp yes no)
(If (SETG cmp) yes no) -> (GT cmp yes no)
(If (SETGE cmp) yes no) -> (GE cmp yes no)
(If (SETEQ cmp) yes no) -> (EQ cmp yes no)
(If (SETNE cmp) yes no) -> (NE cmp yes no)
(If (SETB cmp) yes no) -> (ULT cmp yes no)
(If (SETBE cmp) yes no) -> (ULE cmp yes no)
(If (SETA cmp) yes no) -> (UGT cmp yes no)
(If (SETAE cmp) yes no) -> (UGE cmp yes no)
(If (SETO cmp) yes no) -> (OS cmp yes no)

// Special case for floating point - LF/LEF not generated
(If (SETGF cmp) yes no) -> (UGT cmp yes no)
(If (SETGEF cmp) yes no) -> (UGE cmp yes no)
(If (SETEQF cmp) yes no) -> (EQF cmp yes no)
(If (SETNEF cmp) yes no) -> (NEF cmp yes no)

// Fallback: any condition not matched above is tested with TESTB and
// branched on with NE.
(If cond yes no) -> (NE (TESTB cond cond) yes no)

// Atomic loads.  Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)

// Atomic stores.  We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property.  Use normal stores for those?
// The XCHG result is a (value, memory) tuple; only the memory half (Select1)
// is kept for a store.
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))

// Atomic exchanges.
(AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem)
(AtomicExchange64 ptr val mem) -> (XCHGQ val ptr mem)

// Atomic adds.
// XADD(L|Q)lock is wrapped in AddTupleFirst so that Select0 adds val to the
// first tuple element, while Select1 passes straight through to the tuple.
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (XADDLlock val ptr mem))
(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (XADDQlock val ptr mem))
(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDL val (Select0 <t> tuple))
(Select1     (AddTupleFirst32   _ tuple)) -> (Select1 tuple)
(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADDQ val (Select0 <t> tuple))
(Select1     (AddTupleFirst64   _ tuple)) -> (Select1 tuple)

// Atomic compare and swap.
(AtomicCompareAndSwap32 ptr old new_ mem) -> (CMPXCHGLlock ptr old new_ mem)
(AtomicCompareAndSwap64 ptr old new_ mem) -> (CMPXCHGQlock ptr old new_ mem)

// Atomic memory updates.
(AtomicAnd8 ptr val mem) -> (ANDBlock ptr val mem)
(AtomicOr8 ptr val mem) -> (ORBlock ptr val mem)

// Write barrier.
(WB {fn} destptr srcptr mem) -> (LoweredWB {fn} destptr srcptr mem)

// ***************************
// Above: lowering rules
// Below: optimizations
// ***************************
// TODO: Should the optimizations be a separate pass?

// Fold boolean tests into blocks
// Each rule matches an NE block whose control is TESTB applied to the same
// SETxx pattern twice, and branches directly on the underlying flags cmp.
(NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
(NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
(NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
(NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)

// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
// Block forms. The constant variants require the mask to be a single bit
// (isUint32PowerOfTwo / isUint64PowerOfTwo) and use its log2 as the bit index.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTLconst [log2uint32(c)] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTQconst [log2(c)] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> ((ULT|UGE) (BTQconst [log2(c)] x))
// SETxx value forms of the same patterns.
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTLconst [log2uint32(c)] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTQconst [log2(c)] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE) (BTQconst [log2(c)] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)

// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d      -> (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d      -> (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) -> (BTL y x)

// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) -> (SET(EQ|NE) (CMPLconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) -> (SET(EQ|NE) (CMPQconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)

// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTC(Q|L) x y)

// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
// OR/XOR with a single-bit constant of 128 or more becomes BTS/BTC on the bit index.
((ORQ|XORQ)const [c] x)
    && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL)const [c] x)
    && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Lconst [log2uint32(c)] x)
((ORQ|XORQ) (MOVQconst [c]) x)
    && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL) (MOVLconst [c]) x)
    && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
    -> (BT(S|C)Lconst [log2uint32(c)] x)

// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) && !config.nacl -> (BTR(Q|L) x y)
// Constant masks with exactly one zero bit (at position 7 or above) become BTR.
(ANDQconst [c] x)
    && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRQconst [log2(^c)] x)
(ANDLconst [c] x)
    && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRLconst [log2uint32(^c)] x)
(ANDQ (MOVQconst [c]) x)
    && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRQconst [log2(^c)] x)
(ANDL (MOVLconst [c]) x)
    && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
    -> (BTRLconst [log2uint32(^c)] x)

// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
// for instance:
//    x & 0xFFFF0000 -> (x >> 16) << 16
//    x & 0x80000000 -> (x >> 31) << 31
//
// In case the mask is just one bit (like second example above), it conflicts
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
// We thus special-case them, by detecting the shift patterns.

// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) && !config.nacl -> (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x)) && !config.nacl -> (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x)) && !config.nacl -> (BTRQconst [63] x)

// Special case testing first/last bit (with double-shift generated by generic.rules)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
// NOTE(review): this TESTL form matches a 64-bit inner shift (SHRQconst) and emits
// BTQconst, whereas the store variant below matches SHRLconst and emits BTLconst.
// Both test bit 31 of x; confirm the asymmetry is intended.
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)

// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 && !config.nacl
    -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 && !config.nacl
    -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)

// Fold boolean negation into SETcc: each condition maps to its complement
// (NE<->EQ, L<->GE, LE<->G, B<->AE, BE<->A).
(XORLconst [1] (SET(NE|EQ|L|GE|LE|G|B|AE|BE|A) x)) -> (SET(EQ|NE|GE|L|G|LE|AE|B|A|BE) x)

// Special case for floating point - LF/LEF not generated
(NE (TESTB (SETGF  cmp) (SETGF  cmp)) yes no) -> (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)

// Disabled because it interferes with the pattern match above and makes worse code.
// (SETNEF x) -> (ORQ  (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
// (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))

// fold constants into instructions
// The Q forms require is32Bit(c) because the *Qconst ops are only produced
// here for constants that pass that check; the L forms take any MOVLconst.
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)

(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))

(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
(MULL x (MOVLconst [c])) -> (MULLconst [c] x)

(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)

// Combine two constant ops of the same kind. Both inputs already fit in
// 32 bits, so c&d, c^d and c|d do too.
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
(OR(L|Q)const  [c] (OR(L|Q)const  [d] x)) -> (OR(L|Q)const  [c | d] x)

// Combine a single-bit op (BTR/BTC/BTS) with a constant op, 32-bit forms.
// The bit index may be 31, in which case 1<<31 is not a sign-extended 32-bit
// value; the result is renormalized with int64(int32(...)), the same way the
// MULLconst fold below does.
(BTRLconst [c] (ANDLconst [d] x)) -> (ANDLconst [int64(int32(d &^ (1<<uint32(c))))] x)
(ANDLconst [c] (BTRLconst [d] x)) -> (ANDLconst [int64(int32(c &^ (1<<uint32(d))))] x)
(BTRLconst [c] (BTRLconst [d] x)) -> (ANDLconst [int64(int32(^(1<<uint32(c) | 1<<uint32(d))))] x)
(BTCLconst [c] (XORLconst [d] x)) -> (XORLconst [int64(int32(d ^ 1<<uint32(c)))] x)
(XORLconst [c] (BTCLconst [d] x)) -> (XORLconst [int64(int32(c ^ 1<<uint32(d)))] x)
(BTCLconst [c] (BTCLconst [d] x)) -> (XORLconst [int64(int32(1<<uint32(c) ^ 1<<uint32(d)))] x)
(ORLconst  [c] (BTSLconst [d] x)) -> (ORLconst  [int64(int32(c | 1<<uint32(d)))] x)
(BTSLconst [c] (ORLconst  [d] x)) -> (ORLconst  [int64(int32(d | 1<<uint32(c)))] x)
(BTSLconst [c] (BTSLconst [d] x)) -> (ORLconst  [int64(int32(1<<uint32(d) | 1<<uint32(c)))] x)

// Same combinations, 64-bit forms. The bit index can be as large as 63, so
// the merged mask may not fit in a 32-bit immediate; only fold when it does,
// matching the is32Bit guards used for every other *Qconst rule above.
(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(d &^ (1<<uint32(c)))
    -> (ANDQconst [d &^ (1<<uint32(c))] x)
(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(c &^ (1<<uint32(d)))
    -> (ANDQconst [c &^ (1<<uint32(d))] x)
(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d)))
    -> (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
(BTCQconst [c] (XORQconst [d] x)) && is32Bit(d ^ 1<<uint32(c))
    -> (XORQconst [d ^ 1<<uint32(c)] x)
(XORQconst [c] (BTCQconst [d] x)) && is32Bit(c ^ 1<<uint32(d))
    -> (XORQconst [c ^ 1<<uint32(d)] x)
(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d))
    -> (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
(ORQconst  [c] (BTSQconst [d] x)) && is32Bit(c | 1<<uint32(d))
    -> (ORQconst [c | 1<<uint32(d)] x)
(BTSQconst [c] (ORQconst  [d] x)) && is32Bit(d | 1<<uint32(c))
    -> (ORQconst [d | 1<<uint32(c)] x)
(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(d) | 1<<uint32(c))
    -> (ORQconst [1<<uint32(d) | 1<<uint32(c)] x)

(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)

// OR / XOR with a constant operand.
(ORQ  x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
(ORL  x (MOVLconst [c]))               -> (ORLconst [c] x)

(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
(XORL x (MOVLconst [c]))               -> (XORLconst [c] x)

// Shifts by a constant amount: the count is masked to 6 bits for
// quadword shifts and to 5 bits for all narrower ones.
(SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
(SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)

(SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
(SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
// Word/byte logical right shifts by at least the operand width yield zero.
(SHRW x (MOV(Q|L)const [c])) && c&31 < 16  -> (SHRWconst [c&31] x)
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8   -> (SHRBconst [c&31] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8  -> (MOVLconst [0])

// Word/byte arithmetic right shifts clamp the count to width-1.
(SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
(SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
(SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
(SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)

// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
786 ((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y) 787 ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) 788 ((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y) 789 ((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y)) 790 791 ((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y) 792 ((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y)) 793 ((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y) 794 ((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y)) 795 796 ((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y) 797 ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) 798 ((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y) 799 ((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y)) 800 801 ((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y) 802 ((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGL <t> y)) 803 ((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y) 804 ((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y)) 805 806 // Constant rotate instructions 807 ((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c]) 808 ((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c]) 809 810 ((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c]) 811 ((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 -> (ROLBconst x 
[c]) 812 813 (ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x) 814 (ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x) 815 (ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x) 816 (ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x) 817 818 (RotateLeft8 a b) -> (ROLB a b) 819 (RotateLeft16 a b) -> (ROLW a b) 820 (RotateLeft32 a b) -> (ROLL a b) 821 (RotateLeft64 a b) -> (ROLQ a b) 822 823 // Non-constant rotates. 824 // We want to issue a rotate when the Go source contains code like 825 // y &= 63 826 // x << y | x >> (64-y) 827 // The shift rules above convert << to SHLx and >> to SHRx. 828 // SHRx converts its shift argument from 64-y to -y. 829 // A tricky situation occurs when y==0. Then the original code would be: 830 // x << 0 | x >> 64 831 // But x >> 64 is 0, not x. So there's an additional mask that is ANDed in 832 // to force the second term to 0. We don't need that mask, but we must match 833 // it in order to strip it out. 834 (ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y) 835 (ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y) 836 837 (ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y) 838 (ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y) 839 840 // Help with rotate detection 841 (CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT) 842 (CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) -> (FlagLT_ULT) 843 844 (ORL (SHLL x (AND(Q|L)const y [15])) 845 (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))) 846 (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const 
(AND(Q|L)const y [15]) [-16])) [16])))) 847 && v.Type.Size() == 2 848 -> (ROLW x y) 849 (ORL (SHRW x (AND(Q|L)const y [15])) 850 (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))) 851 && v.Type.Size() == 2 852 -> (RORW x y) 853 854 (ORL (SHLL x (AND(Q|L)const y [ 7])) 855 (ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))) 856 (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8])))) 857 && v.Type.Size() == 1 858 -> (ROLB x y) 859 (ORL (SHRB x (AND(Q|L)const y [ 7])) 860 (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))) 861 && v.Type.Size() == 1 862 -> (RORB x y) 863 864 // rotate left negative = rotate right 865 (ROLQ x (NEG(Q|L) y)) -> (RORQ x y) 866 (ROLL x (NEG(Q|L) y)) -> (RORL x y) 867 (ROLW x (NEG(Q|L) y)) -> (RORW x y) 868 (ROLB x (NEG(Q|L) y)) -> (RORB x y) 869 870 // rotate right negative = rotate left 871 (RORQ x (NEG(Q|L) y)) -> (ROLQ x y) 872 (RORL x (NEG(Q|L) y)) -> (ROLL x y) 873 (RORW x (NEG(Q|L) y)) -> (ROLW x y) 874 (RORB x (NEG(Q|L) y)) -> (ROLB x y) 875 876 // rotate by constants 877 (ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x) 878 (ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x) 879 (ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x) 880 (ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x) 881 882 (RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x) 883 (RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x) 884 (RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x) 885 (RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x) 886 887 // Constant shift simplifications 888 ((SHLQ|SHRQ|SARQ)const x [0]) -> x 889 ((SHLL|SHRL|SARL)const x [0]) -> x 890 ((SHRW|SARW)const x [0]) -> x 891 ((SHRB|SARB)const x [0]) -> x 892 ((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x 893 894 // Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits) 895 // because the x86 instructions are defined to use all 5 bits of the shift even 896 // for the small 
shifts. I don't think we'll ever generate a weird shift (e.g. 897 // (SHRW x (MOVLconst [24])), but just in case. 898 899 (CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c]) 900 (CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c])) 901 (CMPL x (MOVLconst [c])) -> (CMPLconst x [c]) 902 (CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c])) 903 (CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))]) 904 (CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))])) 905 (CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))]) 906 (CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))])) 907 908 // Using MOVZX instead of AND is cheaper. 909 (AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x) 910 (AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x) 911 (ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x) 912 913 // strength reduction 914 // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf: 915 // 1 - addq, shlq, leaq, negq, subq 916 // 3 - imulq 917 // This limits the rewrites to two instructions. 918 // Note that negq always operates in-place, 919 // which can require a register-register move 920 // to preserve the original value, 921 // so it must be used with care. 
922 (MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x)) 923 (MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x)) 924 (MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x)) 925 (MUL(Q|L)const [-1] x) -> (NEG(Q|L) x) 926 (MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0]) 927 (MUL(Q|L)const [ 1] x) -> x 928 (MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x) 929 (MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x) 930 (MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x)) 931 (MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x) 932 (MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x)) 933 (MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x)) 934 (MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x)) 935 (MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x)) 936 (MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x)) 937 (MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x)) 938 (MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x)) 939 (MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x)) 940 (MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x)) 941 (MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x)) 942 (MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x)) 943 944 (MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x) 945 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x) 946 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x) 947 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x) 948 (MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x) 949 (MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> 
(SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x)) 950 (MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x)) 951 (MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x)) 952 953 // combine add/shift into LEAQ/LEAL 954 (ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y) 955 (ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y) 956 (ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y) 957 (ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y) 958 (ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x) 959 960 // combine ADDQ/ADDQconst into LEAQ1/LEAL1 961 (ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y) 962 (ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y) 963 (ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x) 964 965 // fold ADDQ/ADDL into LEAQ/LEAL 966 (ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) 967 (LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x) 968 (LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) 969 (ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y) 970 971 // fold ADDQconst/ADDLconst into LEAQx/LEALx 972 (ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y) 973 (ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y) 974 (ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y) 975 (ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y) 976 (LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y) 977 (LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y) 978 (LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 
[c+2*d] {s} x y) 979 (LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y) 980 (LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y) 981 (LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y) 982 (LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y) 983 984 // fold shifts into LEAQx/LEALx 985 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y) 986 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y) 987 (LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y) 988 (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y) 989 (LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y) 990 (LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y) 991 992 // reverse ordering of compare instruction 993 (SETL (InvertFlags x)) -> (SETG x) 994 (SETG (InvertFlags x)) -> (SETL x) 995 (SETB (InvertFlags x)) -> (SETA x) 996 (SETA (InvertFlags x)) -> (SETB x) 997 (SETLE (InvertFlags x)) -> (SETGE x) 998 (SETGE (InvertFlags x)) -> (SETLE x) 999 (SETBE (InvertFlags x)) -> (SETAE x) 1000 (SETAE (InvertFlags x)) -> (SETBE x) 1001 (SETEQ (InvertFlags x)) -> (SETEQ x) 1002 (SETNE (InvertFlags x)) -> (SETNE x) 1003 1004 (SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem) 1005 (SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem) 1006 (SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem) 1007 (SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem) 1008 (SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem) 1009 (SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem) 1010 (SETBEstore [off] {sym} ptr 
(InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem) 1011 (SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem) 1012 (SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem) 1013 (SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem) 1014 1015 // sign extended loads 1016 // Note: The combined instruction must end up in the same block 1017 // as the original load. If not, we end up making a value with 1018 // memory type live in two different blocks, which can lead to 1019 // multiple memory values alive simultaneously. 1020 // Make sure we don't combine these ops if the load has another use. 1021 // This prevents a single load from being split into multiple loads 1022 // which then might return different values. See test/atomicload.go. 1023 (MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) 1024 (MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) 1025 (MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) 1026 (MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem) 1027 (MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) 1028 (MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) 1029 (MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) 1030 (MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem) 1031 (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload 
<v.Type> [off] {sym} ptr mem) 1032 (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) 1033 (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem) 1034 (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) 1035 (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) 1036 (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem) 1037 (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) 1038 (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem) 1039 (MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) 1040 (MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem) 1041 1042 (MOVLQZX x) && zeroUpper32Bits(x,3) -> x 1043 (MOVWQZX x) && zeroUpper48Bits(x,3) -> x 1044 (MOVBQZX x) && zeroUpper56Bits(x,3) -> x 1045 1046 (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 <v.Type> [off] {sym} ptr idx mem) 1047 (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 <v.Type> [off] {sym} ptr idx mem) 1048 (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 <v.Type> [off] {sym} ptr idx mem) 1049 (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 <v.Type> [off] {sym} ptr idx mem) 1050 (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 
1 && clobber(x) -> @x.Block (MOVLloadidx4 <v.Type> [off] {sym} ptr idx mem) 1051 1052 // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) 1053 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x) 1054 (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x) 1055 (MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x) 1056 (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x 1057 (MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x) 1058 (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x) 1059 (MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x) 1060 1061 // Fold extensions and ANDs together. 
// Zero extension of an ANDLconst just narrows the mask.
(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
// Sign extension can be folded only when the mask clears the sign bit.
(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0       -> (ANDLconst [c & 0x7f] x)
(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0     -> (ANDLconst [c & 0x7fff] x)
(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)

// Don't extend before storing
(MOV(L|W|B)store [off] {sym} ptr (MOV(L|W|B)QSX x) mem) -> (MOV(L|W|B)store [off] {sym} ptr x mem)
(MOV(L|W|B)store [off] {sym} ptr (MOV(L|W|B)QZX x) mem) -> (MOV(L|W|B)store [off] {sym} ptr x mem)

// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
// Plain loads and stores: absorb the ADDQconst into the instruction's offset
// when the combined offset still fits in 32 bits.
(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem)
    && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem)
    && is32Bit(off1+off2) ->
    (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem)
    && is32Bit(off1+off2) ->
    (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)

// Integer ops with a memory operand.
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem)
    && is32Bit(off1+off2) ->
    (CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
// The AuxInt here is a packed ValAndOff, so the offset is merged via canAdd/add.
(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem)
    && ValAndOff(valoff1).canAdd(off2) ->
    (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)

// Floating-point ops with a memory operand.
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)

// Read-modify-write ops with a constant operand (packed ValAndOff AuxInt).
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
    && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
    && ValAndOff(valoff1).canAdd(off2) ->
    ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
// Read-modify-write ops with a register operand: absorb the ADDQconst into the offset.
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem)
    && is32Bit(off1+off2) ->
    ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)

// Fold constants into stores.
// The narrower stores truncate the constant to the stored width before packing it.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem)
    && validValAndOff(c,off) ->
    (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem)
    && validOff(off) ->
    (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem)
    && validOff(off) ->
    (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem)
    && validOff(off) ->
    (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)

// Fold address offsets into constant stores.
(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem)
    && ValAndOff(sc).canAdd(off) ->
    (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)

// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
// Merge a LEAQ address computation into the memory op that consumes it.
// Offsets are summed (guarded by is32Bit, or canAdd for packed value/offset
// aux values) and the two symbols are merged when canMergeSym allows it.
(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
	(MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	(CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
	&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
	((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)

// generating indexed loads and stores
// A LEAQ<n> feeding a load/store becomes the matching idx<n> form; the scaled
// forms are only used where the scale equals the access width listed here.
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)

// A plain register+register address (ADDQ) becomes an idx1 access, except
// when the pointer operand is SB.
(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB ->
	(MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB ->
	(MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem)

// Constant stores through LEAQ<n>. The packed value/offset is only extended
// with add after canAdd confirms the new offset fits, matching the guard used
// by every other storeconst rule in this file.
(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)
(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) && ValAndOff(x).canAdd(off) ->
	(MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem)

(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem)

// combine SHLQ into indexed loads and stores
// A left shift of the index by 1/2/3 is absorbed as a scale of 2/4/8.
(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem)
(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem)

(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem)
(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem)
(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem)
(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem)
(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem)

// combine ADDQ into pointer of indexed loads and stores
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem)


// combine ADDQ into index of indexed loads and stores
// The constant added to the index is multiplied by the scale of the op.
(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem)
(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem)
(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem)
(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem)

(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem)
(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem)
(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem)
(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem)

(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem)
(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem)
(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem)

// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)

// LEAQ into LEAQ1
(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ1 into LEAQ
(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ into LEAQ[248]
(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
	(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// LEAQ[248] into LEAQ
(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
	(LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)

// Absorb InvertFlags into branches.
// Ordered conditions are mirrored; EQ and NE are unchanged.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)

// Constant comparisons.
// Both operands are known, so the compare is replaced by the flag constant
// recording the signed and the unsigned ordering at the compared width.
(CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
// Remaining 16-bit and all 8-bit constant-vs-constant comparisons.
// p is the constant operand, q the constant in the compare's aux field; both
// are truncated to the compared width before the signed/unsigned tests.
(CMPWconst (MOVLconst [p]) [q])
	&& int16(p) < int16(q) && uint16(p) > uint16(q) ->
	(FlagLT_UGT)
(CMPWconst (MOVLconst [p]) [q])
	&& int16(p) > int16(q) && uint16(p) < uint16(q) ->
	(FlagGT_ULT)
(CMPWconst (MOVLconst [p]) [q])
	&& int16(p) > int16(q) && uint16(p) > uint16(q) ->
	(FlagGT_UGT)
(CMPBconst (MOVLconst [p]) [q])
	&& int8(p) == int8(q) ->
	(FlagEQ)
(CMPBconst (MOVLconst [p]) [q])
	&& int8(p) < int8(q) && uint8(p) < uint8(q) ->
	(FlagLT_ULT)
(CMPBconst (MOVLconst [p]) [q])
	&& int8(p) < int8(q) && uint8(p) > uint8(q) ->
	(FlagLT_UGT)
(CMPBconst (MOVLconst [p]) [q])
	&& int8(p) > int8(q) && uint8(p) < uint8(q) ->
	(FlagGT_ULT)
(CMPBconst (MOVLconst [p]) [q])
	&& int8(p) > int8(q) && uint8(p) > uint8(q) ->
	(FlagGT_UGT)

// Other known comparisons.
// The compared value is bounded (zero-extended, right-shifted or masked), so
// when the constant lies above that bound the outcome is always "less than".
(CMPQconst (MOVBQZX _) [c])
	&& 0xFF < c ->
	(FlagLT_ULT)
(CMPQconst (MOVWQZX _) [c])
	&& 0xFFFF < c ->
	(FlagLT_ULT)
(CMPQconst (MOVLQZX _) [c])
	&& 0xFFFFFFFF < c ->
	(FlagLT_ULT)
(CMPLconst (SHRLconst _ [c]) [n])
	&& 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) ->
	(FlagLT_ULT)
(CMPQconst (SHRQconst _ [c]) [n])
	&& 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) ->
	(FlagLT_ULT)
(CMPQconst (ANDQconst _ [m]) [n])
	&& 0 <= m && m < n ->
	(FlagLT_ULT)
(CMPQconst (ANDLconst _ [m]) [n])
	&& 0 <= m && m < n ->
	(FlagLT_ULT)
(CMPLconst (ANDLconst _ [m]) [n])
	&& 0 <= int32(m) && int32(m) < int32(n) ->
	(FlagLT_ULT)
(CMPWconst (ANDLconst _ [m]) [n])
	&& 0 <= int16(m) && int16(m) < int16(n) ->
	(FlagLT_ULT)
(CMPBconst (ANDLconst _ [m]) [n])
	&& 0 <= int8(m) && int8(m) < int8(n) ->
	(FlagLT_ULT)

// TODO: DIVxU also.

// Absorb flag constants into SBB ops.
// The carry mask is all ones for the unsigned-less-than flag constants ...
(SBBQcarrymask (FlagLT_ULT)) -> (MOVQconst [-1])
(SBBQcarrymask (FlagGT_ULT)) -> (MOVQconst [-1])
// ... and zero for every other flag constant.
(SBBQcarrymask (FlagEQ)) -> (MOVQconst [0])
(SBBQcarrymask (FlagLT_UGT)) -> (MOVQconst [0])
(SBBQcarrymask (FlagGT_UGT)) -> (MOVQconst [0])

// Same table for the 32-bit mask.
(SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
(SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
(SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
(SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
(SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])

// Absorb flag constants into branches.
// With a constant flag value the branch direction is fixed: the block becomes
// a First block, with the successors swapped when the condition is false.

// equal
((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) ->
	(First nil yes no)
((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) ->
	(First nil no yes)

// signed less, unsigned less
((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) ->
	(First nil yes no)
((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) ->
	(First nil no yes)

// signed less, unsigned greater
((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) ->
	(First nil yes no)
((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) ->
	(First nil no yes)

// signed greater, unsigned less
((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) ->
	(First nil yes no)
((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) ->
	(First nil no yes)

// signed greater, unsigned greater
((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) ->
	(First nil yes no)
((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) ->
	(First nil no yes)

// Absorb flag constants into SETxx ops.
// A SETxx of a constant flag value is itself a constant 0 or 1.
((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) ->
	(MOVLconst [1])
((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) ->
	(MOVLconst [0])
((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) ->
	(MOVLconst [1])
((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) ->
	(MOVLconst [0])
((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) ->
	(MOVLconst [1])
((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) ->
	(MOVLconst [0])
((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) ->
	(MOVLconst [1])
((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) ->
	(MOVLconst [0])
((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) ->
	(MOVLconst [1])
((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) ->
	(MOVLconst [0])

// The store forms become a byte store of the constant 0 or 1. The constant
// is given the type of the matched flag value (bound as fl).

// SETEQstore: 1 only for FlagEQ.
(SETEQstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETEQstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETEQstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETEQstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETEQstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)

// SETNEstore: 0 only for FlagEQ.
(SETNEstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETNEstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETNEstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETNEstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETNEstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)

// SETLstore: signed less-than.
(SETLstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETLstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETLstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETLstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETLstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)

// SETLEstore: signed less-or-equal.
(SETLEstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETLEstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETLEstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETLEstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETLEstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)

// SETGstore: signed greater-than.
(SETGstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETGstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETGstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETGstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETGstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)

// SETGEstore: signed greater-or-equal.
(SETGEstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETGEstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETGEstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETGEstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETGEstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)

// SETBstore: unsigned below.
(SETBstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETBstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETBstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETBstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETBstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)

// SETBEstore: unsigned below-or-equal.
(SETBEstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETBEstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETBEstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETBEstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETBEstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)

// SETAstore: unsigned above.
(SETAstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETAstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETAstore [off] {sym} ptr fl:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETAstore [off] {sym} ptr fl:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
(SETAstore [off] {sym} ptr fl:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)

// SETAEstore: unsigned above-or-equal (first two flag constants).
(SETAEstore [off] {sym} ptr fl:(FlagEQ) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [1]) mem)
(SETAEstore [off] {sym} ptr fl:(FlagLT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <fl.Type> [0]) mem)
// SETAEstore (unsigned above-or-equal) for the remaining flag constants.
(SETAEstore [off] {sym} ptr x:(FlagLT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)
(SETAEstore [off] {sym} ptr x:(FlagGT_ULT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [0]) mem)
(SETAEstore [off] {sym} ptr x:(FlagGT_UGT) mem) ->
	(MOVBstore [off] {sym} ptr (MOVLconst <x.Type> [1]) mem)

// Remove redundant *const ops
// Identity constants return the operand; absorbing constants return the
// constant. The 32-bit forms compare only the low 32 bits of the aux value.
(ADDQconst [0] x) -> x
(ADDLconst [c] x) && int32(c) == 0 -> x
(SUBQconst [0] x) -> x
(SUBLconst [c] x) && int32(c) == 0 -> x
(ANDQconst [0] _) -> (MOVQconst [0])
(ANDLconst [c] _) && int32(c) == 0 -> (MOVLconst [0])
(ANDQconst [-1] x) -> x
(ANDLconst [c] x) && int32(c) == -1 -> x
(ORQconst [0] x) -> x
(ORLconst [c] x) && int32(c) == 0 -> x
(ORQconst [-1] _) -> (MOVQconst [-1])
(ORLconst [c] _) && int32(c) == -1 -> (MOVLconst [-1])
(XORQconst [0] x) -> x
(XORLconst [c] x) && int32(c) == 0 -> x
// TODO: since we got rid of the W/B versions, we might miss
// things like (ANDLconst [0x100] x) which were formerly
// (ANDBconst [0] x).  Probably doesn't happen very often.
// If we cared, we might do:
//  (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0])

// Remove redundant ops
// Not in generic rules, because they may appear after lowering e. g. Slicemask
(NEG(Q|L) (NEG(Q|L) x)) -> x

// Convert constant subtracts to constant adds
// -(1<<31) is excluded from the 64-bit form.
(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)

// generic constant folding
// TODO: more of this

// add / subtract
(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
(ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
(ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x)
(ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
(SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c])
(SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x)

// arithmetic right shifts: the constant is sign-extended from the op width first
(SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
(SARLconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int32(d))>>uint64(c)])
(SARWconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int16(d))>>uint64(c)])
(SARBconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int8(d))>>uint64(c)])

// negate / multiply
(NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
(NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
(MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
(MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])

// bitwise logic
(ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d])
(ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
(ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d])
(ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
(XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d])
(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
(NOTL (MOVLconst [c])) -> (MOVLconst [^c])

// single-bit set / reset
(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
// single-bit complement of a constant
(BTCQconst [c] (MOVQconst [d])) ->
	(MOVQconst [d^(1<<uint32(c))])
(BTCLconst [c] (MOVLconst [d])) ->
	(MOVLconst [d^(1<<uint32(c))])

// generic simplifications
// TODO: more of this
(ADDQ u (NEGQ w)) -> (SUBQ u w)
(ADDL u (NEGL w)) -> (SUBL u w)
(SUBQ u u) -> (MOVQconst [0])
(SUBL u u) -> (MOVLconst [0])
(ANDQ u u) -> u
(ANDL u u) -> u
(ORQ u u) -> u
(ORL u u) -> u
(XORQ u u) -> (MOVQconst [0])
(XORL u u) -> (MOVLconst [0])
(NEGQ (ADDQconst [c] (NEGQ u))) && c != -(1<<31) ->
	(ADDQconst [-c] u)

// checking AND against 0.
// Comparing the result of an AND with zero is a TEST of its operands.
(CMPQconst (ANDQ u w) [0]) -> (TESTQ u w)
(CMPLconst (ANDL u w) [0]) -> (TESTL u w)
(CMPWconst (ANDL u w) [0]) -> (TESTW u w)
(CMPBconst (ANDL u w) [0]) -> (TESTB u w)
(CMPQconst (ANDQconst [c] u) [0]) -> (TESTQconst [c] u)
(CMPLconst (ANDLconst [c] u) [0]) -> (TESTLconst [c] u)
(CMPWconst (ANDLconst [c] u) [0]) -> (TESTWconst [int64(int16(c))] u)
(CMPBconst (ANDLconst [c] u) [0]) -> (TESTBconst [int64(int8(c))] u)

// Convert TESTx to TESTxconst if possible.
(TESTQ (MOVQconst [c]) u) && is32Bit(c) -> (TESTQconst [c] u)
(TESTL (MOVLconst [c]) u) -> (TESTLconst [c] u)
(TESTW (MOVLconst [c]) u) -> (TESTWconst [c] u)
(TESTB (MOVLconst [c]) u) -> (TESTBconst [c] u)

// TEST %reg,%reg is shorter than CMP
(CMPQconst u [0]) -> (TESTQ u u)
(CMPLconst u [0]) -> (TESTL u u)
(CMPWconst u [0]) -> (TESTW u u)
(CMPBconst u [0]) -> (TESTB u u)
// A TEST against an all-ones constant becomes TEST of the operand with
// itself, unless the operand is itself a constant op.
(TESTQconst [-1] u) && u.Op != OpAMD64MOVQconst -> (TESTQ u u)
(TESTLconst [-1] u) && u.Op != OpAMD64MOVLconst -> (TESTL u u)
(TESTWconst [-1] u) && u.Op != OpAMD64MOVLconst -> (TESTW u u)
(TESTBconst [-1] u) && u.Op != OpAMD64MOVLconst -> (TESTB u u)

// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur.  This is
This is 1523 // designed to match the way encoding/binary.LittleEndian does it. 1524 1525 // Little-endian loads 1526 1527 (ORL x0:(MOVBload [i0] {s} p mem) 1528 sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) 1529 && i1 == i0+1 1530 && x0.Uses == 1 1531 && x1.Uses == 1 1532 && sh.Uses == 1 1533 && mergePoint(b,x0,x1) != nil 1534 && clobber(x0) 1535 && clobber(x1) 1536 && clobber(sh) 1537 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) 1538 1539 (ORQ x0:(MOVBload [i0] {s} p mem) 1540 sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) 1541 && i1 == i0+1 1542 && x0.Uses == 1 1543 && x1.Uses == 1 1544 && sh.Uses == 1 1545 && mergePoint(b,x0,x1) != nil 1546 && clobber(x0) 1547 && clobber(x1) 1548 && clobber(sh) 1549 -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) 1550 1551 (ORL x0:(MOVWload [i0] {s} p mem) 1552 sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) 1553 && i1 == i0+2 1554 && x0.Uses == 1 1555 && x1.Uses == 1 1556 && sh.Uses == 1 1557 && mergePoint(b,x0,x1) != nil 1558 && clobber(x0) 1559 && clobber(x1) 1560 && clobber(sh) 1561 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) 1562 1563 (ORQ x0:(MOVWload [i0] {s} p mem) 1564 sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) 1565 && i1 == i0+2 1566 && x0.Uses == 1 1567 && x1.Uses == 1 1568 && sh.Uses == 1 1569 && mergePoint(b,x0,x1) != nil 1570 && clobber(x0) 1571 && clobber(x1) 1572 && clobber(sh) 1573 -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) 1574 1575 (ORQ x0:(MOVLload [i0] {s} p mem) 1576 sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) 1577 && i1 == i0+4 1578 && x0.Uses == 1 1579 && x1.Uses == 1 1580 && sh.Uses == 1 1581 && mergePoint(b,x0,x1) != nil 1582 && clobber(x0) 1583 && clobber(x1) 1584 && clobber(sh) 1585 -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) 1586 1587 (ORL 1588 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) 1589 or:(ORL 1590 s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) 1591 y)) 1592 && i1 == i0+1 1593 && j1 == j0+8 1594 && j0 % 16 == 0 1595 && x0.Uses == 1 1596 && 
x1.Uses == 1 1597 && s0.Uses == 1 1598 && s1.Uses == 1 1599 && or.Uses == 1 1600 && mergePoint(b,x0,x1) != nil 1601 && clobber(x0) 1602 && clobber(x1) 1603 && clobber(s0) 1604 && clobber(s1) 1605 && clobber(or) 1606 -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) 1607 1608 (ORQ 1609 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) 1610 or:(ORQ 1611 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) 1612 y)) 1613 && i1 == i0+1 1614 && j1 == j0+8 1615 && j0 % 16 == 0 1616 && x0.Uses == 1 1617 && x1.Uses == 1 1618 && s0.Uses == 1 1619 && s1.Uses == 1 1620 && or.Uses == 1 1621 && mergePoint(b,x0,x1) != nil 1622 && clobber(x0) 1623 && clobber(x1) 1624 && clobber(s0) 1625 && clobber(s1) 1626 && clobber(or) 1627 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) 1628 1629 (ORQ 1630 s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) 1631 or:(ORQ 1632 s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) 1633 y)) 1634 && i1 == i0+2 1635 && j1 == j0+16 1636 && j0 % 32 == 0 1637 && x0.Uses == 1 1638 && x1.Uses == 1 1639 && s0.Uses == 1 1640 && s1.Uses == 1 1641 && or.Uses == 1 1642 && mergePoint(b,x0,x1) != nil 1643 && clobber(x0) 1644 && clobber(x1) 1645 && clobber(s0) 1646 && clobber(s1) 1647 && clobber(or) 1648 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y) 1649 1650 // Little-endian indexed loads 1651 1652 (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) 1653 sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) 1654 && i1 == i0+1 1655 && x0.Uses == 1 1656 && x1.Uses == 1 1657 && sh.Uses == 1 1658 && mergePoint(b,x0,x1) != nil 1659 && clobber(x0) 1660 && clobber(x1) 1661 && clobber(sh) 1662 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem) 1663 1664 (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) 1665 sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) 1666 && i1 == i0+1 1667 && x0.Uses == 1 1668 && x1.Uses == 1 1669 && sh.Uses == 1 1670 
&& mergePoint(b,x0,x1) != nil 1671 && clobber(x0) 1672 && clobber(x1) 1673 && clobber(sh) 1674 -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i0] {s} p idx mem) 1675 1676 (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) 1677 sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 1678 && i1 == i0+2 1679 && x0.Uses == 1 1680 && x1.Uses == 1 1681 && sh.Uses == 1 1682 && mergePoint(b,x0,x1) != nil 1683 && clobber(x0) 1684 && clobber(x1) 1685 && clobber(sh) 1686 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) 1687 1688 (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) 1689 sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 1690 && i1 == i0+2 1691 && x0.Uses == 1 1692 && x1.Uses == 1 1693 && sh.Uses == 1 1694 && mergePoint(b,x0,x1) != nil 1695 && clobber(x0) 1696 && clobber(x1) 1697 && clobber(sh) 1698 -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) 1699 1700 (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) 1701 sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem))) 1702 && i1 == i0+4 1703 && x0.Uses == 1 1704 && x1.Uses == 1 1705 && sh.Uses == 1 1706 && mergePoint(b,x0,x1) != nil 1707 && clobber(x0) 1708 && clobber(x1) 1709 && clobber(sh) 1710 -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem) 1711 1712 (ORL 1713 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 1714 or:(ORL 1715 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 1716 y)) 1717 && i1 == i0+1 1718 && j1 == j0+8 1719 && j0 % 16 == 0 1720 && x0.Uses == 1 1721 && x1.Uses == 1 1722 && s0.Uses == 1 1723 && s1.Uses == 1 1724 && or.Uses == 1 1725 && mergePoint(b,x0,x1) != nil 1726 && clobber(x0) 1727 && clobber(x1) 1728 && clobber(s0) 1729 && clobber(s1) 1730 && clobber(or) 1731 -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) 1732 1733 (ORQ 1734 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 1735 or:(ORQ 1736 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 1737 y)) 1738 && i1 == i0+1 1739 && j1 == j0+8 1740 && 
j0 % 16 == 0 1741 && x0.Uses == 1 1742 && x1.Uses == 1 1743 && s0.Uses == 1 1744 && s1.Uses == 1 1745 && or.Uses == 1 1746 && mergePoint(b,x0,x1) != nil 1747 && clobber(x0) 1748 && clobber(x1) 1749 && clobber(s0) 1750 && clobber(s1) 1751 && clobber(or) 1752 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) 1753 1754 (ORQ 1755 s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1756 or:(ORQ 1757 s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) 1758 y)) 1759 && i1 == i0+2 1760 && j1 == j0+16 1761 && j0 % 32 == 0 1762 && x0.Uses == 1 1763 && x1.Uses == 1 1764 && s0.Uses == 1 1765 && s1.Uses == 1 1766 && or.Uses == 1 1767 && mergePoint(b,x0,x1) != nil 1768 && clobber(x0) 1769 && clobber(x1) 1770 && clobber(s0) 1771 && clobber(s1) 1772 && clobber(or) 1773 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y) 1774 1775 // Big-endian loads 1776 1777 (ORL 1778 x1:(MOVBload [i1] {s} p mem) 1779 sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) 1780 && i1 == i0+1 1781 && x0.Uses == 1 1782 && x1.Uses == 1 1783 && sh.Uses == 1 1784 && mergePoint(b,x0,x1) != nil 1785 && clobber(x0) 1786 && clobber(x1) 1787 && clobber(sh) 1788 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) 1789 1790 (ORQ 1791 x1:(MOVBload [i1] {s} p mem) 1792 sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) 1793 && i1 == i0+1 1794 && x0.Uses == 1 1795 && x1.Uses == 1 1796 && sh.Uses == 1 1797 && mergePoint(b,x0,x1) != nil 1798 && clobber(x0) 1799 && clobber(x1) 1800 && clobber(sh) 1801 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) 1802 1803 (ORL 1804 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) 1805 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) 1806 && i1 == i0+2 1807 && x0.Uses == 1 1808 && x1.Uses == 1 1809 && r0.Uses == 1 1810 && r1.Uses == 1 1811 && sh.Uses == 1 1812 && mergePoint(b,x0,x1) != nil 1813 && clobber(x0) 
1814 && clobber(x1) 1815 && clobber(r0) 1816 && clobber(r1) 1817 && clobber(sh) 1818 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) 1819 1820 (ORQ 1821 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) 1822 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) 1823 && i1 == i0+2 1824 && x0.Uses == 1 1825 && x1.Uses == 1 1826 && r0.Uses == 1 1827 && r1.Uses == 1 1828 && sh.Uses == 1 1829 && mergePoint(b,x0,x1) != nil 1830 && clobber(x0) 1831 && clobber(x1) 1832 && clobber(r0) 1833 && clobber(r1) 1834 && clobber(sh) 1835 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) 1836 1837 (ORQ 1838 r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) 1839 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) 1840 && i1 == i0+4 1841 && x0.Uses == 1 1842 && x1.Uses == 1 1843 && r0.Uses == 1 1844 && r1.Uses == 1 1845 && sh.Uses == 1 1846 && mergePoint(b,x0,x1) != nil 1847 && clobber(x0) 1848 && clobber(x1) 1849 && clobber(r0) 1850 && clobber(r1) 1851 && clobber(sh) 1852 -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem)) 1853 1854 (ORL 1855 s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) 1856 or:(ORL 1857 s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) 1858 y)) 1859 && i1 == i0+1 1860 && j1 == j0-8 1861 && j1 % 16 == 0 1862 && x0.Uses == 1 1863 && x1.Uses == 1 1864 && s0.Uses == 1 1865 && s1.Uses == 1 1866 && or.Uses == 1 1867 && mergePoint(b,x0,x1) != nil 1868 && clobber(x0) 1869 && clobber(x1) 1870 && clobber(s0) 1871 && clobber(s1) 1872 && clobber(or) 1873 -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) 1874 1875 (ORQ 1876 s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) 1877 or:(ORQ 1878 s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) 1879 y)) 1880 && i1 == i0+1 1881 && j1 == j0-8 1882 && j1 % 16 == 0 1883 && x0.Uses == 1 1884 && x1.Uses == 1 1885 && s0.Uses == 1 1886 && s1.Uses == 1 1887 && or.Uses == 1 1888 && mergePoint(b,x0,x1) != 
nil 1889 && clobber(x0) 1890 && clobber(x1) 1891 && clobber(s0) 1892 && clobber(s1) 1893 && clobber(or) 1894 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) 1895 1896 (ORQ 1897 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) 1898 or:(ORQ 1899 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) 1900 y)) 1901 && i1 == i0+2 1902 && j1 == j0-16 1903 && j1 % 32 == 0 1904 && x0.Uses == 1 1905 && x1.Uses == 1 1906 && r0.Uses == 1 1907 && r1.Uses == 1 1908 && s0.Uses == 1 1909 && s1.Uses == 1 1910 && or.Uses == 1 1911 && mergePoint(b,x0,x1) != nil 1912 && clobber(x0) 1913 && clobber(x1) 1914 && clobber(r0) 1915 && clobber(r1) 1916 && clobber(s0) 1917 && clobber(s1) 1918 && clobber(or) 1919 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y) 1920 1921 // Big-endian indexed loads 1922 1923 (ORL 1924 x1:(MOVBloadidx1 [i1] {s} p idx mem) 1925 sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) 1926 && i1 == i0+1 1927 && x0.Uses == 1 1928 && x1.Uses == 1 1929 && sh.Uses == 1 1930 && mergePoint(b,x0,x1) != nil 1931 && clobber(x0) 1932 && clobber(x1) 1933 && clobber(sh) 1934 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem)) 1935 1936 (ORQ 1937 x1:(MOVBloadidx1 [i1] {s} p idx mem) 1938 sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) 1939 && i1 == i0+1 1940 && x0.Uses == 1 1941 && x1.Uses == 1 1942 && sh.Uses == 1 1943 && mergePoint(b,x0,x1) != nil 1944 && clobber(x0) 1945 && clobber(x1) 1946 && clobber(sh) 1947 -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWloadidx1 [i0] {s} p idx mem)) 1948 1949 (ORL 1950 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1951 sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) 1952 && i1 == i0+2 1953 && x0.Uses == 1 1954 && x1.Uses == 1 1955 && r0.Uses == 1 1956 && r1.Uses == 1 1957 && sh.Uses 
== 1 1958 && mergePoint(b,x0,x1) != nil 1959 && clobber(x0) 1960 && clobber(x1) 1961 && clobber(r0) 1962 && clobber(r1) 1963 && clobber(sh) 1964 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem)) 1965 1966 (ORQ 1967 r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) 1968 sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) 1969 && i1 == i0+2 1970 && x0.Uses == 1 1971 && x1.Uses == 1 1972 && r0.Uses == 1 1973 && r1.Uses == 1 1974 && sh.Uses == 1 1975 && mergePoint(b,x0,x1) != nil 1976 && clobber(x0) 1977 && clobber(x1) 1978 && clobber(r0) 1979 && clobber(r1) 1980 && clobber(sh) 1981 -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLloadidx1 [i0] {s} p idx mem)) 1982 1983 (ORQ 1984 r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) 1985 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem)))) 1986 && i1 == i0+4 1987 && x0.Uses == 1 1988 && x1.Uses == 1 1989 && r0.Uses == 1 1990 && r1.Uses == 1 1991 && sh.Uses == 1 1992 && mergePoint(b,x0,x1) != nil 1993 && clobber(x0) 1994 && clobber(x1) 1995 && clobber(r0) 1996 && clobber(r1) 1997 && clobber(sh) 1998 -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQloadidx1 [i0] {s} p idx mem)) 1999 2000 (ORL 2001 s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 2002 or:(ORL 2003 s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 2004 y)) 2005 && i1 == i0+1 2006 && j1 == j0-8 2007 && j1 % 16 == 0 2008 && x0.Uses == 1 2009 && x1.Uses == 1 2010 && s0.Uses == 1 2011 && s1.Uses == 1 2012 && or.Uses == 1 2013 && mergePoint(b,x0,x1) != nil 2014 && clobber(x0) 2015 && clobber(x1) 2016 && clobber(s0) 2017 && clobber(s1) 2018 && clobber(or) 2019 -> @mergePoint(b,x0,x1) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) 2020 2021 (ORQ 2022 s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) 2023 or:(ORQ 2024 s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) 2025 y)) 2026 && i1 == i0+1 2027 && j1 == j0-8 
2028 && j1 % 16 == 0 2029 && x0.Uses == 1 2030 && x1.Uses == 1 2031 && s0.Uses == 1 2032 && s1.Uses == 1 2033 && or.Uses == 1 2034 && mergePoint(b,x0,x1) != nil 2035 && clobber(x0) 2036 && clobber(x1) 2037 && clobber(s0) 2038 && clobber(s1) 2039 && clobber(or) 2040 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) 2041 2042 (ORQ 2043 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) 2044 or:(ORQ 2045 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) 2046 y)) 2047 && i1 == i0+2 2048 && j1 == j0-16 2049 && j1 % 32 == 0 2050 && x0.Uses == 1 2051 && x1.Uses == 1 2052 && r0.Uses == 1 2053 && r1.Uses == 1 2054 && s0.Uses == 1 2055 && s1.Uses == 1 2056 && or.Uses == 1 2057 && mergePoint(b,x0,x1) != nil 2058 && clobber(x0) 2059 && clobber(x1) 2060 && clobber(r0) 2061 && clobber(r1) 2062 && clobber(s0) 2063 && clobber(s1) 2064 && clobber(or) 2065 -> @mergePoint(b,x0,x1) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLloadidx1 [i0] {s} p idx mem))) y) 2066 2067 // Combine 2 byte stores + shift into rolw 8 + word store 2068 (MOVBstore [i] {s} p w 2069 x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) 2070 && x0.Uses == 1 2071 && clobber(x0) 2072 -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem) 2073 2074 (MOVBstoreidx1 [i] {s} p idx w 2075 x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem)) 2076 && x0.Uses == 1 2077 && clobber(x0) 2078 -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst <w.Type> [8] w) mem) 2079 2080 // Combine stores + shifts into bswap and larger (unaligned) stores 2081 (MOVBstore [i] {s} p w 2082 x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) 2083 x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) 2084 x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) 2085 && x0.Uses == 1 2086 && x1.Uses == 1 2087 && x2.Uses == 1 2088 && clobber(x0) 2089 && clobber(x1) 2090 && clobber(x2) 2091 -> (MOVLstore [i-3] {s} p 
(BSWAPL <w.Type> w) mem) 2092 2093 (MOVBstoreidx1 [i] {s} p idx w 2094 x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) 2095 x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) 2096 x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem)))) 2097 && x0.Uses == 1 2098 && x1.Uses == 1 2099 && x2.Uses == 1 2100 && clobber(x0) 2101 && clobber(x1) 2102 && clobber(x2) 2103 -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL <w.Type> w) mem) 2104 2105 (MOVBstore [i] {s} p w 2106 x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) 2107 x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) 2108 x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) 2109 x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) 2110 x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) 2111 x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) 2112 x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) 2113 && x0.Uses == 1 2114 && x1.Uses == 1 2115 && x2.Uses == 1 2116 && x3.Uses == 1 2117 && x4.Uses == 1 2118 && x5.Uses == 1 2119 && x6.Uses == 1 2120 && clobber(x0) 2121 && clobber(x1) 2122 && clobber(x2) 2123 && clobber(x3) 2124 && clobber(x4) 2125 && clobber(x5) 2126 && clobber(x6) 2127 -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem) 2128 2129 (MOVBstoreidx1 [i] {s} p idx w 2130 x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) 2131 x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) 2132 x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) 2133 x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) 2134 x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) 2135 x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) 2136 x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem)))))))) 2137 && x0.Uses == 1 2138 && x1.Uses == 1 2139 && x2.Uses == 1 2140 && x3.Uses == 1 2141 && x4.Uses == 1 2142 && x5.Uses == 1 2143 && x6.Uses == 1 2144 && clobber(x0) 2145 && clobber(x1) 2146 && clobber(x2) 2147 && clobber(x3) 2148 && clobber(x4) 2149 && clobber(x5) 2150 && clobber(x6) 2151 -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ <w.Type> w) mem) 2152 2153 // 
Combine constant stores into larger (unaligned) stores. 2154 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) 2155 && x.Uses == 1 2156 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2157 && clobber(x) 2158 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) 2159 (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) 2160 && x.Uses == 1 2161 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2162 && clobber(x) 2163 -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) 2164 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) 2165 && x.Uses == 1 2166 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2167 && clobber(x) 2168 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) 2169 (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) 2170 && x.Uses == 1 2171 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2172 && clobber(x) 2173 -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) 2174 (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) 2175 && x.Uses == 1 2176 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2177 && clobber(x) 2178 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2179 (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) 2180 && x.Uses == 1 2181 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2182 && clobber(x) 2183 -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2184 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) 2185 && config.useSSE 2186 && x.Uses == 1 2187 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() 2188 && ValAndOff(c).Val() == 0 2189 && ValAndOff(c2).Val() == 0 2190 && clobber(x) 2191 -> (MOVOstore 
[ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) 2192 2193 (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) 2194 && x.Uses == 1 2195 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() 2196 && clobber(x) 2197 -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) 2198 (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) 2199 && x.Uses == 1 2200 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2201 && clobber(x) 2202 -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) 2203 (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) 2204 && x.Uses == 1 2205 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2206 && clobber(x) 2207 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2208 2209 (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) 2210 && x.Uses == 1 2211 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() 2212 && clobber(x) 2213 -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst <i.Type> [1] i) mem) 2214 (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) 2215 && x.Uses == 1 2216 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() 2217 && clobber(x) 2218 -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst <i.Type> [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) 2219 2220 // Combine stores into larger (unaligned) stores. 
// Two adjacent narrow stores that write w and (w >> width), or two consecutive
// shifted pieces of w, are replaced by one store of twice the width.
// The inner store must have no other use, since it is clobbered.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVWstore [i-1] {s} p w0 mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstore [i-2] {s} p w0 mem)
(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstore [i-4] {s} p w0 mem)

// Same combinations for stores indexed with scale 1.
(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVWstoreidx1 [i-1] {s} p idx w mem)
(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVWstoreidx1 [i-1] {s} p idx w0 mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstoreidx1 [i-2] {s} p idx w mem)
(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstoreidx1 [i-2] {s} p idx w0 mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstoreidx1 [i-4] {s} p idx w mem)
(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstoreidx1 [i-4] {s} p idx w0 mem)

// Scaled-index stores: the combined store uses the scale-1 form, so the
// index is shifted left explicitly (by 1 for idx2, by 2 for idx4).
(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w mem)
(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVLstoreidx1 [i-2] {s} p (SHLQconst <idx.Type> [1] idx) w0 mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w mem)
(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem))
	&& x.Uses == 1 && clobber(x)
	-> (MOVQstoreidx1 [i-4] {s} p (SHLQconst <idx.Type> [2] idx) w0 mem)

// Two adjacent load->store copies whose loads read the same memory state
// become one load->store copy of twice the width.
(MOVBstore [i] {s} p
	x1:(MOVBload [j] {s2} p2 mem)
	mem2:(MOVBstore [i-1] {s} p
		x2:(MOVBload [j-1] {s2} p2 mem) mem))
	&& x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1
	&& clobber(x1) && clobber(x2) && clobber(mem2)
	-> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)

(MOVWstore [i] {s} p
	x1:(MOVWload [j] {s2} p2 mem)
	mem2:(MOVWstore [i-2] {s} p
		x2:(MOVWload [j-2] {s2} p2 mem) mem))
	&& x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1
	&& clobber(x1) && clobber(x2) && clobber(mem2)
	-> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)

(MOVLstore [i] {s} p
	x1:(MOVLload [j] {s2} p2 mem)
	mem2:(MOVLstore [i-4] {s} p
		x2:(MOVLload [j-4] {s2} p2 mem) mem))
	&& x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1
	&& clobber(x1) && clobber(x2) && clobber(mem2)
	-> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)

// Fold an address formed by LEAL into the offset and symbol of the memory op,
// provided canMergeSym accepts the symbols and the summed offset passes the
// is32Bit / canAdd check.
(MOV(Q|L|W|B)load [off1] {sym1} (LEAL [off2] {sym2} base) mem)
	&& canMergeSym(sym1, sym2) && is32Bit(off1+off2)
	-> (MOV(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base mem)

(MOV(Q|L|W|B)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
	&& canMergeSym(sym1, sym2) && is32Bit(off1+off2)
	-> (MOV(Q|L|W|B)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)

(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
	&& canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
	-> (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)

// Fold an ADDLconst on the address into the offset of the memory op.
(MOV(Q|L|W|B)load [off1] {sym} (ADDLconst [off2] ptr) mem)
	&& is32Bit(off1+off2)
	-> (MOV(Q|L|W|B)load [off1+off2] {sym} ptr mem)
(MOV(Q|L|W|B)store [off1] {sym} (ADDLconst [off2] ptr) val mem)
	&& is32Bit(off1+off2)
	-> (MOV(Q|L|W|B)store [off1+off2] {sym} ptr val mem)
(MOV(Q|L|W|B)storeconst [sc] {s} (ADDLconst [off] ptr) mem)
	&& ValAndOff(sc).canAdd(off)
	-> (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)

// Merge load and op
// TODO: add indexed variants?
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem))
	&& canMergeLoadClobber(v, l, x) && clobber(l)
	-> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem))
	&& canMergeLoadClobber(v, l, x) && clobber(l)
	-> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem))
	&& canMergeLoadClobber(v, l, x) && clobber(l)
	-> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem))
	&& canMergeLoadClobber(v, l, x) && clobber(l)
	-> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)

// A store of (op load x) back to the address the load came from becomes a
// single read-modify-write op on memory.
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem)
	&& y.Uses == 1 && clobber(y)
	-> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem)
	&& y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)
	-> ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem)
	&& y.Uses == 1 && clobber(y)
	-> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem)
	&& y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)
	-> ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)

// Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
	&& is32Bit(off1+off2)
	-> (MOV(Q|L)atomicload [off1+off2] {sym} ptr mem)
(MOV(Q|L)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2)
	-> (MOV(Q|L)atomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)

// Merge ADDQconst and LEAQ into atomic stores.
// The LEAQ form is skipped when the LEAQ base is SB.
(XCHG(Q|L) [off1] {sym} val (ADDQconst [off2] ptr) mem)
	&& is32Bit(off1+off2)
	-> (XCHG(Q|L) [off1+off2] {sym} val ptr mem)
(XCHG(Q|L) [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
	&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
	-> (XCHG(Q|L) [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)

// Merge ADDQconst into atomic adds.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(XADD(Q|L)lock [off1] {sym} val (ADDQconst [off2] ptr) mem)
	&& is32Bit(off1+off2)
	-> (XADD(Q|L)lock [off1+off2] {sym} val ptr mem)

// Merge ADDQconst into atomic compare and swaps.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(CMPXCHG(Q|L)lock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
	&& is32Bit(off1+off2)
	-> (CMPXCHG(Q|L)lock [off1+off2] {sym} ptr old new_ mem)

// We don't need the conditional move if we know the arg of BSF is not zero.
(CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x
// Extension is unnecessary for trailing zeros.
(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))

// Simplify indexed loads/stores
// A constant index is folded into the offset (multiplied by the op's scale),
// turning the indexed op into its plain form when the new offset passes is32Bit.
(MOV(B|W|L|Q|SS|SD)storeidx1 [i] {s} p (MOVQconst [c]) w mem)
	&& is32Bit(i+c)
	-> (MOV(B|W|L|Q|SS|SD)store [i+c] {s} p w mem)
(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem)
	&& is32Bit(i+2*c)
	-> (MOVWstore [i+2*c] {s} p w mem)
(MOV(L|SS)storeidx4 [i] {s} p (MOVQconst [c]) w mem)
	&& is32Bit(i+4*c)
	-> (MOV(L|SS)store [i+4*c] {s} p w mem)
(MOV(L|Q|SD)storeidx8 [i] {s} p (MOVQconst [c]) w mem)
	&& is32Bit(i+8*c)
	-> (MOV(L|Q|SD)store [i+8*c] {s} p w mem)

(MOV(B|W|L|Q|SS|SD)loadidx1 [i] {s} p (MOVQconst [c]) mem)
	&& is32Bit(i+c)
	-> (MOV(B|W|L|Q|SS|SD)load [i+c] {s} p mem)
(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem)
	&& is32Bit(i+2*c)
	-> (MOVWload [i+2*c] {s} p mem)
(MOV(L|SS)loadidx4 [i] {s} p (MOVQconst [c]) mem)
	&& is32Bit(i+4*c)
	-> (MOV(L|SS)load [i+4*c] {s} p mem)
(MOV(L|Q|SD)loadidx8 [i] {s} p (MOVQconst [c]) mem)
	&& is32Bit(i+8*c)
	-> (MOV(L|Q|SD)load [i+8*c] {s} p mem)

// Redundant sign/zero extensions
// Note: see issue 21963. We have to make sure we use the right type on
// the resulting extension (the outer type, not the inner type).
// An extension applied to an equal-or-narrower extension of the same kind
// collapses to the narrower one. No type is given on the result, so it
// takes the type of the outer value.
(MOVLQ(S|Z)X (MOVLQ(S|Z)X x)) -> (MOVLQ(S|Z)X x)
(MOVLQ(S|Z)X (MOVWQ(S|Z)X x)) -> (MOVWQ(S|Z)X x)
(MOVLQ(S|Z)X (MOVBQ(S|Z)X x)) -> (MOVBQ(S|Z)X x)
(MOVWQ(S|Z)X (MOVWQ(S|Z)X x)) -> (MOVWQ(S|Z)X x)
(MOVWQ(S|Z)X (MOVBQ(S|Z)X x)) -> (MOVBQ(S|Z)X x)
(MOVBQ(S|Z)X (MOVBQ(S|Z)X x)) -> (MOVBQ(S|Z)X x)

// load; op-with-constant; store back to the same location becomes a single
// constant read-modify-write, with the constant and offset packed together.
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
	&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
	&& clobber(l) && clobber(a)
	-> ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
	&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
	&& clobber(l) && clobber(a)
	-> ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)

// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
// A load whose memory argument is a store to the same location from the
// other register class reads the stored value through a register move.
(MOV(Q|L)load [off] {sym} ptr (MOVS(D|S)store [off] {sym} ptr val _)) -> (MOV(Q|L)f2i val)
(MOVS(D|S)load [off] {sym} ptr (MOV(Q|L)store [off] {sym} ptr val _)) -> (MOV(Q|L)i2f val)

// Other load-like ops.
// An op with a memory operand whose memory argument is a store of the other
// kind (float vs. integer) to the same [off] {sym} ptr: use the register form
// of the op on a direct move of the stored value y instead of the load.
((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ((ADD|SUB|AND|OR|XOR)Q x (MOVQf2i y))
((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ((ADD|SUB|AND|OR|XOR)L x (MOVLf2i y))

((ADD|SUB|MUL)SDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> ((ADD|SUB|MUL)SD x (MOVQi2f y))
((ADD|SUB|MUL)SSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> ((ADD|SUB|MUL)SS x (MOVLi2f y))

// Redirect stores to use the other register set: storing the result of a
// f2i/i2f move is rewritten to store the move's operand with the matching
// store op. Alternation groups pair up positionally (Q with SD, L with SS).
(MOV(Q|L)store [off] {sym} ptr (MOV(Q|L)f2i val) mem) -> (MOVS(D|S)store [off] {sym} ptr val mem)
(MOVS(D|S)store [off] {sym} ptr (MOV(Q|L)i2f val) mem) -> (MOV(Q|L)store [off] {sym} ptr val mem)

// Load args directly into the register class where it will be used.
// We do this by just modifying the type of the Arg.
// The replacement Arg is placed in the function's entry block and is only
// made when the two types have the same size.
(MOV(Q|L)f2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
(MOV(Q|L)i2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})

// LEAQ is rematerializeable, so this helps to avoid register spill.
// See issue 22947 for details
(ADDQconst [off] x:(SP)) -> (LEAQ [off] x)
(ADDLconst [off] x:(SP)) -> (LEAL [off] x)

// HMULx is commutative, but its first argument must go in AX.
// If possible, put a rematerializeable value in the first argument slot,
// to reduce the odds that another value will have to be spilled
// specifically to free up AX.
(HMUL(Q|L|QU|LU) x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L|QU|LU) y x)

// Fold loads into compares
// Note: these may be undone by the flagalloc pass.
// Load as the first operand: compare directly against memory.
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x)
  && canMergeLoad(v, l) && clobber(l) ->
  (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
// Load as the second operand: same fusion, wrapped in InvertFlags because the
// operands end up in the opposite order.
(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem))
  && canMergeLoad(v, l) && clobber(l) ->
  (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))

// Compare of a single-use load against a constant: emit the fused op in the
// load's block, with constant and offset packed into one ValAndOff.
(CMP(Q|L|W|B)const l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) [c])
  && l.Uses == 1 && validValAndOff(c, off) && clobber(l) ->
  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(c,off)] ptr mem)

// Memory compare against a constant register operand. For the 16- and 8-bit
// forms the constant is first truncated to the operand width.
(CMP(Q|L)load {sym} [off] ptr (MOV(Q|L)const [c]) mem) && validValAndOff(c,off) -> (CMP(Q|L)constload {sym} [makeValAndOff(c,off)] ptr mem)
(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)

// TEST of a load against itself (both operands are the same value, and those
// are its only two uses) becomes a memory compare with constant 0.
(TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
  && l == l2 && l.Uses == 2 && validValAndOff(0,off) && clobber(l) ->
  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem)

// Loads at a fixed offset from SB of a symbol for which symIsRO holds are
// replaced by the constant that read8/16/32/64 returns for that offset.
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.BigEndian))])
(MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.BigEndian))])