// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Lowering arithmetic
// Integer add/sub/mul: 64-bit ops use the Q form, narrower widths all use the L form.
(Add(64|32|16|8) ...) => (ADD(Q|L|L|L) ...)
(AddPtr ...) => (ADDQ ...)
(Add(32|64)F ...) => (ADDS(S|D) ...)

(Sub(64|32|16|8) ...) => (SUB(Q|L|L|L) ...)
(SubPtr ...) => (SUBQ ...)
(Sub(32|64)F ...) => (SUBS(S|D) ...)

(Mul(64|32|16|8) ...) => (MUL(Q|L|L|L) ...)
(Mul(32|64)F ...) => (MULS(S|D) ...)

// Overflow-checked unsigned multiply: Select0 is the product,
// Select1 is SETO applied to the flags half of the same multiply.
(Select0 (Mul64uover x y)) => (Select0 <typ.UInt64> (MULQU x y))
(Select0 (Mul32uover x y)) => (Select0 <typ.UInt32> (MULLU x y))
(Select1 (Mul(64|32)uover x y)) => (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y)))

(Hmul(64|32) ...) => (HMUL(Q|L) ...)
(Hmul(64|32)u ...) => (HMUL(Q|L)U ...)

// Division: the quotient is Select0 of the DIV tuple.
// 8-bit operands are first extended to 16 bits and divided with DIVW/DIVWU.
(Div(64|32|16) [a] x y) => (Select0 (DIV(Q|L|W) [a] x y))
(Div8 x y) => (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Div(64|32|16)u x y) => (Select0 (DIV(Q|L|W)U x y))
(Div8u x y) => (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F ...) => (DIVS(S|D) ...)

// 64-bit add-with-carry / subtract-with-borrow.
// The incoming carry or borrow c is converted to a flags value via NEGLflags;
// the outgoing one is produced by SBBQcarrymask followed by NEGQ.
(Select0 (Add64carry x y c)) =>
    (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c)) =>
    (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c)) =>
    (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c)) =>
    (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))

// Optimize ADCQ and friends
// Fold 32-bit-representable constants into the *const forms, and drop the
// carry/borrow input entirely when it is the constant FlagEQ.
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) => (ADCQconst x [int32(c)] carry)
(ADCQ x y (FlagEQ)) => (ADDQcarry x y)
(ADCQconst x [c] (FlagEQ)) => (ADDQconstcarry x [c])
(ADDQcarry x (MOVQconst [c])) && is32Bit(c) => (ADDQconstcarry x [int32(c)])
(SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) => (SBBQconst x [int32(c)] borrow)
(SBBQ x y (FlagEQ)) => (SUBQborrow x y)
(SBBQconst x [c] (FlagEQ)) => (SUBQconstborrow x [c])
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) => (SUBQconstborrow x [int32(c)])
(Select1 (NEGLflags (MOVQconst [0]))) => (FlagEQ)
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) => x


(Mul64uhilo ...) => (MULQU2 ...)
(Div128u ...) => (DIVQU2 ...)

(Avg64u ...) => (AVGQU ...)

// Remainder: Select1 of the same DIV tuple used for the quotient.
(Mod(64|32|16) [a] x y) => (Select1 (DIV(Q|L|W) [a] x y))
(Mod8 x y) => (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Mod(64|32|16)u x y) => (Select1 (DIV(Q|L|W)U x y))
(Mod8u x y) => (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))

// Bitwise ops: Q form for 64 bits, L form for everything narrower.
(And(64|32|16|8) ...) => (AND(Q|L|L|L) ...)
(Or(64|32|16|8) ...) => (OR(Q|L|L|L) ...)
(Xor(64|32|16|8) ...) => (XOR(Q|L|L|L) ...)
(Com(64|32|16|8) ...) => (NOT(Q|L|L|L) ...)

(Neg(64|32|16|8) ...) => (NEG(Q|L|L|L) ...)
// Floating-point negation: XOR with the constant math.Copysign(0, -1) (negative zero).
(Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
(Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))

// Lowering boolean ops
(AndB ...) => (ANDL ...)
(OrB ...) => (ORL ...)
(Not x) => (XORLconst [1] x)

// Lowering pointer arithmetic
// Offsets that fit in 32 bits become an immediate; others are materialized with MOVQconst.
(OffPtr [off] ptr) && is32Bit(off) => (ADDQconst [int32(off)] ptr)
(OffPtr [off] ptr) => (ADDQ (MOVQconst [off]) ptr)

// Lowering other arithmetic
// Count trailing zeros. With GOAMD64 >= 3 the 64/32-bit forms use TZCNT directly.
// Otherwise BSF is used; the 32/16/8-bit forms first set the bit just above the
// operand width (BTS) so that BSF always sees a non-zero input, and the 64-bit
// form selects the constant 64 with CMOVQEQ based on the BSFQ flags.
(Ctz64 x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32 x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64 <t> x) && buildcfg.GOAMD64 < 3 => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x))

// Known-non-zero variants need no zero handling.
(Ctz64NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTQ x)
(Ctz32NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz16NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz8NonZero x) && buildcfg.GOAMD64 >= 3 => (TZCNTL x)
(Ctz64NonZero x) && buildcfg.GOAMD64 < 3 => (Select0 (BSFQ x))
(Ctz32NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)
(Ctz16NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)
(Ctz8NonZero x) && buildcfg.GOAMD64 < 3 => (BSFL x)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
// places the index of the highest set bit where we want it.
// For GOAMD64>=3, BitLen can be calculated by OperandSize - LZCNT(x).
// GOAMD64 < 3: BSR based. The 64-bit form substitutes -1 via CMOVQEQ on the BSRQ
// flags and then adds 1; narrower forms compute BSR of (2*x + 1) with LEA on the
// zero-extended operand.
(BitLen64 <t> x) && buildcfg.GOAMD64 < 3 => (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
(BitLen32 x) && buildcfg.GOAMD64 < 3 => (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
(BitLen16 x) && buildcfg.GOAMD64 < 3 => (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
(BitLen8 x) && buildcfg.GOAMD64 < 3 => (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
// GOAMD64 >= 3: -(LZCNT(x) - OperandSize).
(BitLen64 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-64] (LZCNTQ x)))
// Use the 64-bit NEGQ/ADDQconst forms so that constant folding can remove unnecessary arithmetic.
(BitLen32 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL x)))
(BitLen16 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x))))
(BitLen8 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))

(Bswap(64|32) ...) => (BSWAP(Q|L) ...)

// Population count; 16- and 8-bit operands are zero-extended and counted with POPCNTL.
(PopCount(64|32) ...) => (POPCNT(Q|L) ...)
(PopCount16 x) => (POPCNTL (MOVWQZX <typ.UInt32> x))
(PopCount8 x) => (POPCNTL (MOVBQZX <typ.UInt32> x))

(Sqrt ...) => (SQRTSD ...)
(Sqrt32 ...) => (SQRTSS ...)

// Rounding: all four ops map to ROUNDSD, distinguished by the immediate 0..3.
(RoundToEven x) => (ROUNDSD [0] x)
(Floor x) => (ROUNDSD [1] x)
(Ceil x) => (ROUNDSD [2] x)
(Trunc x) => (ROUNDSD [3] x)

// Note the operand order: the addend z comes first in VFMADD231SD.
(FMA x y z) => (VFMADD231SD z x y)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 ...) => (MOVBQSX ...)
(SignExt8to32 ...) => (MOVBQSX ...)
(SignExt8to64 ...) => (MOVBQSX ...)
(SignExt16to32 ...) => (MOVWQSX ...)
(SignExt16to64 ...) => (MOVWQSX ...)
(SignExt32to64 ...) => (MOVLQSX ...)

(ZeroExt8to16 ...) => (MOVBQZX ...)
(ZeroExt8to32 ...) => (MOVBQZX ...)
(ZeroExt8to64 ...) => (MOVBQZX ...)
(ZeroExt16to32 ...) => (MOVWQZX ...)
(ZeroExt16to64 ...) => (MOVWQZX ...)
(ZeroExt32to64 ...) => (MOVLQZX ...)

(Slicemask <t> x) => (SARQconst (NEGQ <t> x) [63])

// Spectre index masking: conditionally replace x with the constant 0 based on CMPQ x y.
(SpectreIndex <t> x y) => (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
(SpectreSliceIndex <t> x y) => (CMOVQHI x (MOVQconst [0]) (CMPQ x y))

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)

// Lowering float <-> int
(Cvt32to32F ...) => (CVTSL2SS ...)
(Cvt32to64F ...) => (CVTSL2SD ...)
(Cvt64to32F ...) => (CVTSQ2SS ...)
(Cvt64to64F ...) => (CVTSQ2SD ...)

(Cvt32Fto32 ...) => (CVTTSS2SL ...)
(Cvt32Fto64 ...) => (CVTTSS2SQ ...)
(Cvt64Fto32 ...) => (CVTTSD2SL ...)
(Cvt64Fto64 ...) => (CVTTSD2SQ ...)

(Cvt32Fto64F ...) => (CVTSS2SD ...)
(Cvt64Fto32F ...) => (CVTSD2SS ...)

(Round(32|64)F ...) => (Copy ...)

(CvtBoolToUint8 ...) => (Copy ...)

// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
//   result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
// When shiftIsBounded(v) holds, the mask is omitted and the bare shift is emitted.
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))

(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)

(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))

(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRB x y)

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
// Unbounded signed right shifts: OR the shift count with the complement of the
// carry mask produced by comparing the count against the operand width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))

// Bounded signed right shifts need no count adjustment.
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)

// Lowering integer comparisons
// Each comparison becomes a SETcc of a width-matched CMP; the unsigned
// variants use SETB/SETBE where the signed ones use SETL/SETLE.
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8) x y) => (SETLE (CMP(Q|L|W|B) x y))
(Leq(64|32|16|8)U x y) => (SETBE (CMP(Q|L|W|B) x y))
(Eq(Ptr|64|32|16|8|B) x y) => (SETEQ (CMP(Q|Q|L|W|B|B) x y))
(Neq(Ptr|64|32|16|8|B) x y) => (SETNE (CMP(Q|Q|L|W|B|B) x y))

// Lowering floating point comparisons
// Note Go assembler gets UCOMISx operand order wrong, but it is right here
// and the operands are reversed when generating assembly language.
(Eq(32|64)F x y) => (SETEQF (UCOMIS(S|D) x y))
(Neq(32|64)F x y) => (SETNEF (UCOMIS(S|D) x y))
// Use SETGF/SETGEF with reversed operands to dodge NaN case.
// x < y is computed as y > x (and x <= y as y >= x): note the swapped UCOMIS operands.
(Less(32|64)F x y) => (SETGF (UCOMIS(S|D) y x))
(Leq(32|64)F x y) => (SETGEF (UCOMIS(S|D) y x))

// Lowering loads
// The load op is selected from the type of the loaded value.
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) => (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) => (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) => (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) => (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) => (MOVSDload ptr mem)

// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (MOVSSstore ptr val mem)

// Remaining stores are selected purely by size.
(Store {t} ptr val mem) && t.Size() == 8 => (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 => (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 2 => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)

// Lowering moves
// Power-of-two sizes up to 8 bytes are a single load/store pair.
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) => (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) => (MOVQstore dst (MOVQload src mem) mem)
// 16 bytes: one MOVO pair with SSE, otherwise two MOVQ pairs.
(Move [16] dst src mem) && config.useSSE => (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE =>
    (MOVQstore [8] dst (MOVQload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

// 32 and 48 bytes: move the first 16 bytes, then recurse on the rest at offset 16.
(Move [32] dst src mem) =>
    (Move [16]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

(Move [48] dst src mem) && config.useSSE =>
    (Move [32]
        (OffPtr <dst.Type> dst [16])
        (OffPtr <src.Type> src [16])
        (Move [16] dst src mem))

// 64 bytes: two 32-byte moves.
(Move [64] dst src mem) && config.useSSE =>
    (Move [32]
        (OffPtr <dst.Type> dst [32])
        (OffPtr <src.Type> src [32])
        (Move [32] dst src mem))

// Odd small sizes: two load/store pairs. For size 7 and for the [s] rule
// below, the second pair is placed at size-4 / size-8 and so overlaps the first.
(Move [3] dst src mem) =>
    (MOVBstore [2] dst (MOVBload [2] src mem)
        (MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem) =>
    (MOVBstore [4] dst (MOVBload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem) =>
    (MOVWstore [4] dst (MOVWload [4] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem) =>
    (MOVLstore [3] dst (MOVLload [3] src mem)
        (MOVLstore dst (MOVLload src mem) mem))
(Move [9] dst src mem) =>
    (MOVBstore [8] dst (MOVBload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [10] dst src mem) =>
    (MOVWstore [8] dst (MOVWload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [12] dst src mem) =>
    (MOVLstore [8] dst (MOVLload [8] src mem)
        (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 =>
    (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem)
        (MOVQstore dst (MOVQload src mem) mem))

// Adjust moves to be a multiple of 16 bytes.
// Copy the first 8 or 16 bytes, then recurse on the remaining s-s%16 bytes
// starting at offset s%16.
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 <= 8 =>
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE =>
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
    && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE =>
    (Move [s-s%16]
        (OffPtr <dst.Type> dst [s%16])
        (OffPtr <src.Type> src [s%16])
        (MOVQstore [8] dst (MOVQload [8] src mem)
            (MOVQstore dst (MOVQload src mem) mem)))

// Medium copying uses a duff device.
// Applies to multiples of 16 bytes in (64, 16*64] unless the duff device is disabled.
(Move [s] dst src mem)
    && s > 64 && s <= 16*64 && s%16 == 0
    && !config.noDuffDevice && logLargeCopy(v, s) =>
    (DUFFCOPY [s] dst src mem)

// Large copying uses REP MOVSQ.
// The count operand is the number of 8-byte words (s/8).
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 && logLargeCopy(v, s) =>
    (REPMOVSQ dst src (MOVQconst [s/8]) mem)

// Lowering Zero instructions
// Power-of-two sizes up to 8 bytes are a single constant store.
(Zero [0] _ mem) => mem
(Zero [1] destptr mem) => (MOVBstoreconst [makeValAndOff(0,0)] destptr mem)
(Zero [2] destptr mem) => (MOVWstoreconst [makeValAndOff(0,0)] destptr mem)
(Zero [4] destptr mem) => (MOVLstoreconst [makeValAndOff(0,0)] destptr mem)
(Zero [8] destptr mem) => (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)

// Sizes 3, 5, 6 and 7 use two constant stores (overlapping for size 7).
(Zero [3] destptr mem) =>
    (MOVBstoreconst [makeValAndOff(0,2)] destptr
        (MOVWstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [5] destptr mem) =>
    (MOVBstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [6] destptr mem) =>
    (MOVWstoreconst [makeValAndOff(0,4)] destptr
        (MOVLstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [7] destptr mem) =>
    (MOVLstoreconst [makeValAndOff(0,3)] destptr
        (MOVLstoreconst [makeValAndOff(0,0)] destptr mem))

// Strip off any fractional word zeroing.
// Without SSE: zero the first 8 bytes, then recurse on the remaining
// multiple of 8 starting at offset s%8.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE =>
    (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
        (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))

// Zero small numbers of words directly.
// Without SSE, 16/24/32 bytes are zeroed with a chain of 8-byte constant stores.
(Zero [16] destptr mem) && !config.useSSE =>
    (MOVQstoreconst [makeValAndOff(0,8)] destptr
        (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [24] destptr mem) && !config.useSSE =>
    (MOVQstoreconst [makeValAndOff(0,16)] destptr
        (MOVQstoreconst [makeValAndOff(0,8)] destptr
            (MOVQstoreconst [makeValAndOff(0,0)] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE =>
    (MOVQstoreconst [makeValAndOff(0,24)] destptr
        (MOVQstoreconst [makeValAndOff(0,16)] destptr
            (MOVQstoreconst [makeValAndOff(0,8)] destptr
                (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))))

// With SSE, 9..15 bytes use two 8-byte stores, the second placed at s-8.
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE =>
    (MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr
        (MOVQstoreconst [makeValAndOff(0,0)] destptr mem))

// Adjust zeros to be a multiple of 16 bytes.
// One MOVOstoreconst at offset 0 covers the head; the remaining s-s%16 bytes
// are zeroed starting at offset s%16. This single rule replaces two rules
// that were guarded by s%16 > 8 and s%16 <= 8 but produced the same result.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && config.useSSE =>
    (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
        (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))

// With SSE, 16/32/48/64 bytes are zeroed with a chain of MOVOstoreconst at 16-byte strides.
(Zero [16] destptr mem) && config.useSSE =>
    (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
(Zero [32] destptr mem) && config.useSSE =>
    (MOVOstoreconst [makeValAndOff(0,16)] destptr
        (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [48] destptr mem) && config.useSSE =>
    (MOVOstoreconst [makeValAndOff(0,32)] destptr
        (MOVOstoreconst [makeValAndOff(0,16)] destptr
            (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
(Zero [64] destptr mem) && config.useSSE =>
    (MOVOstoreconst [makeValAndOff(0,48)] destptr
        (MOVOstoreconst [makeValAndOff(0,32)] destptr
            (MOVOstoreconst [makeValAndOff(0,16)] destptr
                (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))

// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
    && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice =>
    (DUFFZERO [s] destptr mem)

// Large zeroing uses REP STOSQ.
// The count operand is the number of 8-byte words (s/8); the stored value is 0.
(Zero [s] destptr mem)
    && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
    && s%8 == 0 =>
    (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)

// Lowering constants
// 8-, 16- and 32-bit integer constants and booleans all become MOVLconst.
(Const8 [c]) => (MOVLconst [int32(c)])
(Const16 [c]) => (MOVLconst [int32(c)])
(Const32 ...) => (MOVLconst ...)
(Const64 ...) => (MOVQconst ...)
(Const32F ...) => (MOVSSconst ...)
(Const64F ...) => (MOVSDconst ...)
(ConstNil ) => (MOVQconst [0])
(ConstBool [c]) => (MOVLconst [b2i32(c)])

// Lowering calls
(StaticCall ...) => (CALLstatic ...)
(ClosureCall ...) => (CALLclosure ...)
(InterCall ...) => (CALLinter ...)
(TailCall ...) => (CALLtail ...)

// Lowering conditional moves
// If the condition is a SETxx, we can just run a CMOV from the comparison that was
// setting the flags.
// Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
// Note that the CMOV takes its value operands in the order y x.
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && (is64BitInt(t) || isPtr(t))
    => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
    => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
(CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
    => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)

// If the condition does not set the flags, we need to generate a comparison.
// A non-flags condition narrower than 8 bytes is first zero-extended to 64 bits;
// the rewritten CondSelect is then matched again by the size-8 rules below.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
    => (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
    => (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
    => (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))

// An 8-byte condition is compared against zero and drives a CMOVxNE of the result width.
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
    => (CMOVQNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
    => (CMOVLNE y x (CMPQconst [0] check))
(CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
    => (CMOVWNE y x (CMPQconst [0] check))

// Absorb InvertFlags
// Each condition is replaced by its operand-swapped counterpart (LT<->GT, LE<->GE,
// HI<->CS, CC<->LS); EQ and NE are unchanged.
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    => (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    => (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
    => (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)

// Absorb constants generated during lower
// With a constant flags argument the CMOV reduces to one of its two value
// operands: the second when the condition holds, the first otherwise.
(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) => x
(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) => y
(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) => x
(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) => y
(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) => x
(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) => y
(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) => x
(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) => y
(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) => x
(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) => y

// Miscellaneous
// Nil and bounds checks are unsigned comparisons / self-tests feeding a SETcc.
(IsNonNil p) => (SETNE (TESTQ p p))
(IsInBounds idx len) => (SETB (CMPQ idx len))
(IsSliceInBounds idx len) => (SETBE (CMPQ idx len))
(NilCheck ...) => (LoweredNilCheck ...)
(GetG mem) && v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal => (LoweredGetG mem) // only lower in old ABI. in new ABI we have a G register.
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)

(HasCPUFeature {s}) => (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
(Addr {sym} base) => (LEAQ {sym} base)
(LocalAddr {sym} base _) => (LEAQ {sym} base)

// A byte store whose value is a single-use SETcc is fused into the matching SETccstore.
(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 => (SETLEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 => (SETGstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 => (SETGEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 => (SETEQstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 => (SETNEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 => (SETBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 => (SETBEstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 => (SETAstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 => (SETAEstore [off] {sym} ptr x mem)

// block rewrites
// An If controlled by a SETcc becomes the conditional block kind that tests
// the same flags directly.
(If (SETL cmp) yes no) => (LT cmp yes no)
(If (SETLE cmp) yes no) => (LE cmp yes no)
(If (SETG cmp) yes no) => (GT cmp yes no)
(If (SETGE cmp) yes no) => (GE cmp yes no)
(If (SETEQ cmp) yes no) => (EQ cmp yes no)
(If (SETNE cmp) yes no) => (NE cmp yes no)
(If (SETB cmp) yes no) => (ULT cmp yes no)
(If (SETBE cmp) yes no) => (ULE cmp yes no)
(If (SETA cmp) yes no) => (UGT cmp yes no)
(If (SETAE cmp) yes no) => (UGE cmp yes no)
(If (SETO cmp) yes no) => (OS cmp yes no)

// Special case for floating point - LF/LEF not generated
(If (SETGF cmp) yes no) => (UGT cmp yes no)
(If (SETGEF cmp) yes no) => (UGE cmp yes no)
(If (SETEQF cmp) yes no) => (EQF cmp yes no)
(If (SETNEF cmp) yes no) => (NEF cmp yes no)

// Fallback for any other boolean condition: test the byte against itself and branch on NE.
(If cond yes no) => (NE (TESTB cond cond) yes no)

(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))

// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem)
(AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem)
(AtomicLoad64 ptr mem) => (MOVQatomicload ptr mem)
(AtomicLoadPtr ptr mem) => (MOVQatomicload ptr mem)

// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
// Only the memory half (Select1) of the XCHG tuple is used.
(AtomicStore8 ptr val mem) => (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
(AtomicStore32 ptr val mem) => (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))

// Atomic exchanges.
// Note the operand order: XCHG takes val before ptr.
(AtomicExchange32 ptr val mem) => (XCHGL val ptr mem)
(AtomicExchange64 ptr val mem) => (XCHGQ val ptr mem)

// Atomic adds.
// The XADD tuple is wrapped in AddTupleFirst; selecting its first element
// adds val to the tuple's first element, and selecting the second passes through.
(AtomicAdd32 ptr val mem) => (AddTupleFirst32 val (XADDLlock val ptr mem))
(AtomicAdd64 ptr val mem) => (AddTupleFirst64 val (XADDQlock val ptr mem))
(Select0 <t> (AddTupleFirst32 val tuple)) => (ADDL val (Select0 <t> tuple))
(Select1 (AddTupleFirst32 _ tuple)) => (Select1 tuple)
(Select0 <t> (AddTupleFirst64 val tuple)) => (ADDQ val (Select0 <t> tuple))
(Select1 (AddTupleFirst64 _ tuple)) => (Select1 tuple)

// Atomic compare and swap.
(AtomicCompareAndSwap32 ptr old new_ mem) => (CMPXCHGLlock ptr old new_ mem)
(AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)

// Atomic memory updates.
(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem)
(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
(AtomicOr32 ptr val mem) => (ORLlock ptr val mem)

// Write barrier.
(WB ...) => (LoweredWB ...)

// Bounds-check panics: the lowered op (A, B or C) is chosen by boundsABI(kind).
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)

// lowering rotates
(RotateLeft8 ...) => (ROLB ...)
(RotateLeft16 ...) => (ROLW ...)
(RotateLeft32 ...) => (ROLL ...)
(RotateLeft64 ...) => (ROLQ ...)

// ***************************
// Above: lowering rules
// Below: optimizations
// ***************************
// TODO: Should the optimizations be a separate pass?

// Fold boolean tests into blocks
// An NE block testing a SETcc byte against itself branches on the
// underlying flags instead.
(NE (TESTB (SETL cmp) (SETL cmp)) yes no) => (LT cmp yes no)
(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE cmp yes no)
(NE (TESTB (SETG cmp) (SETG cmp)) yes no) => (GT cmp yes no)
(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE cmp yes no)
(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ cmp yes no)
(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE cmp yes no)
(NE (TESTB (SETB cmp) (SETB cmp)) yes no) => (ULT cmp yes no)
(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no)
(NE (TESTB (SETA cmp) (SETA cmp)) yes no) => (UGT cmp yes no)
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no)
(NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no)

// Unsigned comparisons to 0/1
// After TEST x x, "unsigned below" is folded to constant false and
// "unsigned above-or-equal" to constant true.
(ULT (TEST(Q|L|W|B) x x) yes no) => (First no yes)
(UGE (TEST(Q|L|W|B) x x) yes no) => (First yes no)
(SETB (TEST(Q|L|W|B) x x)) => (ConstBool [false])
(SETAE (TEST(Q|L|W|B) x x)) => (ConstBool [true])

// x & 1 != 0 -> x & 1
(SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
(SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x)

// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
// Block form: NE/EQ on a single-bit TEST becomes ULT/UGE on a BT.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
    => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
    => ((ULT|UGE) (BTQconst [int8(log32(c))] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
    => ((ULT|UGE) (BTQconst [int8(log64(c))] x))
// Value form: SETNE/SETEQ becomes SETB/SETAE.
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
    => (SET(B|AE) (BTLconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
    => (SET(B|AE) (BTQconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
    => (SET(B|AE) (BTQconst [int8(log64(c))] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
    => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
    => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
    => (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
    => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c)
    => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem)

// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
// A constant bit test of a constant-shifted value is re-indexed onto the
// unshifted value: a right shift by d adds d to the bit index, a left shift by d
// subtracts it. The guards keep the resulting index inside the operand width
// (right shift) and strictly positive (left shift).
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
// Testing bit 0 of a variable right shift becomes a variable bit test.
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)

// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) => (SET(EQ|NE) (CMPLconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) => (SET(EQ|NE) (CMPQconst [0] s))
(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)

// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)

// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
// The rules apply to both OR (-> BTS) and XOR (-> BTC). The uint64(c) >= 128
// guard restricts them to single-bit masks of 0x80 and above.
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
	=> (BT(S|C)Qconst [int8(log32(c))] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
	=> (BT(S|C)Lconst [int8(log32(c))] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
	=> (BT(S|C)Qconst [int8(log64(c))] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
	=> (BT(S|C)Lconst [int8(log32(c))] x)

// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
// Constant forms: the mask c clears exactly one bit when its complement ^c is
// a power of two; the same >= 128 size guard is applied to ^c.
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
	=> (BTRQconst [int8(log32(^c))] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
	=> (BTRLconst [int8(log32(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
	=> (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
	=> (BTRLconst [int8(log32(^c))] x)

// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
// for instance:
//    x & 0xFFFF0000 -> (x >> 16) << 16
//    x & 0x80000000 -> (x >> 31) << 31
//
// In case the mask is just one bit (like second example above), it conflicts
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
// We thus special-case them, by detecting the shift patterns.

// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
	=> (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x))
	=> (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x))
	=> (BTRQconst [63] x)

// Special case testing first/last bit (with double-shift generated by generic.rules)
// In all rules below the z1==z2 guard requires both TEST operands to be the
// same value, i.e. the shifted value is tested against itself.
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
// Note: this rule matches a 64-bit inner shift (SHRQconst) under a 32-bit outer
// shift, and accordingly emits BTQconst rather than BTLconst.
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

// Same as above with the shifts in the opposite order, which isolates bit 0.
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)

// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2
	=> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2
	=> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)

// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
// When the outer op sets or resets bit c, an inner reset/set/toggle of the same
// bit c is dropped and only the outer op is kept.
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)

// Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) => (SETEQ x)
(XORLconst [1] (SETEQ x)) => (SETNE x)
(XORLconst [1] (SETL x)) => (SETGE x)
(XORLconst [1] (SETGE x)) => (SETL x)
(XORLconst [1] (SETLE x)) => (SETG x)
(XORLconst [1] (SETG x)) => (SETLE x)
(XORLconst [1] (SETB x)) => (SETAE x)
(XORLconst [1] (SETAE x)) => (SETB x)
(XORLconst [1] (SETBE x)) => (SETA x)
(XORLconst [1] (SETA x)) => (SETBE x)

// Special case for floating point - LF/LEF not generated
(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) => (UGT cmp yes no)
(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE cmp yes no)
(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF cmp yes no)
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF cmp yes no)

// Disabled because it interferes with the pattern match above and makes worse code.
// (SETNEF x) => (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
// (SETEQF x) => (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))

// fold constants into instructions
// 64-bit ops only take the constant form when the constant passes is32Bit.
(ADDQ x (MOVQconst [c])) && is32Bit(c) => (ADDQconst [int32(c)] x)
(ADDQ x (MOVLconst [c])) => (ADDQconst [c] x)
(ADDL x (MOVLconst [c])) => (ADDLconst [c] x)

// A constant on the left of a subtraction is handled as -(x - c).
(SUBQ x (MOVQconst [c])) && is32Bit(c) => (SUBQconst x [int32(c)])
(SUBQ (MOVQconst [c]) x) && is32Bit(c) => (NEGQ (SUBQconst <v.Type> x [int32(c)]))
(SUBL x (MOVLconst [c])) => (SUBLconst x [c])
(SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c]))

(MULQ x (MOVQconst [c])) && is32Bit(c) => (MULQconst [int32(c)] x)
(MULL x (MOVLconst [c])) => (MULLconst [c] x)

(ANDQ x (MOVQconst [c])) && is32Bit(c) => (ANDQconst [int32(c)] x)
(ANDL x (MOVLconst [c])) => (ANDLconst [c] x)

// Merge two stacked constant ops of the same kind into one.
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) => (AND(L|Q)const [c & d] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
(OR(L|Q)const [c] (OR(L|Q)const [d] x)) => (OR(L|Q)const [c | d] x)

// Merge constant bit-reset ops with AND masks (32-bit).
(BTRLconst [c] (ANDLconst [d] x)) => (ANDLconst [d &^ (1<<uint32(c))] x)
(ANDLconst [c] (BTRLconst [d] x)) => (ANDLconst [c &^ (1<<uint32(d))] x)
(BTRLconst [c] (BTRLconst [d] x)) => (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)

// Merge constant bit-toggle ops with XOR masks (32-bit).
// Two toggles must be combined with ^, not |: when c == d they cancel and the
// combined mask has to be 0. Using | would leave bit c toggled once. This
// matches the 64-bit BTCQconst rule below.
(BTCLconst [c] (XORLconst [d] x)) => (XORLconst [d ^ 1<<uint32(c)] x)
(XORLconst [c] (BTCLconst [d] x)) => (XORLconst [c ^ 1<<uint32(d)] x)
(BTCLconst [c] (BTCLconst [d] x)) => (XORLconst [1<<uint32(c) ^ 1<<uint32(d)] x)

// Merge constant bit-set ops with OR masks (32-bit).
(BTSLconst [c] (ORLconst [d] x)) => (ORLconst [d | 1<<uint32(c)] x)
(ORLconst [c] (BTSLconst [d] x)) => (ORLconst [c | 1<<uint32(d)] x)
(BTSLconst [c] (BTSLconst [d] x)) => (ORLconst [1<<uint32(c) | 1<<uint32(d)] x)

// 64-bit variants of the three groups above. Each one is additionally guarded
// by is32Bit on the combined mask.
(BTRQconst [c] (ANDQconst [d] x)) && is32Bit(int64(d) &^ (1<<uint32(c))) => (ANDQconst [d &^ (1<<uint32(c))] x)
(ANDQconst [c] (BTRQconst [d] x)) && is32Bit(int64(c) &^ (1<<uint32(d))) => (ANDQconst [c &^ (1<<uint32(d))] x)
(BTRQconst [c] (BTRQconst [d] x)) && is32Bit(^(1<<uint32(c) | 1<<uint32(d))) => (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)

(BTCQconst [c] (XORQconst [d] x)) && is32Bit(int64(d) ^ 1<<uint32(c)) => (XORQconst [d ^ 1<<uint32(c)] x)
(XORQconst [c] (BTCQconst [d] x)) && is32Bit(int64(c) ^ 1<<uint32(d)) => (XORQconst [c ^ 1<<uint32(d)] x)
(BTCQconst [c] (BTCQconst [d] x)) && is32Bit(1<<uint32(c) ^ 1<<uint32(d)) => (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)

(BTSQconst [c] (ORQconst [d] x)) && is32Bit(int64(d) | 1<<uint32(c)) => (ORQconst [d | 1<<uint32(c)] x)
(ORQconst [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d)) => (ORQconst [c | 1<<uint32(d)] x)
(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)


(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)

(ORQ x (MOVQconst [c])) && is32Bit(c) => (ORQconst [int32(c)] x)
(ORQ x (MOVLconst [c])) => (ORQconst [c] x)
(ORL x (MOVLconst [c])) => (ORLconst [c] x)

(XORQ x (MOVQconst [c])) && is32Bit(c) => (XORQconst [int32(c)] x)
(XORL x (MOVLconst [c])) => (XORLconst [c] x)

// Shifts by a constant amount: the amount is masked to 6 bits for 64-bit
// shifts and to 5 bits for all narrower ones (see the note at the end of this
// section).
(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)

(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
// Unsigned word/byte shifts by at least the operand width produce zero.
(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x)
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])

// Signed word/byte shifts clamp the amount to width-1 instead.
(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)

// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))

((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))

((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))

((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))

// rotate left negative = rotate right
(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
(ROLL x (NEG(Q|L) y)) => (RORL x y)
(ROLW x (NEG(Q|L) y)) => (RORW x y)
(ROLB x (NEG(Q|L) y)) => (RORB x y)

// rotate right negative = rotate left
(RORQ x (NEG(Q|L) y)) => (ROLQ x y)
(RORL x (NEG(Q|L) y)) => (ROLL x y)
(RORW x (NEG(Q|L) y)) => (ROLW x y)
(RORB x (NEG(Q|L) y)) => (ROLB x y)

// rotate by constants
// Right rotates by a constant are expressed as left rotates by the negated
// amount, so only ROLxconst forms are produced.
(ROLQ x (MOV(Q|L)const [c])) => (ROLQconst [int8(c&63)] x)
(ROLL x (MOV(Q|L)const [c])) => (ROLLconst [int8(c&31)] x)
(ROLW x (MOV(Q|L)const [c])) => (ROLWconst [int8(c&15)] x)
(ROLB x (MOV(Q|L)const [c])) => (ROLBconst [int8(c&7) ] x)

(RORQ x (MOV(Q|L)const [c])) => (ROLQconst [int8((-c)&63)] x)
(RORL x (MOV(Q|L)const [c])) => (ROLLconst [int8((-c)&31)] x)
(RORW x (MOV(Q|L)const [c])) => (ROLWconst [int8((-c)&15)] x)
(RORB x (MOV(Q|L)const [c])) => (ROLBconst [int8((-c)&7) ] x)

// Constant shift simplifications
((SHLQ|SHRQ|SARQ)const x [0]) => x
((SHLL|SHRL|SARL)const x [0]) => x
((SHRW|SARW)const x [0]) => x
((SHRB|SARB)const x [0]) => x
((ROLQ|ROLL|ROLW|ROLB)const x [0]) => x

// Multi-register shifts
(ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)

// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
// for the small shifts. I don't think we'll ever generate a weird shift (e.g.
// (SHRW x (MOVLconst [24])), but just in case.

// Fold a constant operand into the compare. When the constant is the left
// operand, the operands are swapped and the result is wrapped in InvertFlags.
(CMPQ x (MOVQconst [c])) && is32Bit(c) => (CMPQconst x [int32(c)])
(CMPQ (MOVQconst [c]) x) && is32Bit(c) => (InvertFlags (CMPQconst x [int32(c)]))
(CMPL x (MOVLconst [c])) => (CMPLconst x [c])
(CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c]))
(CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)])
(CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)]))
(CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)])
(CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)]))

// Canonicalize the order of arguments to comparisons - helps with CSE.
(CMP(Q|L|W|B) x y) && canonLessThan(x,y) => (InvertFlags (CMP(Q|L|W|B) y x))

// Using MOVZX instead of AND is cheaper.
(AND(Q|L)const [ 0xFF] x) => (MOVBQZX x)
(AND(Q|L)const [0xFFFF] x) => (MOVWQZX x)
// The rule below is currently invalid because 0xFFFFFFFF is not representable
// as a signed int32. It is commented out for now; it could not trigger anyway,
// because the is32Bit guard on the ANDQconst lowering rule above prevents
// 0xFFFFFFFF from matching (for the same reason).
// Using an alternate form of this rule segfaults some binaries because of
// adverse interactions with other passes.
// (ANDQconst [0xFFFFFFFF] x) => (MOVLQZX x)

// strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
//    1 - addq, shlq, leaq, negq, subq
//    3 - imulq
// This limits the rewrites to two instructions.
// Note that negq always operates in-place,
// which can require a register-register move
// to preserve the original value,
// so it must be used with care.
// Fixed small multipliers, built from at most two LEA/NEG operations.
// LEAn a b is used here as a + n*b, so e.g. (LEA2 x x) is 3*x and
// (LEA2 x (LEA2 x x)) is x + 2*(3*x) = 7*x.
(MUL(Q|L)const [-9] x) => (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [-5] x) => (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [-3] x) => (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [-1] x) => (NEG(Q|L) x)
(MUL(Q|L)const [ 0] _) => (MOV(Q|L)const [0])
(MUL(Q|L)const [ 1] x) => x
(MUL(Q|L)const [ 3] x) => (LEA(Q|L)2 x x)
(MUL(Q|L)const [ 5] x) => (LEA(Q|L)4 x x)
(MUL(Q|L)const [ 7] x) => (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [ 9] x) => (LEA(Q|L)8 x x)
(MUL(Q|L)const [11] x) => (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [13] x) => (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [19] x) => (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [21] x) => (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [25] x) => (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [27] x) => (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [37] x) => (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [41] x) => (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [45] x) => (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))

// Multipliers of the form 2^n-1 become (x<<n) - x; multipliers of the form
// 2^n+k with k in {1,2,4,8} become (x<<n) + k*x via LEAk. The lower bounds on
// c leave the smaller constants to the fixed table above.
(MUL(Q|L)const [c] x) && isPowerOfTwo64(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log64(int64(c)+1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [int8(log32(c-8))] x) x)
// Multipliers of the form 3*2^n, 5*2^n and 9*2^n: one LEA followed by a shift.
(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHL(Q|L)const [int8(log32(c/3))] (LEA(Q|L)2 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))

// combine add/shift into LEAQ/LEAL
(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
(ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y)
// x + (x + y) is y + 2*x, hence the swapped operands in the result.
(ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x)

// combine ADDx/ADDxconst into LEAQ1/LEAL1
(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)

// fold ADDQ/ADDL into LEAQ/LEAL
// Offsets are only merged when their sum passes is32Bit. Rules that would
// place a value in an index position require that it is not the SB op.
(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)
(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)

// fold ADDQconst/ADDLconst into LEAQx/LEALx
// A constant added to the scaled (second) operand is multiplied by the scale
// before being merged into the offset.
(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)1 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)2 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)4 [c+d] {s} x y)
(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)1 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)2 [c+d] {s} x y)
(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEA(Q|L)2 [c+2*d] {s} x y)
(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)4 [c+d] {s} x y)
(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEA(Q|L)4 [c+4*d] {s} x y)
(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)8 [c+d] {s} x y)
(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)

// fold shifts into LEAQx/LEALx
// A left shift of the scaled operand by k multiplies the scale by 2^k, as long
// as the resulting scale is at most 8.
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)

// reverse ordering of compare instruction
// InvertFlags is absorbed by switching to the mirrored condition
// (L<->G, B<->A, LE<->GE, BE<->AE); EQ and NE are unchanged.
(SETL (InvertFlags x)) => (SETG x)
(SETG (InvertFlags x)) => (SETL x)
(SETB (InvertFlags x)) => (SETA x)
(SETA (InvertFlags x)) => (SETB x)
(SETLE (InvertFlags x)) => (SETGE x)
(SETGE (InvertFlags x)) => (SETLE x)
(SETBE (InvertFlags x)) => (SETAE x)
(SETAE (InvertFlags x)) => (SETBE x)
(SETEQ (InvertFlags x)) => (SETEQ x)
(SETNE (InvertFlags x)) => (SETNE x)

(SETLstore [off] {sym} ptr (InvertFlags x) mem) => (SETGstore [off] {sym} ptr x mem)
(SETGstore [off] {sym} ptr (InvertFlags x) mem) => (SETLstore [off] {sym} ptr x mem)
(SETBstore [off] {sym} ptr (InvertFlags x) mem) => (SETAstore [off] {sym} ptr x mem)
(SETAstore [off] {sym} ptr (InvertFlags x) mem) => (SETBstore [off] {sym} ptr x mem)
(SETLEstore [off] {sym} ptr (InvertFlags x) mem) => (SETGEstore [off] {sym} ptr x mem)
(SETGEstore [off] {sym} ptr (InvertFlags x) mem) => (SETLEstore [off] {sym} ptr x mem)
(SETBEstore [off] {sym} ptr (InvertFlags x) mem) => (SETAEstore [off] {sym} ptr x mem)
(SETAEstore [off] {sym} ptr (InvertFlags x) mem) => (SETBEstore [off] {sym} ptr x mem)
(SETEQstore [off] {sym} ptr (InvertFlags x) mem) => (SETEQstore [off] {sym} ptr x mem)
(SETNEstore [off] {sym} ptr (InvertFlags x) mem) => (SETNEstore [off] {sym} ptr x mem)

// sign- and zero-extended loads
// Note: The combined instruction must end up in the same block
// as the original load. If not, we end up making a value with
// memory type live in two different blocks, which can lead to
// multiple memory values alive simultaneously.
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
// An extension of a single-use load (x.Uses == 1) is replaced by one load of
// the extended width's source size, created in the load's own block (@x.Block).
// Sign extensions use the MOVxQSXload ops; zero extensions use the plain
// narrower MOVxload op.
(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)

// Drop a zero extension when the helper reports that the upper bits of x are
// already zero.
// NOTE(review): the second argument (3) is presumably a search-depth limit for
// the helper - confirm against its definition.
(MOVLQZX x) && zeroUpper32Bits(x,3) => x
(MOVWQZX x) && zeroUpper48Bits(x,3) => x
(MOVBQZX x) && zeroUpper56Bits(x,3) => x

// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// "Same location" means equal symbol, equal offset and isSamePtr on the
// pointers, with the store being the load's memory argument.
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQZX x)
(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQZX x)
(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQZX x)
(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x
(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQSX x)
(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQSX x)
(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQSX x)

// Fold extensions and ANDs together.
// A zero extension of an AND narrows the mask to the extended width.
(MOVBQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x)
(MOVWQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x)
(MOVLQZX (ANDLconst [c] x)) => (ANDLconst [c] x)
// A sign extension of an AND is only folded when the mask clears the sign bit
// of the narrow width, so the extension cannot introduce set bits.
(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x)
(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x)
(MOVLQSX (ANDLconst [c] x)) && uint32(c) & 0x80000000 == 0 => (ANDLconst [c & 0x7fffffff] x)

// Don't extend before storing
// The store only writes the narrow width, so an extension to a wider width of
// the stored value is dropped.
(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) => (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) => (MOVLstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) => (MOVBstore [off] {sym} ptr x mem)

// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
// Each rule moves the constant of an ADDQconst address operand into the
// memory op's own offset. Plain offsets are merged only when the sum passes
// is32Bit; ops carrying a packed value-and-offset aux use canAdd32/addOffset32.
(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
	(MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) =>
	(MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
	(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
	(CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
	(CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)

((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
	((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
	((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
	((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)

// Fold constants into stores.
// The stored constant is truncated to the store width before being packed
// together with the offset by makeValAndOff.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) =>
	(MOVQstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
	(MOVLstoreconst [makeValAndOff(int32(c),off)] {sym} ptr mem)
(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
	(MOVWstoreconst [makeValAndOff(int32(int16(c)),off)] {sym} ptr mem)
(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
	(MOVBstoreconst [makeValAndOff(int32(int8(c)),off)] {sym} ptr mem)

// Fold address offsets into constant stores.
(MOV(Q|L|W|B|O)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) =>
	(MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)

// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
1106 (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem) 1107 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1108 (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem) 1109 (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) 1110 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1111 (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) 1112 (MOV(Q|L|W|B|O)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) => 1113 (MOV(Q|L|W|B|O)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem) 1114 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) 1115 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1116 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem) 1117 ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) 1118 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1119 ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem) 1120 ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) 1121 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1122 ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem) 1123 (CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) 1124 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1125 (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem) 1126 (CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) 1127 && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => 1128 (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) 1129 1130 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) 1131 && 
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1132 ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem) 1133 ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) 1134 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1135 ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) 1136 ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) 1137 && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => 1138 ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) 1139 ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) 1140 && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => 1141 ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) 1142 ((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) 1143 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1144 ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) 1145 ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) 1146 && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1147 ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) 1148 1149 // Fold LEAQs together: the offsets are summed (the sum must fit in 32 bits) and the symbols are merged. 1150 (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1151 (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x) 1152 1153 // LEAQ into LEAQ1. Skipped when the inner LEAQ's base x is SB (x.Op != OpSB). 
1154 (LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => 1155 (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) 1156 1157 // LEAQ1 into LEAQ 1158 (LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1159 (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y) 1160 1161 // LEAQ into 
LEAQ[248]. Skipped when the inner LEAQ's base x is SB (x.Op != OpSB). 1162 (LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => 1163 (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) 1164 (LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => 1165 (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) 1166 (LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB => 1167 (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) 1168 1169 // LEAQ[248] into LEAQ 1170 (LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1171 (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y) 1172 (LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1173 (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y) 1174 (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1175 (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) 1176 1177 // LEAQ[1248] into LEAQ[1248]. Only some such merges are possible. When the outer op is LEAQ2 or LEAQ4, the inner LEAQ1 must carry no symbol (sym2 == nil) and its offset is scaled by 2 or 4 before being added. 1178 (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1179 (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) 1180 (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => 1181 (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) 1182 (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil => 1183 (LEAQ4 [off1+2*off2] {sym1} x y) 1184 (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil => 1185 (LEAQ8 [off1+4*off2] {sym1} x y) 1186 // TODO: more? 
1187 1188 // Lower LEAQ2/4/8 to LEAQ when the scaled operand is a constant: the constant times 2/4/8 is folded into the offset, provided the result fits in 32 bits. 1189 (LEAQ2 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*2) => 1190 (LEAQ [off+int32(scale)*2] {sym} x) 1191 (LEAQ4 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*4) => 1192 (LEAQ [off+int32(scale)*4] {sym} x) 1193 (LEAQ8 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*8) => 1194 (LEAQ [off+int32(scale)*8] {sym} x) 1195 1196 // Absorb InvertFlags into branches. Ordered conditions are mirrored (LT<->GT, LE<->GE, ULT<->UGT, ULE<->UGE); EQ and NE are unchanged. 1197 (LT (InvertFlags cmp) yes no) => (GT cmp yes no) 1198 (GT (InvertFlags cmp) yes no) => (LT cmp yes no) 1199 (LE (InvertFlags cmp) yes no) => (GE cmp yes no) 1200 (GE (InvertFlags cmp) yes no) => (LE cmp yes no) 1201 (ULT (InvertFlags cmp) yes no) => (UGT cmp yes no) 1202 (UGT (InvertFlags cmp) yes no) => (ULT cmp yes no) 1203 (ULE (InvertFlags cmp) yes no) => (UGE cmp yes no) 1204 (UGE (InvertFlags cmp) yes no) => (ULE cmp yes no) 1205 (EQ (InvertFlags cmp) yes no) => (EQ cmp yes no) 1206 (NE (InvertFlags cmp) yes no) => (NE cmp yes no) 1207 1208 // Constant comparisons. 
1209 (CMPQconst (MOVQconst [x]) [y]) && x==int64(y) => (FlagEQ) 1210 (CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)<uint64(int64(y)) => (FlagLT_ULT) 1211 (CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)>uint64(int64(y)) => (FlagLT_UGT) 1212 (CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)<uint64(int64(y)) => (FlagGT_ULT) 1213 (CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)>uint64(int64(y)) => (FlagGT_UGT) 1214 (CMPLconst (MOVLconst [x]) [y]) && x==y => (FlagEQ) 1215 (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)<uint32(y) => (FlagLT_ULT) 1216 (CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)>uint32(y) => (FlagLT_UGT) 1217 (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)<uint32(y) => (FlagGT_ULT) 1218 (CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)>uint32(y) => (FlagGT_UGT) 1219 (CMPWconst (MOVLconst [x]) [y]) && int16(x)==y => (FlagEQ) 1220 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)<uint16(y) => (FlagLT_ULT) 1221 (CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)>uint16(y) => (FlagLT_UGT) 1222 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)<uint16(y) => (FlagGT_ULT) 1223 (CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)>uint16(y) => (FlagGT_UGT) 1224 (CMPBconst (MOVLconst [x]) [y]) && int8(x)==y => (FlagEQ) 1225 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)<uint8(y) => (FlagLT_ULT) 1226 (CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)>uint8(y) => (FlagLT_UGT) 1227 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)<uint8(y) => (FlagGT_ULT) 1228 (CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)>uint8(y) => (FlagGT_UGT) 1229 // Note: the CMPW/CMPB rules above truncate x to 16/8 bits (int16(x)/uint16(x), int8(x)/uint8(x)) before comparing. 1230 // CMPQconst requires a 32-bit const, but we can still constant-fold 64-bit consts. 1231 // In theory this applies to any of the simplifications above, 1232 // but CMPQ is the only one I've actually seen occur. 
1233 (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x==y => (FlagEQ) 1234 (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)<uint64(y) => (FlagLT_ULT) 1235 (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)>uint64(y) => (FlagLT_UGT) 1236 (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)<uint64(y) => (FlagGT_ULT) 1237 (CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)>uint64(y) => (FlagGT_UGT) 1238 1239 // Other known comparisons: the compared value is a zero-extension, a right shift, or an AND with a non-negative mask, and the rule conditions bound it below the constant, so the result is always FlagLT_ULT. 1240 (CMPQconst (MOVBQZX _) [c]) && 0xFF < c => (FlagLT_ULT) 1241 (CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c => (FlagLT_ULT) 1242 (CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) => (FlagLT_ULT) 1243 (CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) => (FlagLT_ULT) 1244 (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) 1245 (CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) 1246 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT) 1247 (CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < n => (FlagLT_ULT) 1248 (CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < n => (FlagLT_ULT) 1249 1250 // TESTQ c c sets flags like CMPQ c 0. These rules only fire when the TEST immediate and the constant operand are the same value. 
1251 (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c == 0 => (FlagEQ) 1252 (TESTLconst [c] (MOVLconst [c])) && c == 0 => (FlagEQ) 1253 (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c < 0 => (FlagLT_UGT) 1254 (TESTLconst [c] (MOVLconst [c])) && c < 0 => (FlagLT_UGT) 1255 (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c > 0 => (FlagGT_UGT) 1256 (TESTLconst [c] (MOVLconst [c])) && c > 0 => (FlagGT_UGT) 1257 1258 // TODO: DIVxU also. 1259 1260 // Absorb flag constants into SBB ops. The mask is -1 for the ULT flag constants and 0 for all others. 
1261 (SBBQcarrymask (FlagEQ)) => (MOVQconst [0]) 1262 (SBBQcarrymask (FlagLT_ULT)) => (MOVQconst [-1]) 1263 (SBBQcarrymask (FlagLT_UGT)) => (MOVQconst [0]) 1264 (SBBQcarrymask (FlagGT_ULT)) => (MOVQconst [-1]) 1265 (SBBQcarrymask (FlagGT_UGT)) => (MOVQconst [0]) 1266 (SBBLcarrymask (FlagEQ)) => (MOVLconst [0]) 1267 (SBBLcarrymask (FlagLT_ULT)) => (MOVLconst [-1]) 1268 (SBBLcarrymask (FlagLT_UGT)) => (MOVLconst [0]) 1269 (SBBLcarrymask (FlagGT_ULT)) => (MOVLconst [-1]) 1270 (SBBLcarrymask (FlagGT_UGT)) => (MOVLconst [0]) 1271 1272 // Absorb flag constants into branches. A condition that holds for the flag constant becomes (First yes no); one that does not hold becomes (First no yes). 1273 ((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) => (First yes no) 1274 ((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) => (First no yes) 1275 ((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) => (First yes no) 1276 ((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) => (First no yes) 1277 ((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) => (First yes no) 1278 ((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) => (First no yes) 1279 ((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) => (First yes no) 1280 ((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) => (First no yes) 1281 ((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) => (First yes no) 1282 ((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) => (First no yes) 1283 1284 // Absorb flag constants into SETxx ops. The result is the constant 1 when the condition holds for the flag constant and 0 otherwise. 
1285 ((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) => (MOVLconst [1]) 1286 ((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) => (MOVLconst [0]) 1287 ((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) => (MOVLconst [1]) 1288 ((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) => (MOVLconst [0]) 1289 ((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) => (MOVLconst [1]) 1290 ((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) => (MOVLconst [0]) 1291 ((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) => (MOVLconst [1]) 1292 ((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) => (MOVLconst [0]) 1293 ((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) => (MOVLconst [1]) 1294 ((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) => (MOVLconst [0]) 1295 // Likewise for SETxxstore: with a flag constant, the op becomes a MOVBstore of the constant 0 or 1 (typed typ.UInt8) to the same address. 1296 (SETEQstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1297 (SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1298 (SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1299 (SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1300 (SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1301 1302 (SETNEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1303 (SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1304 (SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1305 (SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1306 (SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1307 1308 (SETLstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1309 (SETLstore [off] {sym} ptr (FlagLT_ULT) mem) => 
(MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1310 (SETLstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1311 (SETLstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1312 (SETLstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1313 1314 (SETLEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1315 (SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1316 (SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1317 (SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1318 (SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1319 1320 (SETGstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1321 (SETGstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1322 (SETGstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1323 (SETGstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1324 (SETGstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1325 1326 (SETGEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1327 (SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1328 (SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1329 (SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1330 
(SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1331 1332 (SETBstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1333 (SETBstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1334 (SETBstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1335 (SETBstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1336 (SETBstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1337 1338 (SETBEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1339 (SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1340 (SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1341 (SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1342 (SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1343 1344 (SETAstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1345 (SETAstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1346 (SETAstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1347 (SETAstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1348 (SETAstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1349 1350 (SETAEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1351 (SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] 
{sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1352 (SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1353 (SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem) 1354 (SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem) 1355 1356 // Remove redundant *const ops 1357 (ADDQconst [0] x) => x 1358 (ADDLconst [c] x) && c==0 => x 1359 (SUBQconst [0] x) => x 1360 (SUBLconst [c] x) && c==0 => x 1361 (ANDQconst [0] _) => (MOVQconst [0]) 1362 (ANDLconst [c] _) && c==0 => (MOVLconst [0]) 1363 (ANDQconst [-1] x) => x 1364 (ANDLconst [c] x) && c==-1 => x 1365 (ORQconst [0] x) => x 1366 (ORLconst [c] x) && c==0 => x 1367 (ORQconst [-1] _) => (MOVQconst [-1]) 1368 (ORLconst [c] _) && c==-1 => (MOVLconst [-1]) 1369 (XORQconst [0] x) => x 1370 (XORLconst [c] x) && c==0 => x 1371 // TODO: since we got rid of the W/B versions, we might miss 1372 // things like (ANDLconst [0x100] x) which were formerly 1373 // (ANDBconst [0] x). Probably doesn't happen very often. 1374 // If we cared, we might do: 1375 // (ANDLconst <t> [c] x) && t.Size()==1 && int8(c)==0 -> (MOVLconst [0]) 1376 1377 // Remove redundant ops 1378 // Not in generic rules, because they may appear after lowering e.g. 
Slicemask 1379 (NEG(Q|L) (NEG(Q|L) x)) => x 1380 (NEG(Q|L) s:(SUB(Q|L) x y)) && s.Uses == 1 => (SUB(Q|L) y x) 1381 1382 // Convert constant subtracts to constant adds. SUBQconst skips c == -(1<<31) because -c would not fit in 32 bits. 1383 (SUBQconst [c] x) && c != -(1<<31) => (ADDQconst [-c] x) 1384 (SUBLconst [c] x) => (ADDLconst [-c] x) 1385 1386 // generic constant folding 1387 // TODO: more of this 1388 (ADDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)+d]) 1389 (ADDLconst [c] (MOVLconst [d])) => (MOVLconst [c+d]) 1390 (ADDQconst [c] (ADDQconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDQconst [c+d] x) 1391 (ADDLconst [c] (ADDLconst [d] x)) => (ADDLconst [c+d] x) 1392 (SUBQconst (MOVQconst [d]) [c]) => (MOVQconst [d-int64(c)]) 1393 (SUBQconst (SUBQconst x [d]) [c]) && is32Bit(int64(-c)-int64(d)) => (ADDQconst [-c-d] x) 1394 (SARQconst [c] (MOVQconst [d])) => (MOVQconst [d>>uint64(c)]) 1395 (SARLconst [c] (MOVQconst [d])) => (MOVQconst [int64(int32(d))>>uint64(c)]) 1396 (SARWconst [c] (MOVQconst [d])) => (MOVQconst [int64(int16(d))>>uint64(c)]) 1397 (SARBconst [c] (MOVQconst [d])) => (MOVQconst [int64(int8(d))>>uint64(c)]) 1398 (NEGQ (MOVQconst [c])) => (MOVQconst [-c]) 1399 (NEGL (MOVLconst [c])) => (MOVLconst [-c]) 1400 (MULQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)*d]) 1401 (MULLconst [c] (MOVLconst [d])) => (MOVLconst [c*d]) 1402 (ANDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)&d]) 1403 (ANDLconst [c] (MOVLconst [d])) => (MOVLconst [c&d]) 1404 (ORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)|d]) 1405 (ORLconst [c] (MOVLconst [d])) => (MOVLconst [c|d]) 1406 (XORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)^d]) 1407 (XORLconst [c] (MOVLconst [d])) => (MOVLconst [c^d]) 1408 (NOTQ (MOVQconst [c])) => (MOVQconst [^c]) 1409 (NOTL (MOVLconst [c])) => (MOVLconst [^c]) 1410 (BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))]) 1411 (BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))]) 1412 (BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))]) 1413 (BTRLconst [c] 
(MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))]) 1414 (BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))]) 1415 (BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))]) 1416 1417 // If c or d doesn't fit into 32 bits, then we can't construct ORQconst, 1418 // but we can still constant-fold. 1419 // In theory this applies to any of the simplifications above, 1420 // but ORQ is the only one I've actually seen occur. 1421 (ORQ (MOVQconst [c]) (MOVQconst [d])) => (MOVQconst [c|d]) 1422 1423 // generic simplifications 1424 // TODO: more of this 1425 (ADDQ x (NEGQ y)) => (SUBQ x y) 1426 (ADDL x (NEGL y)) => (SUBL x y) 1427 (SUBQ x x) => (MOVQconst [0]) 1428 (SUBL x x) => (MOVLconst [0]) 1429 (ANDQ x x) => x 1430 (ANDL x x) => x 1431 (ORQ x x) => x 1432 (ORL x x) => x 1433 (XORQ x x) => (MOVQconst [0]) 1434 (XORL x x) => (MOVLconst [0]) 1435 1436 (SHLLconst [d] (MOVLconst [c])) => (MOVLconst [c << uint64(d)]) 1437 (SHLQconst [d] (MOVQconst [c])) => (MOVQconst [c << uint64(d)]) 1438 (SHLQconst [d] (MOVLconst [c])) => (MOVQconst [int64(c) << uint64(d)]) 1439 1440 // Fold NEG into ADDconst/MULconst. Take care to keep c in 32 bit range. 1441 (NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) => (ADDQconst [-c] x) 1442 (MULQconst [c] (NEGQ x)) && c != -(1<<31) => (MULQconst [-c] x) 1443 1444 // Checking AND against 0: a compare with 0 of a single-use AND becomes the corresponding TEST. 1445 (CMPQconst a:(ANDQ x y) [0]) && a.Uses == 1 => (TESTQ x y) 1446 (CMPLconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTL x y) 1447 (CMPWconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTW x y) 1448 (CMPBconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTB x y) 1449 (CMPQconst a:(ANDQconst [c] x) [0]) && a.Uses == 1 => (TESTQconst [c] x) 1450 (CMPLconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTLconst [c] x) 1451 (CMPWconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTWconst [int16(c)] x) 1452 (CMPBconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTBconst [int8(c)] x) 1453 1454 // Convert TESTx to TESTxconst if possible. 
1455 (TESTQ (MOVQconst [c]) x) && is32Bit(c) => (TESTQconst [int32(c)] x) 1456 (TESTL (MOVLconst [c]) x) => (TESTLconst [c] x) 1457 (TESTW (MOVLconst [c]) x) => (TESTWconst [int16(c)] x) 1458 (TESTB (MOVLconst [c]) x) => (TESTBconst [int8(c)] x) 1459 1460 // TEST %reg,%reg is shorter than CMP. The TESTxconst [-1] forms are left alone when x is itself a constant op. 1461 (CMPQconst x [0]) => (TESTQ x x) 1462 (CMPLconst x [0]) => (TESTL x x) 1463 (CMPWconst x [0]) => (TESTW x x) 1464 (CMPBconst x [0]) => (TESTB x x) 1465 (TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst => (TESTQ x x) 1466 (TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTL x x) 1467 (TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTW x x) 1468 (TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTB x x) 1469 1470 // Convert LEAQ1 back to ADDQ if we can (zero offset and no symbol) 1471 (LEAQ1 [0] x y) && v.Aux == nil => (ADDQ x y) 1472 1473 // Combining byte loads into larger (unaligned) loads. 1474 // There are many ways these combinations could occur. This is 1475 // designed to match the way encoding/binary.LittleEndian does it. 
1476 1477 // Little-endian loads. Each pattern appears twice: once with a shared pointer p and offsets i0/i1 that differ by the width of the narrow load, and once with a shared offset i and pointers p0/p1 checked by sequentialAddresses. 1478 1479 (OR(L|Q) x0:(MOVBload [i0] {s} p mem) 1480 sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem))) 1481 && i1 == i0+1 1482 && x0.Uses == 1 1483 && x1.Uses == 1 1484 && sh.Uses == 1 1485 && mergePoint(b,x0,x1) != nil 1486 && clobber(x0, x1, sh) 1487 => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) 1488 1489 (OR(L|Q) x0:(MOVBload [i] {s} p0 mem) 1490 sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem))) 1491 && x0.Uses == 1 1492 && x1.Uses == 1 1493 && sh.Uses == 1 1494 && sequentialAddresses(p0, p1, 1) 1495 && mergePoint(b,x0,x1) != nil 1496 && clobber(x0, x1, sh) 1497 => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) 1498 1499 (OR(L|Q) x0:(MOVWload [i0] {s} p mem) 1500 sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem))) 1501 && i1 == i0+2 1502 && x0.Uses == 1 1503 && x1.Uses == 1 1504 && sh.Uses == 1 1505 && mergePoint(b,x0,x1) != nil 1506 && clobber(x0, x1, sh) 1507 => @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) 1508 1509 (OR(L|Q) x0:(MOVWload [i] {s} p0 mem) 1510 sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem))) 1511 && x0.Uses == 1 1512 && x1.Uses == 1 1513 && sh.Uses == 1 1514 && sequentialAddresses(p0, p1, 2) 1515 && mergePoint(b,x0,x1) != nil 1516 && clobber(x0, x1, sh) 1517 => @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) 1518 1519 (ORQ x0:(MOVLload [i0] {s} p mem) 1520 sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) 1521 && i1 == i0+4 1522 && x0.Uses == 1 1523 && x1.Uses == 1 1524 && sh.Uses == 1 1525 && mergePoint(b,x0,x1) != nil 1526 && clobber(x0, x1, sh) 1527 => @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) 1528 1529 (ORQ x0:(MOVLload [i] {s} p0 mem) 1530 sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem))) 1531 && x0.Uses == 1 1532 && x1.Uses == 1 1533 && sh.Uses == 1 1534 && sequentialAddresses(p0, p1, 4) 1535 && mergePoint(b,x0,x1) != nil 1536 && clobber(x0, x1, sh) 1537 => @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem) 1538 1539 (OR(L|Q) 1540 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) 1541 
or:(OR(L|Q) 1542 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem)) 1543 y)) 1544 && i1 == i0+1 1545 && j1 == j0+8 1546 && j0 % 16 == 0 1547 && x0.Uses == 1 1548 && x1.Uses == 1 1549 && s0.Uses == 1 1550 && s1.Uses == 1 1551 && or.Uses == 1 1552 && mergePoint(b,x0,x1,y) != nil 1553 && clobber(x0, x1, s0, s1, or) 1554 => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) 1555 1556 (OR(L|Q) 1557 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) 1558 or:(OR(L|Q) 1559 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) 1560 y)) 1561 && j1 == j0+8 1562 && j0 % 16 == 0 1563 && x0.Uses == 1 1564 && x1.Uses == 1 1565 && s0.Uses == 1 1566 && s1.Uses == 1 1567 && or.Uses == 1 1568 && sequentialAddresses(p0, p1, 1) 1569 && mergePoint(b,x0,x1,y) != nil 1570 && clobber(x0, x1, s0, s1, or) 1571 => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y) 1572 1573 (ORQ 1574 s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) 1575 or:(ORQ 1576 s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) 1577 y)) 1578 && i1 == i0+2 1579 && j1 == j0+16 1580 && j0 % 32 == 0 1581 && x0.Uses == 1 1582 && x1.Uses == 1 1583 && s0.Uses == 1 1584 && s1.Uses == 1 1585 && or.Uses == 1 1586 && mergePoint(b,x0,x1,y) != nil 1587 && clobber(x0, x1, s0, s1, or) 1588 => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y) 1589 1590 (ORQ 1591 s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) 1592 or:(ORQ 1593 s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) 1594 y)) 1595 && j1 == j0+16 1596 && j0 % 32 == 0 1597 && x0.Uses == 1 1598 && x1.Uses == 1 1599 && s0.Uses == 1 1600 && s1.Uses == 1 1601 && or.Uses == 1 1602 && sequentialAddresses(p0, p1, 2) 1603 && mergePoint(b,x0,x1,y) != nil 1604 && clobber(x0, x1, s0, s1, or) 1605 => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y) 1606 1607 // Big-endian loads. Same two address forms as the little-endian rules; the merged load is wrapped in ROLWconst [8], BSWAPL or BSWAPQ. 1608 1609 (OR(L|Q) 1610 
x1:(MOVBload [i1] {s} p mem) 1611 sh:(SHL(L|Q)const [8] x0:(MOVBload [i0] {s} p mem))) 1612 && i1 == i0+1 1613 && x0.Uses == 1 1614 && x1.Uses == 1 1615 && sh.Uses == 1 1616 && mergePoint(b,x0,x1) != nil 1617 && clobber(x0, x1, sh) 1618 => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) 1619 1620 (OR(L|Q) 1621 x1:(MOVBload [i] {s} p1 mem) 1622 sh:(SHL(L|Q)const [8] x0:(MOVBload [i] {s} p0 mem))) 1623 && x0.Uses == 1 1624 && x1.Uses == 1 1625 && sh.Uses == 1 1626 && sequentialAddresses(p0, p1, 1) 1627 && mergePoint(b,x0,x1) != nil 1628 && clobber(x0, x1, sh) 1629 => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem)) 1630 1631 (OR(L|Q) 1632 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) 1633 sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) 1634 && i1 == i0+2 1635 && x0.Uses == 1 1636 && x1.Uses == 1 1637 && r0.Uses == 1 1638 && r1.Uses == 1 1639 && sh.Uses == 1 1640 && mergePoint(b,x0,x1) != nil 1641 && clobber(x0, x1, r0, r1, sh) 1642 => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) 1643 1644 (OR(L|Q) 1645 r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) 1646 sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) 1647 && x0.Uses == 1 1648 && x1.Uses == 1 1649 && r0.Uses == 1 1650 && r1.Uses == 1 1651 && sh.Uses == 1 1652 && sequentialAddresses(p0, p1, 2) 1653 && mergePoint(b,x0,x1) != nil 1654 && clobber(x0, x1, r0, r1, sh) 1655 => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem)) 1656 1657 (ORQ 1658 r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) 1659 sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) 1660 && i1 == i0+4 1661 && x0.Uses == 1 1662 && x1.Uses == 1 1663 && r0.Uses == 1 1664 && r1.Uses == 1 1665 && sh.Uses == 1 1666 && mergePoint(b,x0,x1) != nil 1667 && clobber(x0, x1, r0, r1, sh) 1668 => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem)) 1669 1670 (ORQ 1671 r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) 1672 sh:(SHLQconst 
[32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem)))) 1673 && x0.Uses == 1 1674 && x1.Uses == 1 1675 && r0.Uses == 1 1676 && r1.Uses == 1 1677 && sh.Uses == 1 1678 && sequentialAddresses(p0, p1, 4) 1679 && mergePoint(b,x0,x1) != nil 1680 && clobber(x0, x1, r0, r1, sh) 1681 => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem)) 1682 1683 (OR(L|Q) 1684 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem)) 1685 or:(OR(L|Q) 1686 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) 1687 y)) 1688 && i1 == i0+1 1689 && j1 == j0-8 1690 && j1 % 16 == 0 1691 && x0.Uses == 1 1692 && x1.Uses == 1 1693 && s0.Uses == 1 1694 && s1.Uses == 1 1695 && or.Uses == 1 1696 && mergePoint(b,x0,x1,y) != nil 1697 && clobber(x0, x1, s0, s1, or) 1698 => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) 1699 1700 (OR(L|Q) 1701 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) 1702 or:(OR(L|Q) 1703 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) 1704 y)) 1705 && j1 == j0-8 1706 && j1 % 16 == 0 1707 && x0.Uses == 1 1708 && x1.Uses == 1 1709 && s0.Uses == 1 1710 && s1.Uses == 1 1711 && or.Uses == 1 1712 && sequentialAddresses(p0, p1, 1) 1713 && mergePoint(b,x0,x1,y) != nil 1714 && clobber(x0, x1, s0, s1, or) 1715 => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y) 1716 1717 (ORQ 1718 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) 1719 or:(ORQ 1720 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) 1721 y)) 1722 && i1 == i0+2 1723 && j1 == j0-16 1724 && j1 % 32 == 0 1725 && x0.Uses == 1 1726 && x1.Uses == 1 1727 && r0.Uses == 1 1728 && r1.Uses == 1 1729 && s0.Uses == 1 1730 && s1.Uses == 1 1731 && or.Uses == 1 1732 && mergePoint(b,x0,x1,y) != nil 1733 && clobber(x0, x1, r0, r1, s0, s1, or) 1734 => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload 
[i0] {s} p mem))) y) 1735 1736 (ORQ 1737 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) 1738 or:(ORQ 1739 s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) 1740 y)) 1741 && j1 == j0-16 1742 && j1 % 32 == 0 1743 && x0.Uses == 1 1744 && x1.Uses == 1 1745 && r0.Uses == 1 1746 && r1.Uses == 1 1747 && s0.Uses == 1 1748 && s1.Uses == 1 1749 && or.Uses == 1 1750 && sequentialAddresses(p0, p1, 2) 1751 && mergePoint(b,x0,x1,y) != nil 1752 && clobber(x0, x1, r0, r1, s0, s1, or) 1753 => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y) 1754 1755 // Combine 2 byte stores + shift into rolw 8 + word store 1756 (MOVBstore [i] {s} p w 1757 x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) 1758 && x0.Uses == 1 1759 && clobber(x0) 1760 => (MOVWstore [i-1] {s} p (ROLWconst <typ.UInt16> [8] w) mem) 1761 (MOVBstore [i] {s} p1 w 1762 x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem)) 1763 && x0.Uses == 1 1764 && sequentialAddresses(p0, p1, 1) 1765 && clobber(x0) 1766 => (MOVWstore [i] {s} p0 (ROLWconst <typ.UInt16> [8] w) mem) 1767 1768 // Combine stores + shifts into bswap and larger (unaligned) stores. The byte stores must form a single-use chain through mem, storing w shifted right by 8, 16, 24, ... at descending addresses. 1769 (MOVBstore [i] {s} p w 1770 x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) 1771 x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) 1772 x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) 1773 && x0.Uses == 1 1774 && x1.Uses == 1 1775 && x2.Uses == 1 1776 && clobber(x0, x1, x2) 1777 => (MOVLstore [i-3] {s} p (BSWAPL <typ.UInt32> w) mem) 1778 (MOVBstore [i] {s} p3 w 1779 x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) 1780 x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) 1781 x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem)))) 1782 && x0.Uses == 1 1783 && x1.Uses == 1 1784 && x2.Uses == 1 1785 && sequentialAddresses(p0, p1, 1) 1786 && sequentialAddresses(p1, p2, 1) 1787 && sequentialAddresses(p2, p3, 1) 1788 && clobber(x0, x1, x2) 1789 => (MOVLstore [i] {s} p0 (BSWAPL <typ.UInt32> w) mem) 1790 1791 
// Eight byte stores of w>>56, w>>48, ..., w>>8 and w to ascending addresses
// become a single 64-bit store of BSWAPQ(w). Every intermediate store must
// have exactly one use so it can be clobbered.
(MOVBstore [i] {s} p w
  x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
  x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
  x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
  x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
  x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
  x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
  x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && clobber(x0, x1, x2, x3, x4, x5, x6)
  => (MOVQstore [i-7] {s} p (BSWAPQ <typ.UInt64> w) mem)
// Same pattern with one shared offset i and eight pointers p0..p7, each
// required to be 1 past the previous one.
(MOVBstore [i] {s} p7 w
  x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w)
  x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w)
  x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w)
  x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w)
  x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w)
  x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w)
  x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && sequentialAddresses(p1, p2, 1)
  && sequentialAddresses(p2, p3, 1)
  && sequentialAddresses(p3, p4, 1)
  && sequentialAddresses(p4, p5, 1)
  && sequentialAddresses(p5, p6, 1)
  && sequentialAddresses(p6, p7, 1)
  && clobber(x0, x1, x2, x3, x4, x5, x6)
  => (MOVQstore [i] {s} p0 (BSWAPQ <typ.UInt64> w) mem)

// Combine constant stores into larger (unaligned) stores.
// Each width has two rules, one per store order. In both, `a` is the constant
// store whose value lands in the low half of the merged store (at a.Off() off
// p0) and `c` is the store that sits one element width above it.
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
// Two 32-bit constant stores are merged into a MOVQstore of a materialized
// 64-bit constant rather than into a store-constant op.
(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
  && clobber(x)
  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
(MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
  && clobber(x)
  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
// Two 64-bit stores are merged into a 16-byte MOVOstoreconst only when both
// constants are zero and config.useSSE is set.
(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
  && config.useSSE
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
  && a.Val() == 0
  && c.Val() == 0
  && clobber(x)
  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
(MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
  && config.useSSE
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
  && a.Val() == 0
  && c.Val() == 0
  && clobber(x)
  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)

// Combine stores into larger (unaligned) stores. Little endian.
// Byte stores of w and w>>8 (or of w>>(j-8) and w>>j) to adjacent addresses
// become one 16-bit store of the less-shifted value.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w0 mem)

// 16-bit stores 16 bits of shift apart => one 32-bit store.
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w0 mem)

// 32-bit stores 32 bits of shift apart => one 64-bit store.
(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVQstore [i-4] {s} p w0 mem)
(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
  => (MOVQstore [i] {s} p0 w mem)
(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
  => (MOVQstore [i] {s} p0 w0 mem)

// A 1-byte store of w, a 4-byte store of w>>8, a 2-byte store of w>>40 and a
// 1-byte store of w>>56, placed 0, 1, 5 and 7 bytes past the first store's
// address, become one 8-byte store of w.
(MOVBstore [c3] {s} p3 (SHRQconst [56] w)
  x1:(MOVWstore [c2] {s} p2 (SHRQconst [40] w)
  x2:(MOVLstore [c1] {s} p1 (SHRQconst [8] w)
  x3:(MOVBstore [c0] {s} p0 w mem))))
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && sequentialAddresses(p0, p1, int64(1 + c0 - c1))
  && sequentialAddresses(p0, p2, int64(5 + c0 - c2))
  && sequentialAddresses(p0, p3, int64(7 + c0 - c3))
  && clobber(x1, x2, x3)
  => (MOVQstore [c0] {s} p0 w mem)

// Two adjacent load/store pairs that read from the same memory state become
// one load/store pair of twice the width.
(MOVBstore [i] {s} p
  x1:(MOVBload [j] {s2} p2 mem)
    mem2:(MOVBstore [i-1] {s} p
      x2:(MOVBload [j-1] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
  => (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)

(MOVWstore [i] {s} p
  x1:(MOVWload [j] {s2} p2 mem)
    mem2:(MOVWstore [i-2] {s} p
      x2:(MOVWload [j-2] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
  => (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)

(MOVLstore [i] {s} p
  x1:(MOVLload [j] {s2} p2 mem)
    mem2:(MOVLstore [i-4] {s} p
      x2:(MOVLload [j-4] {s2} p2 mem) mem))
  && x1.Uses == 1
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
  => (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)

// Merge load and op
// TODO: add indexed variants?
((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem))
  && canMergeLoadClobber(v, l, x)
  && clobber(l)
  => ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem))
  && canMergeLoadClobber(v, l, x)
  && clobber(l)
  => ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem))
  && canMergeLoadClobber(v, l, x)
  && clobber(l)
  => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem))
  && canMergeLoadClobber(v, l, x)
  && clobber(l)
  => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)

// Storing the result of an op back to the location one operand was loaded
// from becomes a single read-modify-write (*modify) op.
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem)
  && y.Uses == 1
  && clobber(y)
  => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem)
  && y.Uses == 1
  && l.Uses == 1
  && clobber(y, l)
  => ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem)
  && y.Uses == 1
  && clobber(y)
  => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem)
  && y.Uses == 1
  && l.Uses == 1
  && clobber(y, l)
  => ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)

// Merge ADDQconst and LEAQ into atomic loads.
// The constant offset of the address computation is added to the op's own
// offset, provided the sum still fits in 32 bits.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  => (MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem)
(MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  && canMergeSym(sym1, sym2)
  => (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)

// Merge ADDQconst and LEAQ into atomic stores.
// The LEAQ variants are skipped when the LEAQ base is SB (ptr.Op != OpSB).
(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  => (XCHGQ [off1+off2] {sym} val ptr mem)
(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  && canMergeSym(sym1, sym2)
  && ptr.Op != OpSB
  => (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  => (XCHGL [off1+off2] {sym} val ptr mem)
(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  && canMergeSym(sym1, sym2)
  && ptr.Op != OpSB
  => (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)

// Merge ADDQconst into atomic adds.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  => (XADDQlock [off1+off2] {sym} val ptr mem)
(XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
  && is32Bit(int64(off1)+int64(off2))
  => (XADDLlock [off1+off2] {sym} val ptr mem)

// Merge ADDQconst into atomic compare and swaps.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
(CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
  && is32Bit(int64(off1)+int64(off2))
  => (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
(CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem)
  && is32Bit(int64(off1)+int64(off2))
  => (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)

// We don't need the conditional move if we know the arg of BSF/BSR is not zero
// (it is OR'ed with a nonzero constant).
(CMOVQEQ x _ (Select1 (BS(F|R)Q (ORQconst [c] _)))) && c != 0 => x
// Extension is unnecessary for trailing zeros.
(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) => (BSFQ (ORQconst <t> [1<<8] x))
(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) => (BSFQ (ORQconst <t> [1<<16] x))

// Redundant sign/zero extensions
// Note: see issue 21963. We have to make sure we use the right type on
// the resulting extension (the outer type, not the inner type).
// In every rule the narrower of the two extensions is the one that survives.
(MOVLQSX (MOVLQSX x)) => (MOVLQSX x)
(MOVLQSX (MOVWQSX x)) => (MOVWQSX x)
(MOVLQSX (MOVBQSX x)) => (MOVBQSX x)
(MOVWQSX (MOVWQSX x)) => (MOVWQSX x)
(MOVWQSX (MOVBQSX x)) => (MOVBQSX x)
(MOVBQSX (MOVBQSX x)) => (MOVBQSX x)
(MOVLQZX (MOVLQZX x)) => (MOVLQZX x)
(MOVLQZX (MOVWQZX x)) => (MOVWQZX x)
(MOVLQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVWQZX (MOVWQZX x)) => (MOVWQZX x)
(MOVWQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) => (MOVBQZX x)

// Load, op-with-constant, store back to the same location (same offset and
// symbol, pointers equal per isSamePtr) => one constant read-modify-write op.
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
  && isSamePtr(ptr, ptr2)
  && a.Uses == 1
  && l.Uses == 1
  && clobber(l, a)
  => ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
  && isSamePtr(ptr, ptr2)
  && a.Uses == 1
  && l.Uses == 1
  && clobber(l, a)
  => ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)

// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
// A load whose memory argument is a store of the other register class to the
// same offset, symbol and pointer is replaced by a direct register move.
(MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) => (MOVQf2i val)
(MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) => (MOVLf2i val)
(MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) => (MOVQi2f val)
(MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) => (MOVLi2f val)

// Other load-like ops.
// Integer op-with-load reading back a just-stored float: use the register op
// on the moved value instead.
(ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ADDQ x (MOVQf2i y))
(ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ADDL x (MOVLf2i y))
(SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (SUBQ x (MOVQf2i y))
(SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (SUBL x (MOVLf2i y))
(ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ANDQ x (MOVQf2i y))
(ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ANDL x (MOVLf2i y))
(ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ORQ x (MOVQf2i y))
(ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ORL x (MOVLf2i y))
(XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (XORQ x (MOVQf2i y))
(XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (XORL x (MOVLf2i y))

// Float op-with-load reading back a just-stored integer.
(ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (ADDSD x (MOVQi2f y))
(ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (ADDSS x (MOVLi2f y))
(SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (SUBSD x (MOVQi2f y))
(SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (SUBSS x (MOVLi2f y))
(MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (MULSD x (MOVQi2f y))
(MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (MULSS x (MOVLi2f y))

// Redirect stores to use the other register set.
// Storing a value that was only moved between register classes: store the
// original value with the store op of its own class instead.
(MOVQstore [off] {sym} ptr (MOVQf2i val) mem) => (MOVSDstore [off] {sym} ptr val mem)
(MOVLstore [off] {sym} ptr (MOVLf2i val) mem) => (MOVSSstore [off] {sym} ptr val mem)
(MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) => (MOVQstore [off] {sym} ptr val mem)
(MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) => (MOVLstore [off] {sym} ptr val mem)

// Load args directly into the register class where it will be used.
// We do this by just modifying the type of the Arg.
// Only applies when both types have the same size; the new Arg is placed in
// the function's entry block.
(MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
(MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
(MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
(MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})

// LEAQ is rematerializeable, so this helps to avoid register spill.
// See issue 22947 for details
(ADD(Q|L)const [off] x:(SP)) => (LEA(Q|L) [off] x)

// HMULx is commutative, but its first argument must go in AX.
// If possible, put a rematerializeable value in the first argument slot,
// to reduce the odds that another value will have to be spilled
// specifically to free up AX.
(HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L) y x)
(HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L)U y x)

// Fold loads into compares
// Note: these may be undone by the flagalloc pass.
// A compare with one operand loaded from memory becomes a compare-with-memory
// op. The memory operand comes first in CMPxload, so when the load was the
// second operand the result is wrapped in InvertFlags.
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x)
  && canMergeLoad(v, l)
  && clobber(l)
  => (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem))
  && canMergeLoad(v, l)
  && clobber(l)
  => (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))

// Compare of a singly-used load against a constant: emit the
// compare-constant-with-memory op in the load's block.
(CMP(Q|L)const l:(MOV(Q|L)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
  && clobber(l)
  => @l.Block (CMP(Q|L)constload {sym} [makeValAndOff(c,off)] ptr mem)
(CMP(W|B)const l:(MOV(W|B)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
  && clobber(l)
  => @l.Block (CMP(W|B)constload {sym} [makeValAndOff(int32(c),off)] ptr mem)

// Compare-with-memory against a constant register operand: fold the constant
// into the op. The 64-bit form requires validVal(c); the 16- and 8-bit forms
// truncate the constant to the operand width first.
(CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validVal(c) => (CMPQconstload {sym} [makeValAndOff(int32(c),off)] ptr mem)
(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPWconstload {sym} [makeValAndOff(int32(int16(c)),off)] ptr mem)
(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) => (CMPBconstload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem)

// TEST of a loaded value against itself (the load's only two uses) becomes a
// compare of the memory operand with 0, emitted in the load's block.
(TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
  && l == l2
  && l.Uses == 2
  && clobber(l)
  => @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0, off)] ptr mem)

// Convert ANDload to MOVload when we can do the AND in a containing TEST op.
// Only do when it's within the same block, so we don't have flags live across basic block boundaries.
// See issue 44228.
(TEST(Q|L) a:(AND(Q|L)load [off] {sym} x ptr mem) a)
  && a.Uses == 2
  && a.Block == v.Block
  && clobber(a)
  => (TEST(Q|L) (MOV(Q|L)load <a.Type> [off] {sym} ptr mem) x)

// Loads at a constant offset from SB of a symbol that symIsRO reports as
// read-only are replaced by the constant read from the symbol's data.
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read8(sym, int64(off)))])
(MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
(MOVLload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
(MOVQload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
// A 16-byte copy out of a read-only symbol becomes two 8-byte constant stores
// (low half at dstOff, high half at dstOff+8).
(MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) && symIsRO(srcSym) =>
  (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))])
    (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem))

// Arch-specific inlining for small or disjoint runtime.memmove
// Match post-lowering calls, memory version.
// Here the call's arguments are recognized as a chain of stores (dst, src,
// then the constant size) feeding the call's memory input.
(SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem)))))
  && sc.Val64() >= 0
  && isSameCall(sym, "runtime.memmove")
  && s1.Uses == 1
  && s2.Uses == 1
  && s3.Uses == 1
  && isInlinableMemmove(dst, src, sc.Val64(), config)
  && clobber(s1, s2, s3, call)
  => (Move [sc.Val64()] dst src mem)

// Match post-lowering calls, register version.
(SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem))
  && sz >= 0
  && isSameCall(sym, "runtime.memmove")
  && call.Uses == 1
  && isInlinableMemmove(dst, src, sz, config)
  && clobber(call)
  => (Move [sz] dst src mem)

// Prefetch instructions
(PrefetchCache ...) => (PrefetchT0 ...)
(PrefetchCacheStreamed ...) => (PrefetchNTA ...)

// CPUID feature: BMI1.
// All four rules are gated on buildcfg.GOAMD64 >= 3.
//   x & ^y     => ANDN
//   x & -x     => BLSI
//   x ^ (x-1)  => BLSMSK
//   x & (x-1)  => BLSR
(AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y)
(AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x)
(XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x)
(AND(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSR(Q|L) x)

// A double byte swap is the identity.
(BSWAP(Q|L) (BSWAP(Q|L) p)) => p

// CPUID feature: MOVBE.
// A byte swap feeding a plain store, or applied to a plain load, is folded
// into the MOVBE form (GOAMD64 >= 3). A byte swap next to an existing MOVBE
// op cancels, giving back the plain op.
(MOV(Q|L)store [i] {s} p x:(BSWAP(Q|L) w) mem)
  && x.Uses == 1
  && buildcfg.GOAMD64 >= 3
  => (MOVBE(Q|L)store [i] {s} p w mem)
(MOVBE(Q|L)store [i] {s} p x:(BSWAP(Q|L) w) mem)
  && x.Uses == 1
  => (MOV(Q|L)store [i] {s} p w mem)
(BSWAP(Q|L) x:(MOV(Q|L)load [i] {s} p mem))
  && x.Uses == 1
  && buildcfg.GOAMD64 >= 3
  => @x.Block (MOVBE(Q|L)load [i] {s} p mem)
(BSWAP(Q|L) x:(MOVBE(Q|L)load [i] {s} p mem))
  && x.Uses == 1
  => @x.Block (MOV(Q|L)load [i] {s} p mem)
// The 16-bit case uses a rotate by 8 in place of BSWAP.
(MOVWstore [i] {s} p x:(ROLWconst [8] w) mem)
  && x.Uses == 1
  && buildcfg.GOAMD64 >= 3
  => (MOVBEWstore [i] {s} p w mem)
(MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem)
  && x.Uses == 1
  => (MOVWstore [i] {s} p w mem)

// Two 32-bit MOVBE loads, with the one at the lower address shifted into the
// upper half, become one 64-bit MOVBE load at the lower address.
(ORQ x0:(MOVBELload [i0] {s} p mem)
    sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem)))
  && i0 == i1+4
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b, x0, x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVBEQload [i1] {s} p mem)

(ORQ x0:(MOVBELload [i] {s} p0 mem)
    sh:(SHLQconst [32] x1:(MOVBELload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p1, p0, 4)
  && mergePoint(b, x0, x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)

// Fold the load of the shifted value into a variable shift (GOAMD64 >= 3).
(SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x)
  && buildcfg.GOAMD64 >= 3
  && canMergeLoad(v, l)
  && clobber(l)
  => (SARX(Q|L)load [off] {sym} ptr x mem)
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x)
  && buildcfg.GOAMD64 >= 3
  && canMergeLoad(v, l)
  && clobber(l)
  => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x)
  && buildcfg.GOAMD64 >= 3
  && canMergeLoad(v, l)
  && clobber(l)
  => (SHRX(Q|L)load [off] {sym} ptr x mem)

// If the shift count turns out to be constant, split the op back into a plain
// load followed by a constant shift, masking the count to the operand width.
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
((SHL|SHR|SAR)XLload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Lconst [int8(c&31)] (MOVLload [off] {sym} ptr mem))