github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/lower_constant.go

package arm64

import (
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
)

// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()

	vr = m.compiler.AllocateVReg(valType)
	m.InsertLoadConstant(instr, vr)
	return
}

// InsertLoadConstant implements backend.Machine.
func (m *machine) InsertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()
	v := instr.ConstantVal()

	if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
		v = v & ((1 << valType.Bits()) - 1)
	}

	switch valType {
	case ssa.TypeF32:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst32(vr, v)
		m.insert(loadF)
	case ssa.TypeF64:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst64(vr, v)
		m.insert(loadF)
	case ssa.TypeI32:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI32)
		} else {
			m.lowerConstantI32(vr, int32(v))
		}
	case ssa.TypeI64:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI64)
		} else {
			m.lowerConstantI64(vr, int64(v))
		}
	default:
		panic("TODO")
	}
}

// The following logic is based on the old asm/arm64 package.
// https://github.com/bananabytelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go

func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
	ic := int64(uint32(c))
	if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c), false) {
			m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
			return
		}
	}

	if t := const16bitAligned(int64(uint32(c))); t >= 0 {
		// If the const fits within a single 16-bit aligned chunk (e.g. 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000),
		// we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
	} else if t := const16bitAligned(int64(^c)); t >= 0 {
		// Likewise, if the inverse of the const fits within a single 16-bit aligned chunk, a single MOVN suffices.
		m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
	} else if isBitMaskImmediate(uint64(uint32(c)), false) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
	} else {
		// Otherwise, we use MOVZ and MOVK to load it.
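		// Illustrative example (not part of the original source): for c = 0x12345678, neither the
		// bitmask-immediate nor the 16-bit-aligned (plain or inverted) cases apply, so we emit:
		//   MOVZ dst, #0x5678            ; dst = 0x0000_5678
		//   MOVK dst, #0x1234, LSL #16   ; dst = 0x1234_5678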
		c16 := uint16(c)
		m.insertMOVZ(dst, uint64(c16), 0, false)
		c16 = uint16(uint32(c) >> 16)
		m.insertMOVK(dst, uint64(c16), 1, false)
	}
}

func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
	if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c), true) {
			m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
			return
		}
	}

	if t := const16bitAligned(c); t >= 0 {
		// If the const fits within a single 16-bit aligned chunk (e.g. 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000),
		// we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
	} else if t := const16bitAligned(^c); t >= 0 {
		// Likewise, if the inverse of the const fits within a single 16-bit aligned chunk, a single MOVN suffices.
		m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
	} else if isBitMaskImmediate(uint64(c), true) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
	} else {
		m.load64bitConst(c, dst)
	}
}

func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
	instr := m.allocateInstr()
	instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
	m.insert(instr)
}

// isBitMaskImmediate determines if the value can be encoded as a "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
// Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
func isBitMaskImmediate(x uint64, _64 bool) bool {
	// All zeros and all ones are not "bitmask immediate" by definition.
	if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
		return false
	}

	switch {
	case x != x>>32|x<<32:
		// e = 64
	case x != x>>16|x<<48:
		// e = 32 (x == x>>32|x<<32).
		// e.g. 0x00ff_ff00_00ff_ff00
		x = uint64(int32(x))
	case x != x>>8|x<<56:
		// e = 16 (x == x>>16|x<<48).
		// e.g. 0x00ff_00ff_00ff_00ff
		x = uint64(int16(x))
	case x != x>>4|x<<60:
		// e = 8 (x == x>>8|x<<56).
		// e.g. 0x0f0f_0f0f_0f0f_0f0f
		x = uint64(int8(x))
	default:
		// e = 4 or 2.
		return true
	}
	return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}

// sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1).
// For example: 0b1110 -> true, 0b1010 -> false.
func sequenceOfSetbits(x uint64) bool {
	y := getLowestBit(x)
	// If x is a contiguous sequence of set bits, adding x to its lowest set bit results in a number
	// with only one set bit (i.e. a power of two).
	y += x
	return (y-1)&y == 0
}

func getLowestBit(x uint64) uint64 {
	return x & (^x + 1)
}
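// Illustrative walkthrough (not part of the original source): x = 0x00ff_00ff_00ff_00ff is unchanged
// by rotations of 32 and 16 bits but not 8, so the element size is e = 16, and the sign-extending fold
// uint64(int16(0x00ff)) yields 0xff, a contiguous run of set bits: encodable. A rotated run such as
// 0xe7e7_..._e7e7 (e = 8, 0xe7 = 0b1110_0111) folds to 0xffff_..._ffe7, which is caught by the
// complement check sequenceOfSetbits(^x), since ^x = 0b1_1000 is itself a contiguous run.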
// const16bitAligned checks whether the value fits within a single 16-bit aligned chunk.
// If so, it returns the shift amount divided by 16; otherwise it returns -1.
func const16bitAligned(v int64) (ret int) {
	ret = -1
	for s := 0; s < 64; s += 16 {
		if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
			ret = s / 16
			break
		}
	}
	return
}

// load64bitConst loads a 64-bit constant into the register, following the same logic as the Go
// assembler for deciding how to load large 64-bit constants.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
	var bits [4]uint64
	var zeros, negs int
	for i := 0; i < 4; i++ {
		bits[i] = uint64(c) >> uint(i*16) & 0xffff
		if v := bits[i]; v == 0 {
			zeros++
		} else if v == 0xffff {
			negs++
		}
	}

	if zeros == 3 {
		// one MOVZ instruction.
		for i, v := range bits {
			if v != 0 {
				m.insertMOVZ(dst, v, i, true)
			}
		}
	} else if negs == 3 {
		// one MOVN instruction.
		for i, v := range bits {
			if v != 0xffff {
				v = ^v
				m.insertMOVN(dst, v, i, true)
			}
		}
	} else if zeros == 2 {
		// one MOVZ then one MOVK.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 2 {
		// one MOVN then one MOVK.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if zeros == 1 {
		// one MOVZ then two MOVK.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 1 {
		// one MOVN then two MOVK.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else {
		// one MOVZ then up to three MOVK.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	}
}

func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVZ(dst, v, uint64(shift), dst64)
	m.insert(instr)
}

func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVK(dst, v, uint64(shift), dst64)
	m.insert(instr)
}

func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVN(dst, v, uint64(shift), dst64)
	m.insert(instr)
}
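// Illustrative example (not part of the original source): load64bitConst(0x1234_ffff_5678_ffff, dst)
// sees bits = [0xffff, 0x5678, 0xffff, 0x1234], i.e. negs == 2, so it takes the MOVN-then-MOVK path:
//   MOVN dst, #0xa987, LSL #16   ; dst = ^(0xa987 << 16) = 0xffff_ffff_5678_ffff
//   MOVK dst, #0x1234, LSL #48   ; dst = 0x1234_ffff_5678_ffff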