github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/lower_constant.go

package arm64

import (
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
)

// lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()

	vr = m.compiler.AllocateVReg(valType)
	v := instr.ConstantVal()
	m.insertLoadConstant(v, valType, vr)
	return
}

// InsertLoadConstantBlockArg implements backend.Machine.
func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
	val := instr.Return()
	valType := val.Type()
	v := instr.ConstantVal()
	load := m.allocateInstr()
	load.asLoadConstBlockArg(v, valType, vr)
	m.insert(load)
}

func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
	v, typ, dst := i.loadConstBlockArgData()
	m.insertLoadConstant(v, typ, dst)
}

func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
	if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
		v = v & ((1 << valType.Bits()) - 1)
	}

	switch valType {
	case ssa.TypeF32:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst32(vr, v)
		m.insert(loadF)
	case ssa.TypeF64:
		loadF := m.allocateInstr()
		loadF.asLoadFpuConst64(vr, v)
		m.insert(loadF)
	case ssa.TypeI32:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI32)
		} else {
			m.lowerConstantI32(vr, int32(v))
		}
	case ssa.TypeI64:
		if v == 0 {
			m.InsertMove(vr, xzrVReg, ssa.TypeI64)
		} else {
			m.lowerConstantI64(vr, int64(v))
		}
	default:
		panic("TODO")
	}
}

// The following logic is based on the old asm/arm64 package.
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go

func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
	ic := int64(uint32(c))
	if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c), false) {
			m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
			return
		}
	}

	if t := const16bitAligned(int64(uint32(c))); t >= 0 {
		// If the const fits in a single 16-bit-aligned halfword, e.g. 0xffff or 0xffff_0000,
		// we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
	} else if t := const16bitAligned(int64(^c)); t >= 0 {
		// Likewise, if the inverse of the const fits in a single halfword, a single MOVN suffices.
		m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
	} else if isBitMaskImmediate(uint64(uint32(c)), false) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
	} else {
		// Otherwise, we use MOVZ and MOVK to load it.
		c16 := uint16(c)
		m.insertMOVZ(dst, uint64(c16), 0, false)
		c16 = uint16(uint32(c) >> 16)
		m.insertMOVK(dst, uint64(c16), 1, false)
	}
}
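
// As a rough illustration of the MOVZ/MOVK fallback above: c = 0x12345678 is not
// 16-bit-aligned (its set bits straddle two halfwords), nor is its inverse, nor is
// it a bitmask immediate, so lowerConstantI32 conceptually emits:
//
//	MOVZ dst, #0x5678, LSL #0  ; dst = 0x0000_5678
//	MOVK dst, #0x1234, LSL #16 ; dst = 0x1234_5678
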
func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
	if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
		if isBitMaskImmediate(uint64(c), true) {
			m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
			return
		}
	}

	if t := const16bitAligned(c); t >= 0 {
		// If the const fits in a single 16-bit-aligned halfword, e.g. 0xffff, 0xffff_0000 or
		// 0xffff_0000_0000_0000, we can load it with a single MOVZ.
		m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
	} else if t := const16bitAligned(^c); t >= 0 {
		// Likewise, if the inverse of the const fits in a single halfword, a single MOVN suffices.
		m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
	} else if isBitMaskImmediate(uint64(c), true) {
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
	} else {
		m.load64bitConst(c, dst)
	}
}

func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
	instr := m.allocateInstr()
	instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
	m.insert(instr)
}

// isBitMaskImmediate determines whether the value can be encoded as a "bitmask immediate".
//
// Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of
// size e = 2, 4, 8, 16, 32, or 64 bits. Each element contains the same sub-pattern: a single
// run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
//
// See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
func isBitMaskImmediate(x uint64, _64 bool) bool {
	// All-zeros and all-ones values are not "bitmask immediates" by definition.
	if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
		return false
	}

	switch {
	case x != x>>32|x<<32:
		// e = 64
	case x != x>>16|x<<48:
		// e = 32 (x == x>>32|x<<32).
		// e.g. 0x00ff_ff00_00ff_ff00
		x = uint64(int32(x))
	case x != x>>8|x<<56:
		// e = 16 (x == x>>16|x<<48).
		// e.g. 0x00ff_00ff_00ff_00ff
		x = uint64(int16(x))
	case x != x>>4|x<<60:
		// e = 8 (x == x>>8|x<<56).
		// e.g. 0x0f0f_0f0f_0f0f_0f0f
		x = uint64(int8(x))
	default:
		// e = 4 or 2.
		return true
	}
	return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
}

// sequenceOfSetbits returns true if the number's binary representation is a single contiguous
// sequence of set bits (1s). For example: 0b1110 -> true, 0b1010 -> false.
func sequenceOfSetbits(x uint64) bool {
	y := getLowestBit(x)
	// If x is a contiguous sequence of set bits, adding its lowest set bit carries through the
	// run, resulting in a number with only one set bit (i.e. a power of two).
	y += x
	return (y-1)&y == 0
}

func getLowestBit(x uint64) uint64 {
	return x & (^x + 1)
}

// const16bitAligned checks whether the value fits in a single 16-bit-aligned halfword.
// If so, it returns the shift amount divided by 16 (the halfword index); otherwise -1.
func const16bitAligned(v int64) (ret int) {
	ret = -1
	for s := 0; s < 64; s += 16 {
		if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
			ret = s / 16
			break
		}
	}
	return
}
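
// For example, const16bitAligned(0xffff) returns 0, const16bitAligned(0xffff_0000)
// returns 1, and const16bitAligned(0x1_0000_0000) returns 2 (the value occupies
// halfword #2 only), while const16bitAligned(0x1_ffff) returns -1 because its set
// bits straddle halfwords #0 and #1.
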
// load64bitConst loads a 64-bit constant into the register, following the same logic as the Go
// assembler for deciding how to load large 64-bit constants.
//
// See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
	var bits [4]uint64
	var zeros, negs int
	for i := 0; i < 4; i++ {
		bits[i] = uint64(c) >> uint(i*16) & 0xffff
		if v := bits[i]; v == 0 {
			zeros++
		} else if v == 0xffff {
			negs++
		}
	}

	if zeros == 3 {
		// one MOVZ instruction.
		for i, v := range bits {
			if v != 0 {
				m.insertMOVZ(dst, v, i, true)
			}
		}
	} else if negs == 3 {
		// one MOVN instruction.
		for i, v := range bits {
			if v != 0xffff {
				v = ^v
				m.insertMOVN(dst, v, i, true)
			}
		}
	} else if zeros == 2 {
		// one MOVZ then one MOVK.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 2 {
		// one MOVN then one MOVK.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if zeros == 1 {
		// one MOVZ then two MOVKs.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else if negs == 1 {
		// one MOVN then two MOVKs.
		var movn bool
		for i, v := range bits { // Emit MOVN.
			if !movn && v != 0xffff {
				v = ^v
				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
				m.insertMOVN(dst, v, i, true)
				movn = true
			} else if v != 0xffff {
				m.insertMOVK(dst, v, i, true)
			}
		}
	} else {
		// one MOVZ then up to three MOVKs.
		var movz bool
		for i, v := range bits {
			if !movz && v != 0 { // MOVZ.
				m.insertMOVZ(dst, v, i, true)
				movz = true
			} else if v != 0 {
				m.insertMOVK(dst, v, i, true)
			}
		}
	}
}

func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVZ(dst, v, uint64(shift), dst64)
	m.insert(instr)
}

func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVK(dst, v, uint64(shift), dst64)
	m.insert(instr)
}

func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
	instr := m.allocateInstr()
	instr.asMOVN(dst, v, uint64(shift), dst64)
	m.insert(instr)
}
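
// Worked example for load64bitConst: c = 0x1234_0000_5678_0000 decomposes into
// halfwords bits = [0x0000, 0x5678, 0x0000, 0x1234], so zeros == 2 and the
// MOVZ-then-MOVK branch conceptually emits:
//
//	MOVZ dst, #0x5678, LSL #16
//	MOVK dst, #0x1234, LSL #48

// constLoweringExamples is a hypothetical, illustration-only function (not part of
// wazero) exercising the pure helpers above; the expected results noted in the
// comments are worked out by hand from the helpers' definitions.
func constLoweringExamples() {
	// 0x0000_ffff_0000_ffff repeats every 32 bits and each element is a contiguous
	// run of 16 ones, so it is encodable as a bitmask immediate.
	_ = isBitMaskImmediate(0x0000_ffff_0000_ffff, true) // true

	// 0x1234_5678_1234_5678 also repeats every 32 bits, but 0x12345678 is not a
	// rotated run of ones, so it is rejected.
	_ = isBitMaskImmediate(0x1234_5678_1234_5678, true) // false

	// 0xffff_0000 occupies only halfword #1, so a single MOVZ with LSL #16 works.
	_ = const16bitAligned(0xffff_0000) // 1

	// 0b1110 is one contiguous run of set bits; 0b1010 is not.
	_, _ = sequenceOfSetbits(0b1110), sequenceOfSetbits(0b1010) // true, false
}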