github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/lower_constant.go (about)

     1  package arm64
     2  
     3  import (
     4  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
     5  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
     6  )
     7  
     8  // lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
     9  func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
    10  	val := instr.Return()
    11  	valType := val.Type()
    12  
    13  	vr = m.compiler.AllocateVReg(valType)
    14  	v := instr.ConstantVal()
    15  	m.insertLoadConstant(v, valType, vr)
    16  	return
    17  }
    18  
    19  // InsertLoadConstantBlockArg implements backend.Machine.
    20  func (m *machine) InsertLoadConstantBlockArg(instr *ssa.Instruction, vr regalloc.VReg) {
    21  	val := instr.Return()
    22  	valType := val.Type()
    23  	v := instr.ConstantVal()
    24  	load := m.allocateInstr()
    25  	load.asLoadConstBlockArg(v, valType, vr)
    26  	m.insert(load)
    27  }
    28  
    29  func (m *machine) lowerLoadConstantBlockArgAfterRegAlloc(i *instruction) {
    30  	v, typ, dst := i.loadConstBlockArgData()
    31  	m.insertLoadConstant(v, typ, dst)
    32  }
    33  
    34  func (m *machine) insertLoadConstant(v uint64, valType ssa.Type, vr regalloc.VReg) {
    35  	if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
    36  		v = v & ((1 << valType.Bits()) - 1)
    37  	}
    38  
    39  	switch valType {
    40  	case ssa.TypeF32:
    41  		loadF := m.allocateInstr()
    42  		loadF.asLoadFpuConst32(vr, v)
    43  		m.insert(loadF)
    44  	case ssa.TypeF64:
    45  		loadF := m.allocateInstr()
    46  		loadF.asLoadFpuConst64(vr, v)
    47  		m.insert(loadF)
    48  	case ssa.TypeI32:
    49  		if v == 0 {
    50  			m.InsertMove(vr, xzrVReg, ssa.TypeI32)
    51  		} else {
    52  			m.lowerConstantI32(vr, int32(v))
    53  		}
    54  	case ssa.TypeI64:
    55  		if v == 0 {
    56  			m.InsertMove(vr, xzrVReg, ssa.TypeI64)
    57  		} else {
    58  			m.lowerConstantI64(vr, int64(v))
    59  		}
    60  	default:
    61  		panic("TODO")
    62  	}
    63  }
    64  
// The following logic is based on the old asm/arm64 package.
// https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
    67  
    68  func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
    69  	// Following the logic here:
    70  	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
    71  	ic := int64(uint32(c))
    72  	if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
    73  		if isBitMaskImmediate(uint64(c), false) {
    74  			m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
    75  			return
    76  		}
    77  	}
    78  
    79  	if t := const16bitAligned(int64(uint32(c))); t >= 0 {
    80  		// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
    81  		// We could load it into temporary with movk.
    82  		m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
    83  	} else if t := const16bitAligned(int64(^c)); t >= 0 {
    84  		// Also, if the inverse of the const can fit within 16-bit range, do the same ^^.
    85  		m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
    86  	} else if isBitMaskImmediate(uint64(uint32(c)), false) {
    87  		m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
    88  	} else {
    89  		// Otherwise, we use MOVZ and MOVK to load it.
    90  		c16 := uint16(c)
    91  		m.insertMOVZ(dst, uint64(c16), 0, false)
    92  		c16 = uint16(uint32(c) >> 16)
    93  		m.insertMOVK(dst, uint64(c16), 1, false)
    94  	}
    95  }
    96  
    97  func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
    98  	// Following the logic here:
    99  	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
   100  	if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
   101  		if isBitMaskImmediate(uint64(c), true) {
   102  			m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
   103  			return
   104  		}
   105  	}
   106  
   107  	if t := const16bitAligned(c); t >= 0 {
   108  		// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
   109  		// We could load it into temporary with movk.
   110  		m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
   111  	} else if t := const16bitAligned(^c); t >= 0 {
   112  		// Also, if the reverse of the const can fit within 16-bit range, do the same ^^.
   113  		m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
   114  	} else if isBitMaskImmediate(uint64(c), true) {
   115  		m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
   116  	} else {
   117  		m.load64bitConst(c, dst)
   118  	}
   119  }
   120  
   121  func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
   122  	instr := m.allocateInstr()
   123  	instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
   124  	m.insert(instr)
   125  }
   126  
   127  // isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
   128  //
   129  //	Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
   130  //	Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
   131  //
   132  // See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
   133  func isBitMaskImmediate(x uint64, _64 bool) bool {
   134  	// All zeros and ones are not "bitmask immediate" by definition.
   135  	if x == 0 || (_64 && x == 0xffff_ffff_ffff_ffff) || (!_64 && x == 0xffff_ffff) {
   136  		return false
   137  	}
   138  
   139  	switch {
   140  	case x != x>>32|x<<32:
   141  		// e = 64
   142  	case x != x>>16|x<<48:
   143  		// e = 32 (x == x>>32|x<<32).
   144  		// e.g. 0x00ff_ff00_00ff_ff00
   145  		x = uint64(int32(x))
   146  	case x != x>>8|x<<56:
   147  		// e = 16 (x == x>>16|x<<48).
   148  		// e.g. 0x00ff_00ff_00ff_00ff
   149  		x = uint64(int16(x))
   150  	case x != x>>4|x<<60:
   151  		// e = 8 (x == x>>8|x<<56).
   152  		// e.g. 0x0f0f_0f0f_0f0f_0f0f
   153  		x = uint64(int8(x))
   154  	default:
   155  		// e = 4 or 2.
   156  		return true
   157  	}
   158  	return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
   159  }
   160  
   161  // sequenceOfSetbits returns true if the number's binary representation is the sequence set bit (1).
   162  // For example: 0b1110 -> true, 0b1010 -> false
   163  func sequenceOfSetbits(x uint64) bool {
   164  	y := getLowestBit(x)
   165  	// If x is a sequence of set bit, this should results in the number
   166  	// with only one set bit (i.e. power of two).
   167  	y += x
   168  	return (y-1)&y == 0
   169  }
   170  
   171  func getLowestBit(x uint64) uint64 {
   172  	return x & (^x + 1)
   173  }
   174  
   175  // const16bitAligned check if the value is on the 16-bit alignment.
   176  // If so, returns the shift num divided by 16, and otherwise -1.
   177  func const16bitAligned(v int64) (ret int) {
   178  	ret = -1
   179  	for s := 0; s < 64; s += 16 {
   180  		if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
   181  			ret = s / 16
   182  			break
   183  		}
   184  	}
   185  	return
   186  }
   187  
   188  // load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
   189  // consts as in the Go assembler.
   190  //
   191  // See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
   192  func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
   193  	var bits [4]uint64
   194  	var zeros, negs int
   195  	for i := 0; i < 4; i++ {
   196  		bits[i] = uint64(c) >> uint(i*16) & 0xffff
   197  		if v := bits[i]; v == 0 {
   198  			zeros++
   199  		} else if v == 0xffff {
   200  			negs++
   201  		}
   202  	}
   203  
   204  	if zeros == 3 {
   205  		// one MOVZ instruction.
   206  		for i, v := range bits {
   207  			if v != 0 {
   208  				m.insertMOVZ(dst, v, i, true)
   209  			}
   210  		}
   211  	} else if negs == 3 {
   212  		// one MOVN instruction.
   213  		for i, v := range bits {
   214  			if v != 0xffff {
   215  				v = ^v
   216  				m.insertMOVN(dst, v, i, true)
   217  			}
   218  		}
   219  	} else if zeros == 2 {
   220  		// one MOVZ then one OVK.
   221  		var movz bool
   222  		for i, v := range bits {
   223  			if !movz && v != 0 { // MOVZ.
   224  				m.insertMOVZ(dst, v, i, true)
   225  				movz = true
   226  			} else if v != 0 {
   227  				m.insertMOVK(dst, v, i, true)
   228  			}
   229  		}
   230  
   231  	} else if negs == 2 {
   232  		// one MOVN then one or two MOVK.
   233  		var movn bool
   234  		for i, v := range bits { // Emit MOVN.
   235  			if !movn && v != 0xffff {
   236  				v = ^v
   237  				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
   238  				m.insertMOVN(dst, v, i, true)
   239  				movn = true
   240  			} else if v != 0xffff {
   241  				m.insertMOVK(dst, v, i, true)
   242  			}
   243  		}
   244  
   245  	} else if zeros == 1 {
   246  		// one MOVZ then two MOVK.
   247  		var movz bool
   248  		for i, v := range bits {
   249  			if !movz && v != 0 { // MOVZ.
   250  				m.insertMOVZ(dst, v, i, true)
   251  				movz = true
   252  			} else if v != 0 {
   253  				m.insertMOVK(dst, v, i, true)
   254  			}
   255  		}
   256  
   257  	} else if negs == 1 {
   258  		// one MOVN then two MOVK.
   259  		var movn bool
   260  		for i, v := range bits { // Emit MOVN.
   261  			if !movn && v != 0xffff {
   262  				v = ^v
   263  				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
   264  				m.insertMOVN(dst, v, i, true)
   265  				movn = true
   266  			} else if v != 0xffff {
   267  				m.insertMOVK(dst, v, i, true)
   268  			}
   269  		}
   270  
   271  	} else {
   272  		// one MOVZ then up to three MOVK.
   273  		var movz bool
   274  		for i, v := range bits {
   275  			if !movz && v != 0 { // MOVZ.
   276  				m.insertMOVZ(dst, v, i, true)
   277  				movz = true
   278  			} else if v != 0 {
   279  				m.insertMOVK(dst, v, i, true)
   280  			}
   281  		}
   282  	}
   283  }
   284  
   285  func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   286  	instr := m.allocateInstr()
   287  	instr.asMOVZ(dst, v, uint64(shift), dst64)
   288  	m.insert(instr)
   289  }
   290  
   291  func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   292  	instr := m.allocateInstr()
   293  	instr.asMOVK(dst, v, uint64(shift), dst64)
   294  	m.insert(instr)
   295  }
   296  
   297  func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   298  	instr := m.allocateInstr()
   299  	instr.asMOVN(dst, v, uint64(shift), dst64)
   300  	m.insert(instr)
   301  }