github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/lower_constant.go (about)

     1  package arm64
     2  
     3  import (
     4  	"github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc"
     5  	"github.com/wasilibs/wazerox/internal/engine/wazevo/ssa"
     6  )
     7  
     8  // lowerConstant allocates a new VReg and inserts the instruction to load the constant value.
     9  func (m *machine) lowerConstant(instr *ssa.Instruction) (vr regalloc.VReg) {
    10  	val := instr.Return()
    11  	valType := val.Type()
    12  
    13  	vr = m.compiler.AllocateVReg(valType)
    14  	m.InsertLoadConstant(instr, vr)
    15  	return
    16  }
    17  
    18  // InsertLoadConstant implements backend.Machine.
    19  func (m *machine) InsertLoadConstant(instr *ssa.Instruction, vr regalloc.VReg) {
    20  	val := instr.Return()
    21  	valType := val.Type()
    22  	v := instr.ConstantVal()
    23  
    24  	if valType.Bits() < 64 { // Clear the redundant bits just in case it's unexpectedly sign-extended, etc.
    25  		v = v & ((1 << valType.Bits()) - 1)
    26  	}
    27  
    28  	switch valType {
    29  	case ssa.TypeF32:
    30  		loadF := m.allocateInstr()
    31  		loadF.asLoadFpuConst32(vr, v)
    32  		m.insert(loadF)
    33  	case ssa.TypeF64:
    34  		loadF := m.allocateInstr()
    35  		loadF.asLoadFpuConst64(vr, v)
    36  		m.insert(loadF)
    37  	case ssa.TypeI32:
    38  		if v == 0 {
    39  			m.InsertMove(vr, xzrVReg, ssa.TypeI32)
    40  		} else {
    41  			m.lowerConstantI32(vr, int32(v))
    42  		}
    43  	case ssa.TypeI64:
    44  		if v == 0 {
    45  			m.InsertMove(vr, xzrVReg, ssa.TypeI64)
    46  		} else {
    47  			m.lowerConstantI64(vr, int64(v))
    48  		}
    49  	default:
    50  		panic("TODO")
    51  	}
    52  }
    53  
    54  // The following logics are based on the old asm/arm64 package.
    55  // https://github.com/tetratelabs/wazero/blob/39f2ff23a6d609e10c82b9cc0b981f6de5b87a9c/internal/asm/arm64/impl.go
    56  
// lowerConstantI32 emits instructions that materialize the 32-bit constant c
// into dst (as a 32-bit operation: every emission below passes false for the
// 64-bit flag), choosing among ORR(bitmask immediate), MOVZ, MOVN, and
// MOVZ+MOVK.
func (m *machine) lowerConstantI32(dst regalloc.VReg, c int32) {
	// Following the logic here:
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
	// ic is c zero-extended to 64 bits, so ic >= 0 always holds.
	ic := int64(uint32(c))
	if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
		// Fast path for values that fit a 12-bit immediate (optionally shifted
		// left by 12): prefer a single bitmask-immediate ORR when encodable.
		// NOTE(review): uint64(c) sign-extends a negative c here, while the
		// emission below zero-extends via uint64(uint32(c)); this mirrors the
		// old asm/arm64 package — confirm the asymmetry is intentional.
		if isBitMaskImmediate(uint64(c)) {
			m.lowerConstViaBitMaskImmediate(uint64(uint32(c)), dst, false)
			return
		}
	}

	if t := const16bitAligned(int64(uint32(c))); t >= 0 {
		// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
		// We could load it into temporary with movk.
		m.insertMOVZ(dst, uint64(uint32(c)>>(16*t)), t, false)
	} else if t := const16bitAligned(int64(^c)); t >= 0 {
		// Also, if the inverse of the const can fit within 16-bit range, do the same ^^.
		// (^c>>(16*t) is an arithmetic shift on int32; presumably only the low
		// 16 bits are encoded by insertMOVN — verify against its encoder.)
		m.insertMOVN(dst, uint64(^c>>(16*t)), t, false)
	} else if isBitMaskImmediate(uint64(uint32(c))) {
		// NOTE(review): the check zero-extends but the emission sign-extends
		// via uint64(c) — mirrors the old asm/arm64 package; with the 32-bit
		// flag the upper bits are presumably ignored — confirm.
		m.lowerConstViaBitMaskImmediate(uint64(c), dst, false)
	} else {
		// Otherwise, we use MOVZ and MOVK to load it.
		c16 := uint16(c) // low halfword
		m.insertMOVZ(dst, uint64(c16), 0, false)
		c16 = uint16(uint32(c) >> 16) // high halfword
		m.insertMOVK(dst, uint64(c16), 1, false)
	}
}
    85  
    86  func (m *machine) lowerConstantI64(dst regalloc.VReg, c int64) {
    87  	// Following the logic here:
    88  	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
    89  	if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
    90  		if isBitMaskImmediate(uint64(c)) {
    91  			m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
    92  			return
    93  		}
    94  	}
    95  
    96  	if t := const16bitAligned(c); t >= 0 {
    97  		// If the const can fit within 16-bit alignment, for example, 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000
    98  		// We could load it into temporary with movk.
    99  		m.insertMOVZ(dst, uint64(c)>>(16*t), t, true)
   100  	} else if t := const16bitAligned(^c); t >= 0 {
   101  		// Also, if the reverse of the const can fit within 16-bit range, do the same ^^.
   102  		m.insertMOVN(dst, uint64(^c)>>(16*t), t, true)
   103  	} else if isBitMaskImmediate(uint64(c)) {
   104  		m.lowerConstViaBitMaskImmediate(uint64(c), dst, true)
   105  	} else {
   106  		m.load64bitConst(c, dst)
   107  	}
   108  }
   109  
   110  func (m *machine) lowerConstViaBitMaskImmediate(c uint64, dst regalloc.VReg, b64 bool) {
   111  	instr := m.allocateInstr()
   112  	instr.asALUBitmaskImm(aluOpOrr, dst, xzrVReg, c, b64)
   113  	m.insert(instr)
   114  }
   115  
   116  // isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
   117  //
   118  //	Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
   119  //	Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
   120  //
   121  // See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
   122  func isBitMaskImmediate(x uint64) bool {
   123  	// All zeros and ones are not "bitmask immediate" by definition.
   124  	if x == 0 || x == 0xffff_ffff_ffff_ffff {
   125  		return false
   126  	}
   127  
   128  	switch {
   129  	case x != x>>32|x<<32:
   130  		// e = 64
   131  	case x != x>>16|x<<48:
   132  		// e = 32 (x == x>>32|x<<32).
   133  		// e.g. 0x00ff_ff00_00ff_ff00
   134  		x = uint64(int32(x))
   135  	case x != x>>8|x<<56:
   136  		// e = 16 (x == x>>16|x<<48).
   137  		// e.g. 0x00ff_00ff_00ff_00ff
   138  		x = uint64(int16(x))
   139  	case x != x>>4|x<<60:
   140  		// e = 8 (x == x>>8|x<<56).
   141  		// e.g. 0x0f0f_0f0f_0f0f_0f0f
   142  		x = uint64(int8(x))
   143  	default:
   144  		// e = 4 or 2.
   145  		return true
   146  	}
   147  	return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
   148  }
   149  
   150  // sequenceOfSetbits returns true if the number's binary representation is the sequence set bit (1).
   151  // For example: 0b1110 -> true, 0b1010 -> false
   152  func sequenceOfSetbits(x uint64) bool {
   153  	y := getLowestBit(x)
   154  	// If x is a sequence of set bit, this should results in the number
   155  	// with only one set bit (i.e. power of two).
   156  	y += x
   157  	return (y-1)&y == 0
   158  }
   159  
   160  func getLowestBit(x uint64) uint64 {
   161  	return x & (^x + 1)
   162  }
   163  
   164  // const16bitAligned check if the value is on the 16-bit alignment.
   165  // If so, returns the shift num divided by 16, and otherwise -1.
   166  func const16bitAligned(v int64) (ret int) {
   167  	ret = -1
   168  	for s := 0; s < 64; s += 16 {
   169  		if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
   170  			ret = s / 16
   171  			break
   172  		}
   173  	}
   174  	return
   175  }
   176  
   177  // load64bitConst loads a 64-bit constant into the register, following the same logic to decide how to load large 64-bit
   178  // consts as in the Go assembler.
   179  //
   180  // See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
   181  func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
   182  	var bits [4]uint64
   183  	var zeros, negs int
   184  	for i := 0; i < 4; i++ {
   185  		bits[i] = uint64(c) >> uint(i*16) & 0xffff
   186  		if v := bits[i]; v == 0 {
   187  			zeros++
   188  		} else if v == 0xffff {
   189  			negs++
   190  		}
   191  	}
   192  
   193  	if zeros == 3 {
   194  		// one MOVZ instruction.
   195  		for i, v := range bits {
   196  			if v != 0 {
   197  				m.insertMOVZ(dst, v, i, true)
   198  			}
   199  		}
   200  	} else if negs == 3 {
   201  		// one MOVN instruction.
   202  		for i, v := range bits {
   203  			if v != 0xffff {
   204  				v = ^v
   205  				m.insertMOVN(dst, v, i, true)
   206  			}
   207  		}
   208  	} else if zeros == 2 {
   209  		// one MOVZ then one OVK.
   210  		var movz bool
   211  		for i, v := range bits {
   212  			if !movz && v != 0 { // MOVZ.
   213  				m.insertMOVZ(dst, v, i, true)
   214  				movz = true
   215  			} else if v != 0 {
   216  				m.insertMOVK(dst, v, i, true)
   217  			}
   218  		}
   219  
   220  	} else if negs == 2 {
   221  		// one MOVN then one or two MOVK.
   222  		var movn bool
   223  		for i, v := range bits { // Emit MOVN.
   224  			if !movn && v != 0xffff {
   225  				v = ^v
   226  				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
   227  				m.insertMOVN(dst, v, i, true)
   228  				movn = true
   229  			} else if v != 0xffff {
   230  				m.insertMOVK(dst, v, i, true)
   231  			}
   232  		}
   233  
   234  	} else if zeros == 1 {
   235  		// one MOVZ then two MOVK.
   236  		var movz bool
   237  		for i, v := range bits {
   238  			if !movz && v != 0 { // MOVZ.
   239  				m.insertMOVZ(dst, v, i, true)
   240  				movz = true
   241  			} else if v != 0 {
   242  				m.insertMOVK(dst, v, i, true)
   243  			}
   244  		}
   245  
   246  	} else if negs == 1 {
   247  		// one MOVN then two MOVK.
   248  		var movn bool
   249  		for i, v := range bits { // Emit MOVN.
   250  			if !movn && v != 0xffff {
   251  				v = ^v
   252  				// https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
   253  				m.insertMOVN(dst, v, i, true)
   254  				movn = true
   255  			} else if v != 0xffff {
   256  				m.insertMOVK(dst, v, i, true)
   257  			}
   258  		}
   259  
   260  	} else {
   261  		// one MOVZ then up to three MOVK.
   262  		var movz bool
   263  		for i, v := range bits {
   264  			if !movz && v != 0 { // MOVZ.
   265  				m.insertMOVZ(dst, v, i, true)
   266  				movz = true
   267  			} else if v != 0 {
   268  				m.insertMOVK(dst, v, i, true)
   269  			}
   270  		}
   271  	}
   272  }
   273  
   274  func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   275  	instr := m.allocateInstr()
   276  	instr.asMOVZ(dst, v, uint64(shift), dst64)
   277  	m.insert(instr)
   278  }
   279  
   280  func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   281  	instr := m.allocateInstr()
   282  	instr.asMOVK(dst, v, uint64(shift), dst64)
   283  	m.insert(instr)
   284  }
   285  
   286  func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
   287  	instr := m.allocateInstr()
   288  	instr.asMOVN(dst, v, uint64(shift), dst64)
   289  	m.insert(instr)
   290  }