github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/instr.go (about) 1 package arm64 2 3 import ( 4 "fmt" 5 "math" 6 "strings" 7 8 "github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc" 9 "github.com/wasilibs/wazerox/internal/engine/wazevo/ssa" 10 ) 11 12 type ( 13 // instruction represents either a real instruction in arm64, or the meta instructions 14 // that are convenient for code generation. For example, inline constants are also treated 15 // as instructions. 16 // 17 // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation 18 // can be considered equivalent to the sequence of such instructions. 19 // 20 // Each field is interpreted depending on the kind. 21 // 22 // TODO: optimize the layout later once the impl settles. 23 instruction struct { 24 kind instructionKind 25 prev, next *instruction 26 u1, u2, u3 uint64 27 rd, rm, rn, ra operand 28 amode addressMode 29 abi *abiImpl 30 targets []uint32 31 addedBeforeRegAlloc bool 32 } 33 34 // instructionKind represents the kind of instruction. 35 // This controls how the instruction struct is interpreted. 36 instructionKind int 37 ) 38 39 // IsCall implements regalloc.Instr IsCall. 40 func (i *instruction) IsCall() bool { 41 return i.kind == call 42 } 43 44 // IsIndirectCall implements regalloc.Instr IsIndirectCall. 45 func (i *instruction) IsIndirectCall() bool { 46 return i.kind == callInd 47 } 48 49 // IsReturn implements regalloc.Instr IsReturn. 50 func (i *instruction) IsReturn() bool { 51 return i.kind == ret 52 } 53 54 type defKind byte 55 56 const ( 57 defKindNone defKind = iota + 1 58 defKindRD 59 defKindCall 60 ) 61 62 var defKinds = [numInstructionKinds]defKind{ 63 adr: defKindRD, 64 aluRRR: defKindRD, 65 aluRRRR: defKindRD, 66 aluRRImm12: defKindRD, 67 aluRRBitmaskImm: defKindRD, 68 aluRRRShift: defKindRD, 69 aluRRImmShift: defKindRD, 70 aluRRRExtend: defKindRD, 71 bitRR: defKindRD, 72 movZ: defKindRD, 73 movK: defKindRD, 74 movN: defKindRD, 75 mov32: defKindRD, 76 mov64: defKindRD, 77 fpuMov64: defKindRD, 78 fpuMov128: defKindRD, 79 fpuRR: defKindRD, 80 fpuRRR: defKindRD, 81 nop0: defKindNone, 82 call: defKindCall, 83 callInd: defKindCall, 84 ret: defKindNone, 85 store8: defKindNone, 86 store16: defKindNone, 87 store32: defKindNone, 88 store64: defKindNone, 89 exitSequence: defKindNone, 90 condBr: defKindNone, 91 br: defKindNone, 92 brTableSequence: defKindNone, 93 cSet: defKindRD, 94 extend: defKindRD, 95 fpuCmp: defKindNone, 96 uLoad8: defKindRD, 97 uLoad16: defKindRD, 98 uLoad32: defKindRD, 99 sLoad8: defKindRD, 100 sLoad16: defKindRD, 101 sLoad32: defKindRD, 102 uLoad64: defKindRD, 103 fpuLoad32: defKindRD, 104 fpuLoad64: defKindRD, 105 fpuLoad128: defKindRD, 106 vecLoad1R: defKindRD, 107 loadFpuConst32: defKindRD, 108 loadFpuConst64: defKindRD, 109 loadFpuConst128: defKindRD, 110 fpuStore32: defKindNone, 111 fpuStore64: defKindNone, 112 fpuStore128: defKindNone, 113 udf: defKindNone, 114 cSel: defKindRD, 115 fpuCSel: defKindRD, 116 movToVec: defKindRD, 117 movFromVec: defKindRD, 118 movFromVecSigned: defKindRD, 119 vecDup: defKindRD, 120 vecDupElement: defKindRD, 121 vecExtract: defKindRD, 122 vecMisc: defKindRD, 123 vecMovElement: defKindRD, 124 vecLanes: defKindRD, 125 vecShiftImm: defKindRD, 126 vecTbl: defKindRD, 127 vecTbl2: defKindRD, 128 vecPermute: defKindRD, 129 vecRRR: defKindRD, 130 vecRRRRewrite: defKindNone, 131 fpuToInt: defKindRD, 132 intToFpu: defKindRD, 133 cCmpImm: defKindNone, 134 
movToFPSR: defKindNone, 135 movFromFPSR: defKindRD, 136 emitSourceOffsetInfo: defKindNone, 137 } 138 139 // Defs returns the list of regalloc.VReg that are defined by the instruction. 140 // In order to reduce the number of allocations, the caller can pass the slice to be used. 141 func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { 142 *regs = (*regs)[:0] 143 switch defKinds[i.kind] { 144 case defKindNone: 145 case defKindRD: 146 *regs = append(*regs, i.rd.nr()) 147 case defKindCall: 148 *regs = append(*regs, i.abi.retRealRegs...) 149 default: 150 panic(fmt.Sprintf("defKind for %v not defined", i)) 151 } 152 return *regs 153 } 154 155 // AssignDef implements regalloc.Instr AssignDef. 156 func (i *instruction) AssignDef(reg regalloc.VReg) { 157 switch defKinds[i.kind] { 158 case defKindNone: 159 case defKindRD: 160 i.rd = i.rd.assignReg(reg) 161 case defKindCall: 162 panic("BUG: call instructions shouldn't be assigned") 163 default: 164 panic(fmt.Sprintf("defKind for %v not defined", i)) 165 } 166 } 167 168 type useKind byte 169 170 const ( 171 useKindNone useKind = iota + 1 172 useKindRN 173 useKindRNRM 174 useKindRNRMRA 175 useKindRNRN1RM 176 useKindRet 177 useKindCall 178 useKindCallInd 179 useKindAMode 180 useKindRNAMode 181 useKindCond 182 useKindVecRRRRewrite 183 ) 184 185 var useKinds = [numInstructionKinds]useKind{ 186 udf: useKindNone, 187 aluRRR: useKindRNRM, 188 aluRRRR: useKindRNRMRA, 189 aluRRImm12: useKindRN, 190 aluRRBitmaskImm: useKindRN, 191 aluRRRShift: useKindRNRM, 192 aluRRImmShift: useKindRN, 193 aluRRRExtend: useKindRNRM, 194 bitRR: useKindRN, 195 movZ: useKindNone, 196 movK: useKindNone, 197 movN: useKindNone, 198 mov32: useKindRN, 199 mov64: useKindRN, 200 fpuMov64: useKindRN, 201 fpuMov128: useKindRN, 202 fpuRR: useKindRN, 203 fpuRRR: useKindRNRM, 204 nop0: useKindNone, 205 call: useKindCall, 206 callInd: useKindCallInd, 207 ret: useKindRet, 208 store8: useKindRNAMode, 209 store16: useKindRNAMode, 210 store32: useKindRNAMode, 211 store64: useKindRNAMode, 212 exitSequence: useKindRN, 213 condBr: useKindCond, 214 br: useKindNone, 215 brTableSequence: useKindRN, 216 cSet: useKindNone, 217 extend: useKindRN, 218 fpuCmp: useKindRNRM, 219 uLoad8: useKindAMode, 220 uLoad16: useKindAMode, 221 uLoad32: useKindAMode, 222 sLoad8: useKindAMode, 223 sLoad16: useKindAMode, 224 sLoad32: useKindAMode, 225 uLoad64: useKindAMode, 226 fpuLoad32: useKindAMode, 227 fpuLoad64: useKindAMode, 228 fpuLoad128: useKindAMode, 229 fpuStore32: useKindRNAMode, 230 fpuStore64: useKindRNAMode, 231 fpuStore128: useKindRNAMode, 232 loadFpuConst32: useKindNone, 233 loadFpuConst64: useKindNone, 234 loadFpuConst128: useKindNone, 235 vecLoad1R: useKindRN, 236 cSel: useKindRNRM, 237 fpuCSel: useKindRNRM, 238 movToVec: useKindRN, 239 movFromVec: useKindRN, 240 movFromVecSigned: useKindRN, 241 vecDup: useKindRN, 242 vecDupElement: useKindRN, 243 vecExtract: useKindRNRM, 244 cCmpImm: useKindRN, 245 vecMisc: useKindRN, 246 vecMovElement: useKindRN, 247 vecLanes: useKindRN, 248 vecShiftImm: useKindRN, 249 vecTbl: useKindRNRM, 250 vecTbl2: useKindRNRN1RM, 251 vecRRR: useKindRNRM, 252 vecRRRRewrite: useKindVecRRRRewrite, 253 vecPermute: useKindRNRM, 254 fpuToInt: useKindRN, 255 intToFpu: useKindRN, 256 movToFPSR: useKindRN, 257 movFromFPSR: useKindNone, 258 adr: useKindNone, 259 emitSourceOffsetInfo: useKindNone, 260 } 261 262 // Uses returns the list of regalloc.VReg that are used by the instruction. 263 // In order to reduce the number of allocations, the caller can pass the slice to be used. 
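// For example, a register allocation pass can reuse one scratch slice while walking the
// instruction list (a sketch; `head` stands for an assumed *instruction list head):
//
//	var scratch []regalloc.VReg
//	for cur := head; cur != nil; cur = cur.next {
//		for _, use := range cur.Uses(&scratch) {
//			_ = use // record a read of this register at the current program point.
//		}
//	}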
264 func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { 265 *regs = (*regs)[:0] 266 switch useKinds[i.kind] { 267 case useKindNone: 268 case useKindRN: 269 if rn := i.rn.reg(); rn.Valid() { 270 *regs = append(*regs, rn) 271 } 272 case useKindRNRM: 273 if rn := i.rn.reg(); rn.Valid() { 274 *regs = append(*regs, rn) 275 } 276 if rm := i.rm.reg(); rm.Valid() { 277 *regs = append(*regs, rm) 278 } 279 case useKindRNRMRA: 280 if rn := i.rn.reg(); rn.Valid() { 281 *regs = append(*regs, rn) 282 } 283 if rm := i.rm.reg(); rm.Valid() { 284 *regs = append(*regs, rm) 285 } 286 if ra := i.ra.reg(); ra.Valid() { 287 *regs = append(*regs, ra) 288 } 289 case useKindRNRN1RM: 290 if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { 291 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 292 *regs = append(*regs, rn, rn1) 293 } 294 if rm := i.rm.reg(); rm.Valid() { 295 *regs = append(*regs, rm) 296 } 297 case useKindRet: 298 *regs = append(*regs, i.abi.retRealRegs...) 299 case useKindAMode: 300 if amodeRN := i.amode.rn; amodeRN.Valid() { 301 *regs = append(*regs, amodeRN) 302 } 303 if amodeRM := i.amode.rm; amodeRM.Valid() { 304 *regs = append(*regs, amodeRM) 305 } 306 case useKindRNAMode: 307 *regs = append(*regs, i.rn.reg()) 308 if amodeRN := i.amode.rn; amodeRN.Valid() { 309 *regs = append(*regs, amodeRN) 310 } 311 if amodeRM := i.amode.rm; amodeRM.Valid() { 312 *regs = append(*regs, amodeRM) 313 } 314 case useKindCond: 315 cnd := cond(i.u1) 316 if cnd.kind() != condKindCondFlagSet { 317 *regs = append(*regs, cnd.register()) 318 } 319 case useKindCall: 320 *regs = append(*regs, i.abi.argRealRegs...) 321 case useKindCallInd: 322 *regs = append(*regs, i.rn.nr()) 323 *regs = append(*regs, i.abi.argRealRegs...) 324 case useKindVecRRRRewrite: 325 *regs = append(*regs, i.rn.reg()) 326 *regs = append(*regs, i.rm.reg()) 327 *regs = append(*regs, i.rd.reg()) 328 default: 329 panic(fmt.Sprintf("useKind for %v not defined", i)) 330 } 331 return *regs 332 } 333 334 func (i *instruction) AssignUse(index int, reg regalloc.VReg) { 335 switch useKinds[i.kind] { 336 case useKindNone: 337 case useKindRN: 338 if rn := i.rn.reg(); rn.Valid() { 339 i.rn = i.rn.assignReg(reg) 340 } 341 case useKindRNRM: 342 if index == 0 { 343 if rn := i.rn.reg(); rn.Valid() { 344 i.rn = i.rn.assignReg(reg) 345 } 346 } else { 347 if rm := i.rm.reg(); rm.Valid() { 348 i.rm = i.rm.assignReg(reg) 349 } 350 } 351 case useKindVecRRRRewrite: 352 if index == 0 { 353 if rn := i.rn.reg(); rn.Valid() { 354 i.rn = i.rn.assignReg(reg) 355 } 356 } else if index == 1 { 357 if rm := i.rm.reg(); rm.Valid() { 358 i.rm = i.rm.assignReg(reg) 359 } 360 } else { 361 if rd := i.rd.reg(); rd.Valid() { 362 i.rd = i.rd.assignReg(reg) 363 } 364 } 365 case useKindRNRN1RM: 366 if index == 0 { 367 if rn := i.rn.reg(); rn.Valid() { 368 i.rn = i.rn.assignReg(reg) 369 } 370 if rn1 := i.rn.reg() + 1; rn1.Valid() { 371 i.rm = i.rm.assignReg(reg + 1) 372 } 373 } else { 374 if rm := i.rm.reg(); rm.Valid() { 375 i.rm = i.rm.assignReg(reg) 376 } 377 } 378 case useKindRNRMRA: 379 if index == 0 { 380 if rn := i.rn.reg(); rn.Valid() { 381 i.rn = i.rn.assignReg(reg) 382 } 383 } else if index == 1 { 384 if rm := i.rm.reg(); rm.Valid() { 385 i.rm = i.rm.assignReg(reg) 386 } 387 } else { 388 if ra := i.ra.reg(); ra.Valid() { 389 i.ra = i.ra.assignReg(reg) 390 } 391 } 392 case useKindRet: 393 panic("BUG: ret instructions shouldn't be assigned") 394 case useKindAMode: 395 if index == 0 { 396 if amodeRN := i.amode.rn; amodeRN.Valid() { 397 i.amode.rn = reg 398 } 399 } 
else { 400 if amodeRM := i.amode.rm; amodeRM.Valid() { 401 i.amode.rm = reg 402 } 403 } 404 case useKindRNAMode: 405 if index == 0 { 406 i.rn = i.rn.assignReg(reg) 407 } else if index == 1 { 408 if amodeRN := i.amode.rn; amodeRN.Valid() { 409 i.amode.rn = reg 410 } else { 411 panic("BUG") 412 } 413 } else { 414 if amodeRM := i.amode.rm; amodeRM.Valid() { 415 i.amode.rm = reg 416 } else { 417 panic("BUG") 418 } 419 } 420 case useKindCond: 421 c := cond(i.u1) 422 switch c.kind() { 423 case condKindRegisterZero: 424 i.u1 = uint64(registerAsRegZeroCond(reg)) 425 case condKindRegisterNotZero: 426 i.u1 = uint64(registerAsRegNotZeroCond(reg)) 427 } 428 case useKindCall: 429 panic("BUG: call instructions shouldn't be assigned") 430 case useKindCallInd: 431 i.rn = i.rn.assignReg(reg) 432 default: 433 panic(fmt.Sprintf("useKind for %v not defined", i)) 434 } 435 } 436 437 func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) { 438 i.kind = call 439 i.u1 = uint64(ref) 440 i.abi = abi 441 } 442 443 func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) { 444 i.kind = callInd 445 i.rn = operandNR(ptr) 446 i.abi = abi 447 } 448 449 func (i *instruction) callFuncRef() ssa.FuncRef { 450 return ssa.FuncRef(i.u1) 451 } 452 453 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 454 func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 455 i.kind = movZ 456 i.rd = operandNR(dst) 457 i.u1 = imm 458 i.u2 = shift 459 if dst64bit { 460 i.u3 = 1 461 } 462 } 463 464 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 465 func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 466 i.kind = movK 467 i.rd = operandNR(dst) 468 i.u1 = imm 469 i.u2 = shift 470 if dst64bit { 471 i.u3 = 1 472 } 473 } 474 475 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 476 func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 477 i.kind = movN 478 i.rd = operandNR(dst) 479 i.u1 = imm 480 i.u2 = shift 481 if dst64bit { 482 i.u3 = 1 483 } 484 } 485 486 func (i *instruction) asNop0() *instruction { 487 i.kind = nop0 488 return i 489 } 490 491 func (i *instruction) asNop0WithLabel(l label) { 492 i.kind = nop0 493 i.u1 = uint64(l) 494 } 495 496 func (i *instruction) nop0Label() label { 497 return label(i.u1) 498 } 499 500 func (i *instruction) asRet(abi *abiImpl) { 501 i.kind = ret 502 i.abi = abi 503 } 504 505 func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { 506 i.kind = storeP64 507 i.rn = operandNR(src1) 508 i.rm = operandNR(src2) 509 i.amode = amode 510 } 511 512 func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { 513 i.kind = loadP64 514 i.rn = operandNR(src1) 515 i.rm = operandNR(src2) 516 i.amode = amode 517 } 518 519 func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { 520 switch sizeInBits { 521 case 8: 522 i.kind = store8 523 case 16: 524 i.kind = store16 525 case 32: 526 if src.reg().RegType() == regalloc.RegTypeInt { 527 i.kind = store32 528 } else { 529 i.kind = fpuStore32 530 } 531 case 64: 532 if src.reg().RegType() == regalloc.RegTypeInt { 533 i.kind = store64 534 } else { 535 i.kind = fpuStore64 536 } 537 case 128: 538 i.kind = fpuStore128 539 } 540 i.rn = src 541 i.amode = amode 542 } 543 544 func (i *instruction) asSLoad(dst operand, 
amode addressMode, sizeInBits byte) { 545 switch sizeInBits { 546 case 8: 547 i.kind = sLoad8 548 case 16: 549 i.kind = sLoad16 550 case 32: 551 i.kind = sLoad32 552 default: 553 panic("BUG") 554 } 555 i.rd = dst 556 i.amode = amode 557 } 558 559 func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { 560 switch sizeInBits { 561 case 8: 562 i.kind = uLoad8 563 case 16: 564 i.kind = uLoad16 565 case 32: 566 i.kind = uLoad32 567 case 64: 568 i.kind = uLoad64 569 } 570 i.rd = dst 571 i.amode = amode 572 } 573 574 func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { 575 switch sizeInBits { 576 case 32: 577 i.kind = fpuLoad32 578 case 64: 579 i.kind = fpuLoad64 580 case 128: 581 i.kind = fpuLoad128 582 } 583 i.rd = dst 584 i.amode = amode 585 } 586 587 func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { 588 // NOTE: currently only no-offset loads are supported; it is doubtful that 589 // we will ever need offset loads, which are only available as post-index. 590 i.kind = vecLoad1R 591 i.rd = rd 592 i.rn = rn 593 i.u1 = uint64(arr) 594 } 595 596 func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { 597 i.kind = cSet 598 i.rd = operandNR(rd) 599 i.u1 = uint64(c) 600 if mask { 601 i.u2 = 1 602 } 603 } 604 605 func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 606 i.kind = cSel 607 i.rd = rd 608 i.rn = rn 609 i.rm = rm 610 i.u1 = uint64(c) 611 if _64bit { 612 i.u3 = 1 613 } 614 } 615 616 func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 617 i.kind = fpuCSel 618 i.rd = rd 619 i.rn = rn 620 i.rm = rm 621 i.u1 = uint64(c) 622 if _64bit { 623 i.u3 = 1 624 } 625 } 626 627 func (i *instruction) asBr(target label) { 628 if target == returnLabel { 629 panic("BUG: call site should special case for returnLabel") 630 } 631 i.kind = br 632 i.u1 = uint64(target) 633 } 634 635 func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) { 636 i.kind = brTableSequence 637 i.rn = operandNR(indexReg) 638 i.targets = targets 639 } 640 641 func (i *instruction) brTableSequenceOffsetsResolved() { 642 i.u3 = 1 // indicate that the offsets are resolved, for debugging. 643 } 644 645 func (i *instruction) brLabel() label { 646 return label(i.u1) 647 } 648 649 // brOffsetResolve is called when the target label is resolved. 650 func (i *instruction) brOffsetResolve(offset int64) { 651 i.u2 = uint64(offset) 652 i.u3 = 1 // indicate that the offset is resolved, for debugging. 653 } 654 655 func (i *instruction) brOffset() int64 { 656 return int64(i.u2) 657 } 658 659 // asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is a register condition rather than a flag condition. 660 func (i *instruction) asCondBr(c cond, target label, is64bit bool) { 661 i.kind = condBr 662 i.u1 = c.asUint64() 663 i.u2 = uint64(target) 664 if is64bit { 665 i.u3 = 1 666 } 667 } 668 669 func (i *instruction) setCondBrTargets(target label) { 670 i.u2 = uint64(target) 671 } 672 673 func (i *instruction) condBrLabel() label { 674 return label(i.u2) 675 } 676 677 // condBrOffsetResolve is called when the target label is resolved. 678 func (i *instruction) condBrOffsetResolve(offset int64) { 679 i.rd.data = uint64(offset) 680 i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. 681 } 682 683 // condBrOffsetResolved returns true if condBrOffsetResolve has already been called.
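// For example, a branch-relocation pass that has computed the byte distance from this
// conditional branch to its target label would typically do (a sketch; `off` is an
// assumed variable holding that distance):
//
//	if !i.condBrOffsetResolved() {
//		i.condBrOffsetResolve(off)
//	}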
684 func (i *instruction) condBrOffsetResolved() bool { 685 return i.rd.data2 == 1 686 } 687 688 func (i *instruction) condBrOffset() int64 { 689 return int64(i.rd.data) 690 } 691 692 func (i *instruction) condBrCond() cond { 693 return cond(i.u1) 694 } 695 696 func (i *instruction) condBr64bit() bool { 697 return i.u3 == 1 698 } 699 700 func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { 701 i.kind = loadFpuConst32 702 i.u1 = raw 703 i.rd = operandNR(rd) 704 } 705 706 func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { 707 i.kind = loadFpuConst64 708 i.u1 = raw 709 i.rd = operandNR(rd) 710 } 711 712 func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { 713 i.kind = loadFpuConst128 714 i.u1 = lo 715 i.u2 = hi 716 i.rd = operandNR(rd) 717 } 718 719 func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { 720 i.kind = fpuCmp 721 i.rn, i.rm = rn, rm 722 if is64bit { 723 i.u3 = 1 724 } 725 } 726 727 func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { 728 i.kind = cCmpImm 729 i.rn = rn 730 i.rm.data = imm 731 i.u1 = uint64(c) 732 i.u2 = uint64(flag) 733 if is64bit { 734 i.u3 = 1 735 } 736 } 737 738 // asALU sets up a basic ALU instruction. 739 func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 740 switch rm.kind { 741 case operandKindNR: 742 i.kind = aluRRR 743 case operandKindSR: 744 i.kind = aluRRRShift 745 case operandKindER: 746 i.kind = aluRRRExtend 747 case operandKindImm12: 748 i.kind = aluRRImm12 749 default: 750 panic("BUG") 751 } 752 i.u1 = uint64(aluOp) 753 i.rd, i.rn, i.rm = rd, rn, rm 754 if dst64bit { 755 i.u3 = 1 756 } 757 } 758 759 // asALURRRR sets up an ALU instruction with three register sources and a register destination (madd/msub). 760 func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { 761 i.kind = aluRRRR 762 i.u1 = uint64(aluOp) 763 i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra 764 if dst64bit { 765 i.u3 = 1 766 } 767 } 768 769 // asALUShift sets up a shift-based ALU instruction. 770 func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 771 switch rm.kind { 772 case operandKindNR: 773 i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
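// For example, a register shift amount such as `lsl x0, x1, x2` keeps the plain aluRRR
// form, while an immediate amount such as `lsl x0, x1, #3` takes the aluRRImmShift form
// selected by the next case.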
774 case operandKindShiftImm: 775 i.kind = aluRRImmShift 776 default: 777 panic("BUG") 778 } 779 i.u1 = uint64(aluOp) 780 i.rd, i.rn, i.rm = rd, rn, rm 781 if dst64bit { 782 i.u3 = 1 783 } 784 } 785 786 func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { 787 i.kind = aluRRBitmaskImm 788 i.u1 = uint64(aluOp) 789 i.rn, i.rd = operandNR(rn), operandNR(rd) 790 i.u2 = imm 791 if dst64bit { 792 i.u3 = 1 793 } 794 } 795 796 func (i *instruction) asMovToFPSR(rn regalloc.VReg) { 797 i.kind = movToFPSR 798 i.rn = operandNR(rn) 799 } 800 801 func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { 802 i.kind = movFromFPSR 803 i.rd = operandNR(rd) 804 } 805 806 func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { 807 i.kind = bitRR 808 i.rn, i.rd = operandNR(rn), operandNR(rd) 809 i.u1 = uint64(bitOp) 810 if is64bit { 811 i.u2 = 1 812 } 813 } 814 815 func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { 816 i.kind = fpuRRR 817 i.u1 = uint64(op) 818 i.rd, i.rn, i.rm = rd, rn, rm 819 if dst64bit { 820 i.u3 = 1 821 } 822 } 823 824 func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { 825 i.kind = fpuRR 826 i.u1 = uint64(op) 827 i.rd, i.rn = rd, rn 828 if dst64bit { 829 i.u3 = 1 830 } 831 } 832 833 func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { 834 i.kind = extend 835 i.rn, i.rd = operandNR(rn), operandNR(rd) 836 i.u1 = uint64(fromBits) 837 i.u2 = uint64(toBits) 838 if signed { 839 i.u3 = 1 840 } 841 } 842 843 func (i *instruction) asMove32(rd, rn regalloc.VReg) { 844 i.kind = mov32 845 i.rn, i.rd = operandNR(rn), operandNR(rd) 846 } 847 848 func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { 849 i.kind = mov64 850 i.rn, i.rd = operandNR(rn), operandNR(rd) 851 return i 852 } 853 854 func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { 855 i.kind = fpuMov64 856 i.rn, i.rd = operandNR(rn), operandNR(rd) 857 } 858 859 func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { 860 i.kind = fpuMov128 861 i.rn, i.rd = operandNR(rn), operandNR(rd) 862 return i 863 } 864 865 func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { 866 i.kind = movToVec 867 i.rd = rd 868 i.rn = rn 869 i.u1, i.u2 = uint64(arr), uint64(index) 870 } 871 872 func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { 873 if signed { 874 i.kind = movFromVecSigned 875 } else { 876 i.kind = movFromVec 877 } 878 i.rd = rd 879 i.rn = rn 880 i.u1, i.u2 = uint64(arr), uint64(index) 881 } 882 883 func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { 884 i.kind = vecDup 885 i.u1 = uint64(arr) 886 i.rn, i.rd = rn, rd 887 } 888 889 func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { 890 i.kind = vecDupElement 891 i.u1 = uint64(arr) 892 i.rn, i.rd = rn, rd 893 i.u2 = uint64(index) 894 } 895 896 func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { 897 i.kind = vecExtract 898 i.u1 = uint64(arr) 899 i.rn, i.rm, i.rd = rn, rm, rd 900 i.u2 = uint64(index) 901 } 902 903 func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { 904 i.kind = vecMovElement 905 i.u1 = uint64(arr) 906 i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) 907 i.rn, i.rd = rn, rd 908 } 909 910 func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { 911 
i.kind = vecMisc 912 i.u1 = uint64(op) 913 i.rn, i.rd = rn, rd 914 i.u2 = uint64(arr) 915 } 916 917 func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { 918 i.kind = vecLanes 919 i.u1 = uint64(op) 920 i.rn, i.rd = rn, rd 921 i.u2 = uint64(arr) 922 } 923 924 func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) { 925 i.kind = vecShiftImm 926 i.u1 = uint64(op) 927 i.rn, i.rm, i.rd = rn, rm, rd 928 i.u2 = uint64(arr) 929 } 930 931 func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { 932 switch nregs { 933 case 0, 1: 934 i.kind = vecTbl 935 case 2: 936 i.kind = vecTbl2 937 if !rn.reg().IsRealReg() { 938 panic("rn is not a RealReg") 939 } 940 if rn.realReg() == v31 { 941 panic("rn cannot be v31") 942 } 943 default: 944 panic(fmt.Sprintf("unsupported number of registers %d", nregs)) 945 } 946 i.rn, i.rm, i.rd = rn, rm, rd 947 i.u2 = uint64(arr) 948 } 949 950 func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { 951 i.kind = vecPermute 952 i.u1 = uint64(op) 953 i.rn, i.rm, i.rd = rn, rm, rd 954 i.u2 = uint64(arr) 955 } 956 957 func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) { 958 i.kind = vecRRR 959 i.u1 = uint64(op) 960 i.rn, i.rd, i.rm = rn, rd, rm 961 i.u2 = uint64(arr) 962 } 963 964 // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. 965 // IMPORTANT: the destination register must be already defined before this instruction. 966 func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { 967 i.kind = vecRRRRewrite 968 i.u1 = uint64(op) 969 i.rn, i.rd, i.rm = rn, rd, rm 970 i.u2 = uint64(arr) 971 } 972 973 func (i *instruction) IsCopy() bool { 974 op := i.kind 975 // mov32 is not included because it is not a copy in the usual sense: it does not preserve the upper 32 bits, 976 // and it is only used when lowering IReduce, not for actual copies. 977 return op == mov64 || op == fpuMov64 || op == fpuMov128 978 } 979 980 // String implements fmt.Stringer.
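// The result is a best-effort, assembly-like rendering for debugging and tests. For example,
// a 64-bit register-register add might print as follows (a sketch; x0VReg, x1VReg and x2VReg
// are assumed to be this package's real-register VReg values):
//
//	i := &instruction{}
//	i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true)
//	fmt.Println(i.String()) // prints something like "add x0, x1, x2"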
981 func (i *instruction) String() (str string) { 982 is64SizeBitToSize := func(u3 uint64) byte { 983 if u3 == 0 { 984 return 32 985 } 986 return 64 987 } 988 989 switch i.kind { 990 case nop0: 991 if i.u1 != 0 { 992 l := label(i.u1) 993 str = fmt.Sprintf("%s:", l) 994 } else { 995 str = "nop0" 996 } 997 case aluRRR: 998 size := is64SizeBitToSize(i.u3) 999 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1000 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), 1001 i.rm.format(size)) 1002 case aluRRRR: 1003 size := is64SizeBitToSize(i.u3) 1004 str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), 1005 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) 1006 case aluRRImm12: 1007 size := is64SizeBitToSize(i.u3) 1008 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1009 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) 1010 case aluRRBitmaskImm: 1011 size := is64SizeBitToSize(i.u3) 1012 rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) 1013 if size == 32 { 1014 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) 1015 } else { 1016 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) 1017 } 1018 case aluRRImmShift: 1019 size := is64SizeBitToSize(i.u3) 1020 str = fmt.Sprintf("%s %s, %s, %#x", 1021 aluOp(i.u1).String(), 1022 formatVRegSized(i.rd.nr(), size), 1023 formatVRegSized(i.rn.nr(), size), 1024 i.rm.shiftImm(), 1025 ) 1026 case aluRRRShift: 1027 size := is64SizeBitToSize(i.u3) 1028 str = fmt.Sprintf("%s %s, %s, %s", 1029 aluOp(i.u1).String(), 1030 formatVRegSized(i.rd.nr(), size), 1031 formatVRegSized(i.rn.nr(), size), 1032 i.rm.format(size), 1033 ) 1034 case aluRRRExtend: 1035 size := is64SizeBitToSize(i.u3) 1036 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1037 formatVRegSized(i.rd.nr(), size), 1038 formatVRegSized(i.rn.nr(), size), 1039 // Regardless of the source size, the register is formatted in 32-bit. 
1040 i.rm.format(32), 1041 ) 1042 case bitRR: 1043 size := is64SizeBitToSize(i.u2) 1044 str = fmt.Sprintf("%s %s, %s", 1045 bitOp(i.u1), 1046 formatVRegSized(i.rd.nr(), size), 1047 formatVRegSized(i.rn.nr(), size), 1048 ) 1049 case uLoad8: 1050 str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1051 case sLoad8: 1052 str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1053 case uLoad16: 1054 str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1055 case sLoad16: 1056 str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1057 case uLoad32: 1058 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1059 case sLoad32: 1060 str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1061 case uLoad64: 1062 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1063 case store8: 1064 str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) 1065 case store16: 1066 str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) 1067 case store32: 1068 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) 1069 case store64: 1070 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1071 case storeP64: 1072 str = fmt.Sprintf("stp %s, %s, %s", 1073 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1074 case loadP64: 1075 str = fmt.Sprintf("ldp %s, %s, %s", 1076 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1077 case mov64: 1078 str = fmt.Sprintf("mov %s, %s", 1079 formatVRegSized(i.rd.nr(), 64), 1080 formatVRegSized(i.rn.nr(), 64)) 1081 case mov32: 1082 str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) 1083 case movZ: 1084 size := is64SizeBitToSize(i.u3) 1085 str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1086 case movN: 1087 size := is64SizeBitToSize(i.u3) 1088 str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1089 case movK: 1090 size := is64SizeBitToSize(i.u3) 1091 str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1092 case extend: 1093 fromBits, toBits := byte(i.u1), byte(i.u2) 1094 1095 var signedStr string 1096 if i.u3 == 1 { 1097 signedStr = "s" 1098 } else { 1099 signedStr = "u" 1100 } 1101 var fromStr string 1102 switch fromBits { 1103 case 8: 1104 fromStr = "b" 1105 case 16: 1106 fromStr = "h" 1107 case 32: 1108 fromStr = "w" 1109 } 1110 str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) 1111 case cSel: 1112 size := is64SizeBitToSize(i.u3) 1113 str = fmt.Sprintf("csel %s, %s, %s, %s", 1114 formatVRegSized(i.rd.nr(), size), 1115 formatVRegSized(i.rn.nr(), size), 1116 formatVRegSized(i.rm.nr(), size), 1117 condFlag(i.u1), 1118 ) 1119 case cSet: 1120 if i.u2 != 0 { 1121 str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1122 } else { 1123 str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1124 } 1125 case cCmpImm: 1126 size := is64SizeBitToSize(i.u3) 1127 str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", 1128 formatVRegSized(i.rn.nr(), size), i.rm.data, 1129 i.u2&0b1111, 1130 
condFlag(i.u1)) 1131 case fpuMov64: 1132 str = fmt.Sprintf("mov %s, %s", 1133 formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), 1134 formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) 1135 case fpuMov128: 1136 str = fmt.Sprintf("mov %s, %s", 1137 formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), 1138 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) 1139 case fpuMovFromVec: 1140 panic("TODO") 1141 case fpuRR: 1142 dstSz := is64SizeBitToSize(i.u3) 1143 srcSz := dstSz 1144 op := fpuUniOp(i.u1) 1145 switch op { 1146 case fpuUniOpCvt32To64: 1147 srcSz = 32 1148 case fpuUniOpCvt64To32: 1149 srcSz = 64 1150 } 1151 str = fmt.Sprintf("%s %s, %s", op.String(), 1152 formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) 1153 case fpuRRR: 1154 size := is64SizeBitToSize(i.u3) 1155 str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), 1156 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1157 case fpuRRI: 1158 panic("TODO") 1159 case fpuRRRR: 1160 panic("TODO") 1161 case fpuCmp: 1162 size := is64SizeBitToSize(i.u3) 1163 str = fmt.Sprintf("fcmp %s, %s", 1164 formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1165 case fpuLoad32: 1166 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1167 case fpuStore32: 1168 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) 1169 case fpuLoad64: 1170 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1171 case fpuStore64: 1172 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1173 case fpuLoad128: 1174 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) 1175 case fpuStore128: 1176 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) 1177 case loadFpuConst32: 1178 str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) 1179 case loadFpuConst64: 1180 str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) 1181 case loadFpuConst128: 1182 str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", 1183 formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) 1184 case fpuToInt: 1185 var op, src, dst string 1186 if signed := i.u1 == 1; signed { 1187 op = "fcvtzs" 1188 } else { 1189 op = "fcvtzu" 1190 } 1191 if src64 := i.u2 == 1; src64 { 1192 src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) 1193 } else { 1194 src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) 1195 } 1196 if dst64 := i.u3 == 1; dst64 { 1197 dst = formatVRegSized(i.rd.nr(), 64) 1198 } else { 1199 dst = formatVRegSized(i.rd.nr(), 32) 1200 } 1201 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1202 1203 case intToFpu: 1204 var op, src, dst string 1205 if signed := i.u1 == 1; signed { 1206 op = "scvtf" 1207 } else { 1208 op = "ucvtf" 1209 } 1210 if src64 := i.u2 == 1; src64 { 1211 src = formatVRegSized(i.rn.nr(), 64) 1212 } else { 1213 src = formatVRegSized(i.rn.nr(), 32) 1214 } 1215 if dst64 := i.u3 == 1; dst64 { 1216 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) 1217 } else { 1218 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) 1219 } 1220 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1221 case fpuCSel: 1222 size := is64SizeBitToSize(i.u3) 1223 str = fmt.Sprintf("fcsel %s, %s, %s, %s", 1224 formatVRegSized(i.rd.nr(), size), 1225 formatVRegSized(i.rn.nr(), size), 1226 
formatVRegSized(i.rm.nr(), size), 1227 condFlag(i.u1), 1228 ) 1229 case movToVec: 1230 var size byte 1231 arr := vecArrangement(i.u1) 1232 switch arr { 1233 case vecArrangementB, vecArrangementH, vecArrangementS: 1234 size = 32 1235 case vecArrangementD: 1236 size = 64 1237 default: 1238 panic("unsupported arrangement " + arr.String()) 1239 } 1240 str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) 1241 case movFromVec, movFromVecSigned: 1242 var size byte 1243 var opcode string 1244 arr := vecArrangement(i.u1) 1245 signed := i.kind == movFromVecSigned 1246 switch arr { 1247 case vecArrangementB, vecArrangementH, vecArrangementS: 1248 size = 32 1249 if signed { 1250 opcode = "smov" 1251 } else { 1252 opcode = "umov" 1253 } 1254 case vecArrangementD: 1255 size = 64 1256 if signed { 1257 opcode = "smov" 1258 } else { 1259 opcode = "mov" 1260 } 1261 default: 1262 panic("unsupported arrangement " + arr.String()) 1263 } 1264 str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) 1265 case vecDup: 1266 str = fmt.Sprintf("dup %s, %s", 1267 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1268 formatVRegSized(i.rn.nr(), 64), 1269 ) 1270 case vecDupElement: 1271 arr := vecArrangement(i.u1) 1272 str = fmt.Sprintf("dup %s, %s", 1273 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1274 formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), 1275 ) 1276 case vecDupFromFpu: 1277 panic("TODO") 1278 case vecExtract: 1279 str = fmt.Sprintf("ext %s, %s, %s, #%d", 1280 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1281 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), 1282 formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), 1283 uint32(i.u2), 1284 ) 1285 case vecExtend: 1286 panic("TODO") 1287 case vecMovElement: 1288 str = fmt.Sprintf("mov %s, %s", 1289 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), 1290 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), 1291 ) 1292 case vecMiscNarrow: 1293 panic("TODO") 1294 case vecRRR, vecRRRRewrite: 1295 str = fmt.Sprintf("%s %s, %s, %s", 1296 vecOp(i.u1), 1297 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1298 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), 1299 formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), 1300 ) 1301 case vecMisc: 1302 vop := vecOp(i.u1) 1303 if vop == vecOpCmeq0 { 1304 str = fmt.Sprintf("cmeq %s, %s, #0", 1305 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1306 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1307 } else { 1308 str = fmt.Sprintf("%s %s, %s", 1309 vop, 1310 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1311 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1312 } 1313 case vecLanes: 1314 arr := vecArrangement(i.u2) 1315 var destArr vecArrangement 1316 switch arr { 1317 case vecArrangement8B, vecArrangement16B: 1318 destArr = vecArrangementH 1319 case vecArrangement4H, vecArrangement8H: 1320 destArr = vecArrangementS 1321 case vecArrangement4S: 1322 destArr = vecArrangementD 1323 default: 1324 panic("invalid arrangement " + arr.String()) 1325 } 1326 str = fmt.Sprintf("%s %s, %s", 1327 vecOp(i.u1), 1328 formatVRegWidthVec(i.rd.nr(), destArr), 1329 formatVRegVec(i.rn.nr(), arr, vecIndexNone)) 1330 case vecShiftImm: 1331 arr := vecArrangement(i.u2) 1332 str = fmt.Sprintf("%s %s, %s, #%d", 1333 vecOp(i.u1), 1334 formatVRegVec(i.rd.nr(), arr, 
vecIndexNone), 1335 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1336 i.rm.shiftImm()) 1337 case vecTbl: 1338 arr := vecArrangement(i.u2) 1339 str = fmt.Sprintf("tbl %s, { %s }, %s", 1340 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1341 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), 1342 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1343 case vecTbl2: 1344 arr := vecArrangement(i.u2) 1345 rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() 1346 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 1347 str = fmt.Sprintf("tbl %s, { %s, %s }, %s", 1348 formatVRegVec(rd, arr, vecIndexNone), 1349 formatVRegVec(rn, vecArrangement16B, vecIndexNone), 1350 formatVRegVec(rn1, vecArrangement16B, vecIndexNone), 1351 formatVRegVec(rm, arr, vecIndexNone)) 1352 case vecPermute: 1353 arr := vecArrangement(i.u2) 1354 str = fmt.Sprintf("%s %s, %s, %s", 1355 vecOp(i.u1), 1356 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1357 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1358 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1359 case movToFPSR: 1360 str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) 1361 case movFromFPSR: 1362 str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) 1363 case call: 1364 if i.u2 > 0 { 1365 str = fmt.Sprintf("bl #%#x", i.u2) 1366 } else { 1367 str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) 1368 } 1369 case callInd: 1370 str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) 1371 case ret: 1372 str = "ret" 1373 case br: 1374 target := label(i.u1) 1375 if i.u3 != 0 { 1376 str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) 1377 } else { 1378 str = fmt.Sprintf("b %s", target.String()) 1379 } 1380 case condBr: 1381 size := is64SizeBitToSize(i.u3) 1382 c := cond(i.u1) 1383 target := label(i.u2) 1384 switch c.kind() { 1385 case condKindRegisterZero: 1386 if !i.condBrOffsetResolved() { 1387 str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) 1388 } else { 1389 str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) 1390 } 1391 case condKindRegisterNotZero: 1392 if offset := i.condBrOffset(); offset != 0 { 1393 str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) 1394 } else { 1395 str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) 1396 } 1397 case condKindCondFlagSet: 1398 if offset := i.condBrOffset(); offset != 0 { 1399 if target == invalidLabel { 1400 str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) 1401 } else { 1402 str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) 1403 } 1404 } else { 1405 str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) 1406 } 1407 } 1408 case adr: 1409 str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) 1410 case brTableSequence: 1411 if i.u3 == 0 { // The offsets haven't been resolved yet. 1412 labels := make([]string, len(i.targets)) 1413 for index, l := range i.targets { 1414 labels[index] = label(l).String() 1415 } 1416 str = fmt.Sprintf("br_table_sequence %s, [%s]", 1417 formatVRegSized(i.rn.nr(), 64), 1418 strings.Join(labels, ", "), 1419 ) 1420 } else { 1421 // See encodeBrTableSequence for the encoding. 
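// Each resolved target below is a signed 32-bit offset relative to the start of the offset
// table; the printed sequence mirrors the emitted one: adr materializes the table address,
// ldrsw loads the selected offset (the index register is scaled by 4 via UXTW 2), add forms
// the absolute target, and br jumps to it.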
1422 offsets := make([]string, len(i.targets)) 1423 for index, offset := range i.targets { 1424 offsets[index] = fmt.Sprintf("%#x", int32(offset)) 1425 } 1426 str = fmt.Sprintf( 1427 `adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`, 1428 formatVRegSized(i.rn.nr(), 64), 1429 formatVRegSized(tmpRegVReg, 64), 1430 offsets, 1431 ) 1432 } 1433 case exitSequence: 1434 str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) 1435 case udf: 1436 str = "udf" 1437 case emitSourceOffsetInfo: 1438 str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) 1439 case vecLoad1R: 1440 str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) 1441 default: 1442 panic(i.kind) 1443 } 1444 return 1445 } 1446 1447 func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { 1448 i.kind = adr 1449 i.rd = operandNR(rd) 1450 i.u1 = uint64(offset) 1451 } 1452 1453 // TODO: delete unnecessary things. 1454 const ( 1455 // nop0 represents a no-op of zero size. 1456 nop0 instructionKind = iota + 1 1457 // aluRRR represents an ALU operation with two register sources and a register destination. 1458 aluRRR 1459 // aluRRRR represents an ALU operation with three register sources and a register destination. 1460 aluRRRR 1461 // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. 1462 aluRRImm12 1463 // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. 1464 aluRRBitmaskImm 1465 // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. 1466 aluRRImmShift 1467 // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. 1468 aluRRRShift 1469 // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. 1470 aluRRRExtend 1471 // bitRR represents a bit op instruction with a single register source. 1472 bitRR 1473 // uLoad8 represents an unsigned 8-bit load. 1474 uLoad8 1475 // sLoad8 represents a signed 8-bit load into 64-bit register. 1476 sLoad8 1477 // uLoad16 represents an unsigned 16-bit load into 64-bit register. 1478 uLoad16 1479 // sLoad16 represents a signed 16-bit load into 64-bit register. 1480 sLoad16 1481 // uLoad32 represents an unsigned 32-bit load into 64-bit register. 1482 uLoad32 1483 // sLoad32 represents a signed 32-bit load into 64-bit register. 1484 sLoad32 1485 // uLoad64 represents a 64-bit load. 1486 uLoad64 1487 // store8 represents an 8-bit store. 1488 store8 1489 // store16 represents a 16-bit store. 1490 store16 1491 // store32 represents a 32-bit store. 1492 store32 1493 // store64 represents a 64-bit store. 1494 store64 1495 // storeP64 represents a store of a pair of registers. 1496 storeP64 1497 // loadP64 represents a load of a pair of registers. 1498 loadP64 1499 // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. 1500 mov64 1501 // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. 1502 mov32 1503 // movZ represents a MOVZ with a 16-bit immediate. 1504 movZ 1505 // movN represents a MOVN with a 16-bit immediate. 1506 movN 1507 // movK represents a MOVK with a 16-bit immediate. 
1508 movK 1509 // extend represents a sign- or zero-extend operation. 1510 extend 1511 // cSel represents a conditional-select operation. 1512 cSel 1513 // cSet represents a conditional-set operation. 1514 cSet 1515 // cCmpImm represents a conditional comparison with an immediate. 1516 cCmpImm 1517 // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. 1518 fpuMov64 1519 // fpuMov128 represents a vector register move. 1520 fpuMov128 1521 // fpuMovFromVec represents a move to scalar from a vector element. 1522 fpuMovFromVec 1523 // fpuRR represents a 1-op FPU instruction. 1524 fpuRR 1525 // fpuRRR represents a 2-op FPU instruction. 1526 fpuRRR 1527 // fpuRRI represents a 2-op FPU instruction with immediate value. 1528 fpuRRI 1529 // fpuRRRR represents a 3-op FPU instruction. 1530 fpuRRRR 1531 // fpuCmp represents a FPU comparison, either 32 or 64 bit. 1532 fpuCmp 1533 // fpuLoad32 represents a floating-point load, single-precision (32 bit). 1534 fpuLoad32 1535 // fpuStore32 represents a floating-point store, single-precision (32 bit). 1536 fpuStore32 1537 // fpuLoad64 represents a floating-point load, double-precision (64 bit). 1538 fpuLoad64 1539 // fpuStore64 represents a floating-point store, double-precision (64 bit). 1540 fpuStore64 1541 // fpuLoad128 represents a floating-point/vector load, 128 bit. 1542 fpuLoad128 1543 // fpuStore128 represents a floating-point/vector store, 128 bit. 1544 fpuStore128 1545 // loadFpuConst32 represents a load of a 32-bit floating-point constant. 1546 loadFpuConst32 1547 // loadFpuConst64 represents a load of a 64-bit floating-point constant. 1548 loadFpuConst64 1549 // loadFpuConst128 represents a load of a 128-bit floating-point constant. 1550 loadFpuConst128 1551 // vecLoad1R represents a load of a one single-element structure that replicates to all lanes of a vector. 1552 vecLoad1R 1553 // fpuToInt represents a conversion from FP to integer. 1554 fpuToInt 1555 // intToFpu represents a conversion from integer to FP. 1556 intToFpu 1557 // fpuCSel represents a 32/64-bit FP conditional select. 1558 fpuCSel 1559 // movToVec represents a move to a vector element from a GPR. 1560 movToVec 1561 // movFromVec represents an unsigned move from a vector element to a GPR. 1562 movFromVec 1563 // movFromVecSigned represents a signed move from a vector element to a GPR. 1564 movFromVecSigned 1565 // vecDup represents a duplication of general-purpose register to vector. 1566 vecDup 1567 // vecDupElement represents a duplication of a vector element to vector or scalar. 1568 vecDupElement 1569 // vecDupFromFpu represents a duplication of scalar to vector. 1570 vecDupFromFpu 1571 // vecExtract represents a vector extraction operation. 1572 vecExtract 1573 // vecExtend represents a vector extension operation. 1574 vecExtend 1575 // vecMovElement represents a move vector element to another vector element operation. 1576 vecMovElement 1577 // vecMiscNarrow represents a vector narrowing operation. 1578 vecMiscNarrow 1579 // vecRRR represents a vector ALU operation. 1580 vecRRR 1581 // vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register. 1582 // For example, BSL instruction rewrites the destination register, and the existing value influences the result. 
1583 // Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive 1584 // the instruction while this instruction doesn't have "def" in the context of register allocation. 1585 vecRRRRewrite 1586 // vecMisc represents a vector two register miscellaneous instruction. 1587 vecMisc 1588 // vecLanes represents a vector instruction across lanes. 1589 vecLanes 1590 // vecShiftImm represents a SIMD scalar shift by immediate instruction. 1591 vecShiftImm 1592 // vecTbl represents a table vector lookup - single register table. 1593 vecTbl 1594 // vecTbl2 represents a table vector lookup - two register table. 1595 vecTbl2 1596 // vecPermute represents a vector permute instruction. 1597 vecPermute 1598 // movToFPSR represents a move to the FPSR. 1599 movToFPSR 1600 // movFromFPSR represents a move from the FPSR. 1601 movFromFPSR 1602 // call represents a machine call instruction. 1603 call 1604 // callInd represents a machine indirect-call instruction. 1605 callInd 1606 // ret represents a machine return instruction. 1607 ret 1608 // br represents an unconditional branch. 1609 br 1610 // condBr represents a conditional branch. 1611 condBr 1612 // adr represents computing the address of a memory location with a PC-relative offset. 1613 adr 1614 // brTableSequence represents a jump-table sequence. 1615 brTableSequence 1616 // exitSequence consists of multiple instructions, and exits the execution immediately. 1617 // See encodeExitSequence. 1618 exitSequence 1619 // UDF is the undefined instruction. For debugging only. 1620 udf 1621 1622 // emitSourceOffsetInfo is a dummy instruction to emit source offset info. 1623 // The existence of this instruction does not affect the execution. 1624 emitSourceOffsetInfo 1625 1626 // ------------------- do not define below this line ------------------- 1627 numInstructionKinds 1628 ) 1629 1630 func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { 1631 i.kind = emitSourceOffsetInfo 1632 i.u1 = uint64(l) 1633 return i 1634 } 1635 1636 func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { 1637 return ssa.SourceOffset(i.u1) 1638 } 1639 1640 func (i *instruction) asUDF() *instruction { 1641 i.kind = udf 1642 return i 1643 } 1644 1645 func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { 1646 i.kind = fpuToInt 1647 i.rn = rn 1648 i.rd = rd 1649 if rdSigned { 1650 i.u1 = 1 1651 } 1652 if src64bit { 1653 i.u2 = 1 1654 } 1655 if dst64bit { 1656 i.u3 = 1 1657 } 1658 } 1659 1660 func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { 1661 i.kind = intToFpu 1662 i.rn = rn 1663 i.rd = rd 1664 if rnSigned { 1665 i.u1 = 1 1666 } 1667 if src64bit { 1668 i.u2 = 1 1669 } 1670 if dst64bit { 1671 i.u3 = 1 1672 } 1673 } 1674 1675 func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { 1676 i.kind = exitSequence 1677 i.rn = operandNR(ctx) 1678 return i 1679 } 1680 1681 // aluOp determines the type of ALU operation. Instructions whose kind is one of 1682 // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend 1683 // would use this type.
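// For example, lowering `x0 = x1 + (x2 << 3)` passes a shifted-register operand to asALU,
// which therefore selects aluRRRShift, whereas adding a small constant that fits in an
// imm12 operand yields aluRRImm12 (a sketch of the mapping implemented in asALU above).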
1684 type aluOp int 1685 1686 func (a aluOp) String() string { 1687 switch a { 1688 case aluOpAdd: 1689 return "add" 1690 case aluOpSub: 1691 return "sub" 1692 case aluOpOrr: 1693 return "orr" 1694 case aluOpAnd: 1695 return "and" 1696 case aluOpBic: 1697 return "bic" 1698 case aluOpEor: 1699 return "eor" 1700 case aluOpAddS: 1701 return "adds" 1702 case aluOpSubS: 1703 return "subs" 1704 case aluOpSMulH: 1705 return "sMulH" 1706 case aluOpUMulH: 1707 return "uMulH" 1708 case aluOpSDiv: 1709 return "sdiv" 1710 case aluOpUDiv: 1711 return "udiv" 1712 case aluOpRotR: 1713 return "ror" 1714 case aluOpLsr: 1715 return "lsr" 1716 case aluOpAsr: 1717 return "asr" 1718 case aluOpLsl: 1719 return "lsl" 1720 case aluOpMAdd: 1721 return "madd" 1722 case aluOpMSub: 1723 return "msub" 1724 } 1725 panic(int(a)) 1726 } 1727 1728 const ( 1729 // 32/64-bit Add. 1730 aluOpAdd aluOp = iota 1731 // 32/64-bit Subtract. 1732 aluOpSub 1733 // 32/64-bit Bitwise OR. 1734 aluOpOrr 1735 // 32/64-bit Bitwise AND. 1736 aluOpAnd 1737 // 32/64-bit Bitwise AND NOT. 1738 aluOpBic 1739 // 32/64-bit Bitwise XOR (Exclusive OR). 1740 aluOpEor 1741 // 32/64-bit Add setting flags. 1742 aluOpAddS 1743 // 32/64-bit Subtract setting flags. 1744 aluOpSubS 1745 // Signed multiply, high-word result. 1746 aluOpSMulH 1747 // Unsigned multiply, high-word result. 1748 aluOpUMulH 1749 // 64-bit Signed divide. 1750 aluOpSDiv 1751 // 64-bit Unsigned divide. 1752 aluOpUDiv 1753 // 32/64-bit Rotate right. 1754 aluOpRotR 1755 // 32/64-bit Logical shift right. 1756 aluOpLsr 1757 // 32/64-bit Arithmetic shift right. 1758 aluOpAsr 1759 // 32/64-bit Logical shift left. 1760 aluOpLsl /// Multiply-add 1761 1762 // MAdd and MSub are only applicable for aluRRRR. 1763 aluOpMAdd 1764 aluOpMSub 1765 ) 1766 1767 // vecOp determines the type of vector operation. Instructions whose kind is one of 1768 // vecOpCnt would use this type. 1769 type vecOp int 1770 1771 // String implements fmt.Stringer. 
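// For example, vecOpFcmgt prints as "fcmgt"; these mnemonics are what the vecRRR, vecMisc
// and related cases of instruction.String splice into their operand formatting.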
1772 func (b vecOp) String() string { 1773 switch b { 1774 case vecOpCnt: 1775 return "cnt" 1776 case vecOpCmeq: 1777 return "cmeq" 1778 case vecOpCmgt: 1779 return "cmgt" 1780 case vecOpCmhi: 1781 return "cmhi" 1782 case vecOpCmge: 1783 return "cmge" 1784 case vecOpCmhs: 1785 return "cmhs" 1786 case vecOpFcmeq: 1787 return "fcmeq" 1788 case vecOpFcmgt: 1789 return "fcmgt" 1790 case vecOpFcmge: 1791 return "fcmge" 1792 case vecOpCmeq0: 1793 return "cmeq0" 1794 case vecOpUaddlv: 1795 return "uaddlv" 1796 case vecOpBit: 1797 return "bit" 1798 case vecOpBic: 1799 return "bic" 1800 case vecOpBsl: 1801 return "bsl" 1802 case vecOpNot: 1803 return "not" 1804 case vecOpAnd: 1805 return "and" 1806 case vecOpOrr: 1807 return "orr" 1808 case vecOpEOR: 1809 return "eor" 1810 case vecOpFadd: 1811 return "fadd" 1812 case vecOpAdd: 1813 return "add" 1814 case vecOpAddp: 1815 return "addp" 1816 case vecOpAddv: 1817 return "addv" 1818 case vecOpSub: 1819 return "sub" 1820 case vecOpFsub: 1821 return "fsub" 1822 case vecOpSmin: 1823 return "smin" 1824 case vecOpUmin: 1825 return "umin" 1826 case vecOpUminv: 1827 return "uminv" 1828 case vecOpSmax: 1829 return "smax" 1830 case vecOpUmax: 1831 return "umax" 1832 case vecOpUmaxp: 1833 return "umaxp" 1834 case vecOpUrhadd: 1835 return "urhadd" 1836 case vecOpFmul: 1837 return "fmul" 1838 case vecOpSqrdmulh: 1839 return "sqrdmulh" 1840 case vecOpMul: 1841 return "mul" 1842 case vecOpUmlal: 1843 return "umlal" 1844 case vecOpFdiv: 1845 return "fdiv" 1846 case vecOpFsqrt: 1847 return "fsqrt" 1848 case vecOpAbs: 1849 return "abs" 1850 case vecOpFabs: 1851 return "fabs" 1852 case vecOpNeg: 1853 return "neg" 1854 case vecOpFneg: 1855 return "fneg" 1856 case vecOpFrintp: 1857 return "frintp" 1858 case vecOpFrintm: 1859 return "frintm" 1860 case vecOpFrintn: 1861 return "frintn" 1862 case vecOpFrintz: 1863 return "frintz" 1864 case vecOpFcvtl: 1865 return "fcvtl" 1866 case vecOpFcvtn: 1867 return "fcvtn" 1868 case vecOpFcvtzu: 1869 return "fcvtzu" 1870 case vecOpFcvtzs: 1871 return "fcvtzs" 1872 case vecOpScvtf: 1873 return "scvtf" 1874 case vecOpUcvtf: 1875 return "ucvtf" 1876 case vecOpSqxtn: 1877 return "sqxtn" 1878 case vecOpUqxtn: 1879 return "uqxtn" 1880 case vecOpSqxtun: 1881 return "sqxtun" 1882 case vecOpRev64: 1883 return "rev64" 1884 case vecOpXtn: 1885 return "xtn" 1886 case vecOpShll: 1887 return "shll" 1888 case vecOpSshl: 1889 return "sshl" 1890 case vecOpSshll: 1891 return "sshll" 1892 case vecOpUshl: 1893 return "ushl" 1894 case vecOpUshll: 1895 return "ushll" 1896 case vecOpSshr: 1897 return "sshr" 1898 case vecOpZip1: 1899 return "zip1" 1900 case vecOpFmin: 1901 return "fmin" 1902 case vecOpFmax: 1903 return "fmax" 1904 } 1905 panic(int(b)) 1906 } 1907 1908 const ( 1909 vecOpCnt vecOp = iota 1910 vecOpCmeq0 1911 vecOpCmeq 1912 vecOpCmgt 1913 vecOpCmhi 1914 vecOpCmge 1915 vecOpCmhs 1916 vecOpFcmeq 1917 vecOpFcmgt 1918 vecOpFcmge 1919 vecOpUaddlv 1920 vecOpBit 1921 vecOpBic 1922 vecOpBsl 1923 vecOpNot 1924 vecOpAnd 1925 vecOpOrr 1926 vecOpEOR 1927 vecOpAdd 1928 vecOpFadd 1929 vecOpAddv 1930 vecOpSqadd 1931 vecOpUqadd 1932 vecOpAddp 1933 vecOpSub 1934 vecOpFsub 1935 vecOpSqsub 1936 vecOpUqsub 1937 vecOpSmin 1938 vecOpUmin 1939 vecOpUminv 1940 vecOpFmin 1941 vecOpSmax 1942 vecOpUmax 1943 vecOpUmaxp 1944 vecOpFmax 1945 vecOpUrhadd 1946 vecOpMul 1947 vecOpFmul 1948 vecOpSqrdmulh 1949 vecOpUmlal 1950 vecOpFdiv 1951 vecOpFsqrt 1952 vecOpAbs 1953 vecOpFabs 1954 vecOpNeg 1955 vecOpFneg 1956 vecOpFrintm 1957 vecOpFrintn 1958 vecOpFrintp 1959 vecOpFrintz 1960 
const (
	vecOpCnt vecOp = iota
	vecOpCmeq0
	vecOpCmeq
	vecOpCmgt
	vecOpCmhi
	vecOpCmge
	vecOpCmhs
	vecOpFcmeq
	vecOpFcmgt
	vecOpFcmge
	vecOpUaddlv
	vecOpBit
	vecOpBic
	vecOpBsl
	vecOpNot
	vecOpAnd
	vecOpOrr
	vecOpEOR
	vecOpAdd
	vecOpFadd
	vecOpAddv
	vecOpSqadd
	vecOpUqadd
	vecOpAddp
	vecOpSub
	vecOpFsub
	vecOpSqsub
	vecOpUqsub
	vecOpSmin
	vecOpUmin
	vecOpUminv
	vecOpFmin
	vecOpSmax
	vecOpUmax
	vecOpUmaxp
	vecOpFmax
	vecOpUrhadd
	vecOpMul
	vecOpFmul
	vecOpSqrdmulh
	vecOpUmlal
	vecOpFdiv
	vecOpFsqrt
	vecOpAbs
	vecOpFabs
	vecOpNeg
	vecOpFneg
	vecOpFrintm
	vecOpFrintn
	vecOpFrintp
	vecOpFrintz
	vecOpFcvtl
	vecOpFcvtn
	vecOpFcvtzs
	vecOpFcvtzu
	vecOpScvtf
	vecOpUcvtf
	vecOpSqxtn
	vecOpSqxtun
	vecOpUqxtn
	vecOpRev64
	vecOpXtn
	vecOpShll
	vecOpSshl
	vecOpSshll
	vecOpUshl
	vecOpUshll
	vecOpSshr
	vecOpZip1
)

// bitOp determines the type of bitwise operation. Instructions of kind bitRR
// use this type; the operation is either bitOpRbit or bitOpClz.
type bitOp int

// String implements fmt.Stringer.
func (b bitOp) String() string {
	switch b {
	case bitOpRbit:
		return "rbit"
	case bitOpClz:
		return "clz"
	}
	panic(int(b))
}

const (
	// 32/64-bit Rbit.
	bitOpRbit bitOp = iota
	// 32/64-bit Clz.
	bitOpClz
)

// fpuUniOp represents a unary floating-point unit (FPU) operation.
type fpuUniOp byte

const (
	fpuUniOpNeg fpuUniOp = iota
	fpuUniOpCvt32To64
	fpuUniOpCvt64To32
	fpuUniOpSqrt
	fpuUniOpRoundPlus
	fpuUniOpRoundMinus
	fpuUniOpRoundZero
	fpuUniOpRoundNearest
	fpuUniOpAbs
)

// String implements fmt.Stringer.
func (f fpuUniOp) String() string {
	switch f {
	case fpuUniOpNeg:
		return "fneg"
	case fpuUniOpCvt32To64:
		return "fcvt"
	case fpuUniOpCvt64To32:
		return "fcvt"
	case fpuUniOpSqrt:
		return "fsqrt"
	case fpuUniOpRoundPlus:
		return "frintp"
	case fpuUniOpRoundMinus:
		return "frintm"
	case fpuUniOpRoundZero:
		return "frintz"
	case fpuUniOpRoundNearest:
		return "frintn"
	case fpuUniOpAbs:
		return "fabs"
	}
	panic(int(f))
}

// fpuBinOp represents a binary floating-point unit (FPU) operation.
type fpuBinOp byte

const (
	fpuBinOpAdd = iota
	fpuBinOpSub
	fpuBinOpMul
	fpuBinOpDiv
	fpuBinOpMax
	fpuBinOpMin
)

// String implements fmt.Stringer.
func (f fpuBinOp) String() string {
	switch f {
	case fpuBinOpAdd:
		return "fadd"
	case fpuBinOpSub:
		return "fsub"
	case fpuBinOpMul:
		return "fmul"
	case fpuBinOpDiv:
		return "fdiv"
	case fpuBinOpMax:
		return "fmax"
	case fpuBinOpMin:
		return "fmin"
	}
	panic(int(f))
}
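// Illustrative sketch, not part of the original file: the scalar rounding ops
// above correspond one-to-one to the vector frint* ops, which a hypothetical
// helper for lowering a lane-wise rounding could exploit. The helper name is
// made up for this example.
func exampleRoundingVecOp(f fpuUniOp) vecOp {
	switch f {
	case fpuUniOpRoundPlus:
		return vecOpFrintp // round toward +infinity.
	case fpuUniOpRoundMinus:
		return vecOpFrintm // round toward -infinity.
	case fpuUniOpRoundZero:
		return vecOpFrintz // round toward zero.
	case fpuUniOpRoundNearest:
		return vecOpFrintn // round to nearest, ties to even.
	default:
		panic("not a rounding op")
	}
}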
// extMode represents the mode of a register operand extension.
// For example, aluRRRExtend instructions need this info to determine the extensions.
type extMode byte

const (
	extModeNone extMode = iota
	// extModeZeroExtend32 represents a zero-extension to 32 bits if the original bit size is less than 32.
	extModeZeroExtend32
	// extModeSignExtend32 represents a sign-extension to 32 bits if the original bit size is less than 32.
	extModeSignExtend32
	// extModeZeroExtend64 represents a zero-extension to 64 bits if the original bit size is less than 64.
	extModeZeroExtend64
	// extModeSignExtend64 represents a sign-extension to 64 bits if the original bit size is less than 64.
	extModeSignExtend64
)

func (e extMode) bits() byte {
	switch e {
	case extModeZeroExtend32, extModeSignExtend32:
		return 32
	case extModeZeroExtend64, extModeSignExtend64:
		return 64
	default:
		return 0
	}
}

func (e extMode) signed() bool {
	switch e {
	case extModeSignExtend32, extModeSignExtend64:
		return true
	default:
		return false
	}
}

func extModeOf(t ssa.Type, signed bool) extMode {
	switch t.Bits() {
	case 32:
		if signed {
			return extModeSignExtend32
		}
		return extModeZeroExtend32
	case 64:
		if signed {
			return extModeSignExtend64
		}
		return extModeZeroExtend64
	default:
		panic("TODO? do we need narrower than 32 bits?")
	}
}

type extendOp byte

const (
	extendOpUXTB extendOp = 0b000
	extendOpUXTH extendOp = 0b001
	extendOpUXTW extendOp = 0b010
	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpUXTX extendOp = 0b011
	extendOpSXTB extendOp = 0b100
	extendOpSXTH extendOp = 0b101
	extendOpSXTW extendOp = 0b110
	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpSXTX extendOp = 0b111
	extendOpNone extendOp = 0xff
)

func (e extendOp) srcBits() byte {
	switch e {
	case extendOpUXTB, extendOpSXTB:
		return 8
	case extendOpUXTH, extendOpSXTH:
		return 16
	case extendOpUXTW, extendOpSXTW:
		return 32
	case extendOpUXTX, extendOpSXTX:
		return 64
	}
	panic(int(e))
}

func (e extendOp) String() string {
	switch e {
	case extendOpUXTB:
		return "UXTB"
	case extendOpUXTH:
		return "UXTH"
	case extendOpUXTW:
		return "UXTW"
	case extendOpUXTX:
		return "UXTX"
	case extendOpSXTB:
		return "SXTB"
	case extendOpSXTH:
		return "SXTH"
	case extendOpSXTW:
		return "SXTW"
	case extendOpSXTX:
		return "SXTX"
	}
	panic(int(e))
}

func extendOpFrom(signed bool, from byte) extendOp {
	switch from {
	case 8:
		if signed {
			return extendOpSXTB
		}
		return extendOpUXTB
	case 16:
		if signed {
			return extendOpSXTH
		}
		return extendOpUXTH
	case 32:
		if signed {
			return extendOpSXTW
		}
		return extendOpUXTW
	case 64:
		if signed {
			return extendOpSXTX
		}
		return extendOpUXTX
	}
	panic("invalid extendOpFrom")
}

type shiftOp byte

const (
	shiftOpLSL shiftOp = 0b00
	shiftOpLSR shiftOp = 0b01
	shiftOpASR shiftOp = 0b10
	shiftOpROR shiftOp = 0b11
)

func (s shiftOp) String() string {
	switch s {
	case shiftOpLSL:
		return "lsl"
	case shiftOpLSR:
		return "lsr"
	case shiftOpASR:
		return "asr"
	case shiftOpROR:
		return "ror"
	}
	panic(int(s))
}

const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
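// Illustrative sketch, not part of the original file: extendOpFrom and
// srcBits round-trip the source width, which is how a caller widening an
// unsigned 16-bit value for an extended-register ALU form could derive the
// operand extension. The function name is made up for this example.
func exampleUnsigned16BitExtend() (extendOp, byte) {
	op := extendOpFrom(false, 16) // extendOpUXTH: unsigned extend from 16 bits.
	return op, op.srcBits()       // srcBits reports 16.
}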
// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {
	switch i.kind {
	case exitSequence:
		return exitSequenceSize
	case nop0:
		return 0
	case emitSourceOffsetInfo:
		return 0
	case loadFpuConst32:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 4
	case loadFpuConst64:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 8
	case loadFpuConst128:
		if i.u1 == 0 && i.u2 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 16
	case brTableSequence:
		return 4*4 + int64(len(i.targets))*4
	default:
		return 4
	}
}

// vecArrangement is the arrangement of data within a vector register.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
	vecArrangement2D

	// Assign each vector size specifier to a vector arrangement ID.
	// Instructions can only have an arrangement or a size specifier, but not both, so it
	// simplifies the internal representation of vector instructions by being able to
	// store either into the same field.

	// vecArrangementB is a size specifier of byte (8-bit)
	vecArrangementB
	// vecArrangementH is a size specifier of halfword (16-bit)
	vecArrangementH
	// vecArrangementS is a size specifier of word (32-bit)
	vecArrangementS
	// vecArrangementD is a size specifier of doubleword (64-bit)
	vecArrangementD
	// vecArrangementQ is a size specifier of quadword, i.e. the entire vector (128-bit)
	vecArrangementQ
)

// String implements fmt.Stringer.
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangement8B:
		ret = "8B"
	case vecArrangement16B:
		ret = "16B"
	case vecArrangement4H:
		ret = "4H"
	case vecArrangement8H:
		ret = "8H"
	case vecArrangement2S:
		ret = "2S"
	case vecArrangement4S:
		ret = "4S"
	case vecArrangement1D:
		ret = "1D"
	case vecArrangement2D:
		ret = "2D"
	case vecArrangementB:
		ret = "B"
	case vecArrangementH:
		ret = "H"
	case vecArrangementS:
		ret = "S"
	case vecArrangementD:
		ret = "D"
	case vecArrangementQ:
		ret = "Q"
	case vecArrangementNone:
		ret = "none"
	default:
		panic(v)
	}
	return
}
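// Illustrative sketch, not part of the original file: the lane count implied
// by each full-width arrangement, e.g. as a debugging aid. The size
// specifiers (vecArrangementB..Q) and vecArrangementNone report zero lanes
// here. The function name is made up for this example.
func exampleLaneCount(v vecArrangement) int {
	switch v {
	case vecArrangement16B:
		return 16
	case vecArrangement8B, vecArrangement8H:
		return 8
	case vecArrangement4H, vecArrangement4S:
		return 4
	case vecArrangement2S, vecArrangement2D:
		return 2
	case vecArrangement1D:
		return 1
	default:
		return 0
	}
}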
// vecIndex is the index of an element of a vector register
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)

func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
	switch lane {
	case ssa.VecLaneI8x16:
		return vecArrangement16B
	case ssa.VecLaneI16x8:
		return vecArrangement8H
	case ssa.VecLaneI32x4:
		return vecArrangement4S
	case ssa.VecLaneI64x2:
		return vecArrangement2D
	case ssa.VecLaneF32x4:
		return vecArrangement4S
	case ssa.VecLaneF64x2:
		return vecArrangement2D
	default:
		panic(lane)
	}
}
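// Illustrative sketch, not part of the original file: ssaLaneToArrangement
// paired with a vecOp, roughly the pieces a lowering step for an i32x4
// addition needs before it fills in a vecRRR instruction. The helper name is
// made up for this example.
func exampleI32x4Add() (vecOp, vecArrangement) {
	return vecOpAdd, ssaLaneToArrangement(ssa.VecLaneI32x4)
}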