github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/instr.go

package arm64

import (
	"fmt"
	"math"
	"strings"

	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
)

type (
	// instruction represents either a real arm64 instruction or a meta instruction
	// that is convenient for code generation. For example, inline constants are also
	// treated as instructions.
	//
	// Each instruction knows how to encode itself into machine code. Hence, the final
	// output of compilation can be considered equivalent to a sequence of these instructions.
	//
	// Each field is interpreted depending on the kind.
	//
	// TODO: optimize the layout later once the impl settles.
	instruction struct {
		kind                instructionKind
		prev, next          *instruction
		u1, u2, u3          uint64
		rd, rm, rn, ra      operand
		amode               addressMode
		abi                 *abiImpl
		targets             []uint32
		addedBeforeRegAlloc bool
	}

	// instructionKind represents the kind of instruction.
	// This controls how the instruction struct is interpreted.
	instructionKind int
)
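
// Illustrative sketch (not from the original source): instructions are built in place
// via the as* setters defined below and then inspected via String or the regalloc
// interface. aluOpAdd and operandNR are assumed to exist elsewhere in this package;
// rd, rn, and rm stand for hypothetical virtual registers.
//
//	var i instruction
//	i.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
//	_ = i.String() // e.g. "add x0, x1, x2" once real registers are assigned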

// asNop0 turns i into a zero-sized no-op.
func asNop0(i *instruction) {
	i.kind = nop0
}

// setNext sets i's next link in the instruction list to next.
func setNext(i, next *instruction) {
	i.next = next
}

// setPrev sets i's previous link in the instruction list to prev.
func setPrev(i, prev *instruction) {
	i.prev = prev
}

// IsCall implements regalloc.Instr IsCall.
func (i *instruction) IsCall() bool {
	return i.kind == call
}

// IsIndirectCall implements regalloc.Instr IsIndirectCall.
func (i *instruction) IsIndirectCall() bool {
	return i.kind == callInd
}

// IsReturn implements regalloc.Instr IsReturn.
func (i *instruction) IsReturn() bool {
	return i.kind == ret
}

type defKind byte

const (
	defKindNone defKind = iota + 1
	defKindRD
	defKindCall
)

var defKinds = [numInstructionKinds]defKind{
	adr:                  defKindRD,
	aluRRR:               defKindRD,
	aluRRRR:              defKindRD,
	aluRRImm12:           defKindRD,
	aluRRBitmaskImm:      defKindRD,
	aluRRRShift:          defKindRD,
	aluRRImmShift:        defKindRD,
	aluRRRExtend:         defKindRD,
	bitRR:                defKindRD,
	movZ:                 defKindRD,
	movK:                 defKindRD,
	movN:                 defKindRD,
	mov32:                defKindRD,
	mov64:                defKindRD,
	fpuMov64:             defKindRD,
	fpuMov128:            defKindRD,
	fpuRR:                defKindRD,
	fpuRRR:               defKindRD,
	nop0:                 defKindNone,
	call:                 defKindCall,
	callInd:              defKindCall,
	ret:                  defKindNone,
	store8:               defKindNone,
	store16:              defKindNone,
	store32:              defKindNone,
	store64:              defKindNone,
	exitSequence:         defKindNone,
	condBr:               defKindNone,
	br:                   defKindNone,
	brTableSequence:      defKindNone,
	cSet:                 defKindRD,
	extend:               defKindRD,
	fpuCmp:               defKindNone,
	uLoad8:               defKindRD,
	uLoad16:              defKindRD,
	uLoad32:              defKindRD,
	sLoad8:               defKindRD,
	sLoad16:              defKindRD,
	sLoad32:              defKindRD,
	uLoad64:              defKindRD,
	fpuLoad32:            defKindRD,
	fpuLoad64:            defKindRD,
	fpuLoad128:           defKindRD,
	vecLoad1R:            defKindRD,
	loadFpuConst32:       defKindRD,
	loadFpuConst64:       defKindRD,
	loadFpuConst128:      defKindRD,
	fpuStore32:           defKindNone,
	fpuStore64:           defKindNone,
	fpuStore128:          defKindNone,
	udf:                  defKindNone,
	cSel:                 defKindRD,
	fpuCSel:              defKindRD,
	movToVec:             defKindRD,
	movFromVec:           defKindRD,
	movFromVecSigned:     defKindRD,
	vecDup:               defKindRD,
	vecDupElement:        defKindRD,
	vecExtract:           defKindRD,
	vecMisc:              defKindRD,
	vecMovElement:        defKindRD,
	vecLanes:             defKindRD,
	vecShiftImm:          defKindRD,
	vecTbl:               defKindRD,
	vecTbl2:              defKindRD,
	vecPermute:           defKindRD,
	vecRRR:               defKindRD,
	vecRRRRewrite:        defKindNone,
	fpuToInt:             defKindRD,
	intToFpu:             defKindRD,
	cCmpImm:              defKindNone,
	movToFPSR:            defKindNone,
	movFromFPSR:          defKindRD,
	emitSourceOffsetInfo: defKindNone,
}

// Defs returns the list of regalloc.VReg that are defined by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch defKinds[i.kind] {
	case defKindNone:
	case defKindRD:
		*regs = append(*regs, i.rd.nr())
	case defKindCall:
		*regs = append(*regs, i.abi.retRealRegs...)
	default:
		panic(fmt.Sprintf("defKind for %v not defined", i))
	}
	return *regs
}
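
// Usage sketch (an assumption about callers, not code from this file): a single scratch
// slice is reused across instructions so Defs does not allocate in steady state.
//
//	var scratch []regalloc.VReg
//	for cur := head; cur != nil; cur = cur.next {
//		for _, def := range cur.Defs(&scratch) {
//			// ...record the definition...
//		}
//	}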

// AssignDef implements regalloc.Instr AssignDef.
func (i *instruction) AssignDef(reg regalloc.VReg) {
	switch defKinds[i.kind] {
	case defKindNone:
	case defKindRD:
		i.rd = i.rd.assignReg(reg)
	case defKindCall:
		panic("BUG: call instructions shouldn't be assigned")
	default:
		panic(fmt.Sprintf("defKind for %v not defined", i))
	}
}

type useKind byte

const (
	useKindNone useKind = iota + 1
	useKindRN
	useKindRNRM
	useKindRNRMRA
	useKindRNRN1RM
	useKindRet
	useKindCall
	useKindCallInd
	useKindAMode
	useKindRNAMode
	useKindCond
	useKindVecRRRRewrite
)

var useKinds = [numInstructionKinds]useKind{
	udf:                  useKindNone,
	aluRRR:               useKindRNRM,
	aluRRRR:              useKindRNRMRA,
	aluRRImm12:           useKindRN,
	aluRRBitmaskImm:      useKindRN,
	aluRRRShift:          useKindRNRM,
	aluRRImmShift:        useKindRN,
	aluRRRExtend:         useKindRNRM,
	bitRR:                useKindRN,
	movZ:                 useKindNone,
	movK:                 useKindNone,
	movN:                 useKindNone,
	mov32:                useKindRN,
	mov64:                useKindRN,
	fpuMov64:             useKindRN,
	fpuMov128:            useKindRN,
	fpuRR:                useKindRN,
	fpuRRR:               useKindRNRM,
	nop0:                 useKindNone,
	call:                 useKindCall,
	callInd:              useKindCallInd,
	ret:                  useKindRet,
	store8:               useKindRNAMode,
	store16:              useKindRNAMode,
	store32:              useKindRNAMode,
	store64:              useKindRNAMode,
	exitSequence:         useKindRN,
	condBr:               useKindCond,
	br:                   useKindNone,
	brTableSequence:      useKindRN,
	cSet:                 useKindNone,
	extend:               useKindRN,
	fpuCmp:               useKindRNRM,
	uLoad8:               useKindAMode,
	uLoad16:              useKindAMode,
	uLoad32:              useKindAMode,
	sLoad8:               useKindAMode,
	sLoad16:              useKindAMode,
	sLoad32:              useKindAMode,
	uLoad64:              useKindAMode,
	fpuLoad32:            useKindAMode,
	fpuLoad64:            useKindAMode,
	fpuLoad128:           useKindAMode,
	fpuStore32:           useKindRNAMode,
	fpuStore64:           useKindRNAMode,
	fpuStore128:          useKindRNAMode,
	loadFpuConst32:       useKindNone,
	loadFpuConst64:       useKindNone,
	loadFpuConst128:      useKindNone,
	vecLoad1R:            useKindRN,
	cSel:                 useKindRNRM,
	fpuCSel:              useKindRNRM,
	movToVec:             useKindRN,
	movFromVec:           useKindRN,
	movFromVecSigned:     useKindRN,
	vecDup:               useKindRN,
	vecDupElement:        useKindRN,
	vecExtract:           useKindRNRM,
	cCmpImm:              useKindRN,
	vecMisc:              useKindRN,
	vecMovElement:        useKindRN,
	vecLanes:             useKindRN,
	vecShiftImm:          useKindRN,
	vecTbl:               useKindRNRM,
	vecTbl2:              useKindRNRN1RM,
	vecRRR:               useKindRNRM,
	vecRRRRewrite:        useKindVecRRRRewrite,
	vecPermute:           useKindRNRM,
	fpuToInt:             useKindRN,
	intToFpu:             useKindRN,
	movToFPSR:            useKindRN,
	movFromFPSR:          useKindNone,
	adr:                  useKindNone,
	emitSourceOffsetInfo: useKindNone,
}

// Uses returns the list of regalloc.VReg that are used by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch useKinds[i.kind] {
	case useKindNone:
	case useKindRN:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
	case useKindRNRM:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
	case useKindRNRMRA:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
		if ra := i.ra.reg(); ra.Valid() {
			*regs = append(*regs, ra)
		}
	case useKindRNRN1RM:
		if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() {
			rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
			*regs = append(*regs, rn, rn1)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
	case useKindRet:
		*regs = append(*regs, i.abi.retRealRegs...)
	case useKindAMode:
		if amodeRN := i.amode.rn; amodeRN.Valid() {
			*regs = append(*regs, amodeRN)
		}
		if amodeRM := i.amode.rm; amodeRM.Valid() {
			*regs = append(*regs, amodeRM)
		}
	case useKindRNAMode:
		*regs = append(*regs, i.rn.reg())
		if amodeRN := i.amode.rn; amodeRN.Valid() {
			*regs = append(*regs, amodeRN)
		}
		if amodeRM := i.amode.rm; amodeRM.Valid() {
			*regs = append(*regs, amodeRM)
		}
	case useKindCond:
		cnd := cond(i.u1)
		if cnd.kind() != condKindCondFlagSet {
			*regs = append(*regs, cnd.register())
		}
	case useKindCall:
		*regs = append(*regs, i.abi.argRealRegs...)
	case useKindCallInd:
		*regs = append(*regs, i.rn.nr())
		*regs = append(*regs, i.abi.argRealRegs...)
	case useKindVecRRRRewrite:
		*regs = append(*regs, i.rn.reg())
		*regs = append(*regs, i.rm.reg())
		*regs = append(*regs, i.rd.reg())
	default:
		panic(fmt.Sprintf("useKind for %v not defined", i))
	}
	return *regs
}

// AssignUse implements regalloc.Instr AssignUse.
func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
	switch useKinds[i.kind] {
	case useKindNone:
	case useKindRN:
		if rn := i.rn.reg(); rn.Valid() {
			i.rn = i.rn.assignReg(reg)
		}
	case useKindRNRM:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		}
	case useKindVecRRRRewrite:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else if index == 1 {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		} else {
			if rd := i.rd.reg(); rd.Valid() {
				i.rd = i.rd.assignReg(reg)
			}
		}
	case useKindRNRN1RM:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
			if rn1 := i.rn.reg() + 1; rn1.Valid() {
				i.rm = i.rm.assignReg(reg + 1)
			}
		} else {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		}
	case useKindRNRMRA:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else if index == 1 {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		} else {
			if ra := i.ra.reg(); ra.Valid() {
				i.ra = i.ra.assignReg(reg)
			}
		}
	case useKindRet:
		panic("BUG: ret instructions shouldn't be assigned")
	case useKindAMode:
		if index == 0 {
			if amodeRN := i.amode.rn; amodeRN.Valid() {
				i.amode.rn = reg
			}
		} else {
			if amodeRM := i.amode.rm; amodeRM.Valid() {
				i.amode.rm = reg
			}
		}
	case useKindRNAMode:
		if index == 0 {
			i.rn = i.rn.assignReg(reg)
		} else if index == 1 {
			if amodeRN := i.amode.rn; amodeRN.Valid() {
				i.amode.rn = reg
			} else {
				panic("BUG")
			}
		} else {
			if amodeRM := i.amode.rm; amodeRM.Valid() {
				i.amode.rm = reg
			} else {
				panic("BUG")
			}
		}
	case useKindCond:
		c := cond(i.u1)
		switch c.kind() {
		case condKindRegisterZero:
			i.u1 = uint64(registerAsRegZeroCond(reg))
		case condKindRegisterNotZero:
			i.u1 = uint64(registerAsRegNotZeroCond(reg))
		}
	case useKindCall:
		panic("BUG: call instructions shouldn't be assigned")
	case useKindCallInd:
		i.rn = i.rn.assignReg(reg)
	default:
		panic(fmt.Sprintf("useKind for %v not defined", i))
	}
}
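
// Sketch of the register-allocation contract (assumed caller behavior, for illustration):
// the allocator pairs Uses with AssignUse by position, first collecting the virtual
// registers and then writing back the chosen real registers at the same indexes.
// "allocation" is a hypothetical map used only in this example.
//
//	uses := instr.Uses(&scratch)
//	for idx, vreg := range uses {
//		instr.AssignUse(idx, allocation[vreg])
//	}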

func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) {
	i.kind = call
	i.u1 = uint64(ref)
	i.abi = abi
}

func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) {
	i.kind = callInd
	i.rn = operandNR(ptr)
	i.abi = abi
}

func (i *instruction) callFuncRef() ssa.FuncRef {
	return ssa.FuncRef(i.u1)
}

// asMOVZ encodes MOVZ. shift is the actual shift amount divided by 16, and must be in
// the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movZ
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}

// asMOVK encodes MOVK. shift is the actual shift amount divided by 16, and must be in
// the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movK
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}

// asMOVN encodes MOVN. shift is the actual shift amount divided by 16, and must be in
// the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movN
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}
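
// Worked example (illustrative; i0..i3 stand for freshly allocated instructions):
// materializing the 64-bit constant 0x123456789abcdef0 into dst takes one MOVZ plus
// three MOVKs, one per 16-bit chunk, with shift equal to the chunk index:
//
//	i0.asMOVZ(dst, 0xdef0, 0, true) // movz dst, #0xdef0, lsl 0
//	i1.asMOVK(dst, 0x9abc, 1, true) // movk dst, #0x9abc, lsl 16
//	i2.asMOVK(dst, 0x5678, 2, true) // movk dst, #0x5678, lsl 32
//	i3.asMOVK(dst, 0x1234, 3, true) // movk dst, #0x1234, lsl 48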

func (i *instruction) asNop0() *instruction {
	i.kind = nop0
	return i
}

func (i *instruction) asNop0WithLabel(l label) {
	i.kind = nop0
	i.u1 = uint64(l)
}

func (i *instruction) nop0Label() label {
	return label(i.u1)
}

func (i *instruction) asRet(abi *abiImpl) {
	i.kind = ret
	i.abi = abi
}

func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) {
	i.kind = storeP64
	i.rn = operandNR(src1)
	i.rm = operandNR(src2)
	i.amode = amode
}

func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) {
	i.kind = loadP64
	i.rn = operandNR(src1)
	i.rm = operandNR(src2)
	i.amode = amode
}

func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = store8
	case 16:
		i.kind = store16
	case 32:
		if src.reg().RegType() == regalloc.RegTypeInt {
			i.kind = store32
		} else {
			i.kind = fpuStore32
		}
	case 64:
		if src.reg().RegType() == regalloc.RegTypeInt {
			i.kind = store64
		} else {
			i.kind = fpuStore64
		}
	case 128:
		i.kind = fpuStore128
	}
	i.rn = src
	i.amode = amode
}
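
// Note (illustrative): asStore derives the kind from both the size and the register
// class of src, so a single entry point covers integer and floating-point stores. With
// hypothetical virtual registers intReg (integer class) and fpReg (float class):
//
//	i.asStore(operandNR(intReg), amode, 32) // kind == store32
//	i.asStore(operandNR(fpReg), amode, 32)  // kind == fpuStore32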

func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = sLoad8
	case 16:
		i.kind = sLoad16
	case 32:
		i.kind = sLoad32
	default:
		panic("BUG")
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = uLoad8
	case 16:
		i.kind = uLoad16
	case 32:
		i.kind = uLoad32
	case 64:
		i.kind = uLoad64
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 32:
		i.kind = fpuLoad32
	case 64:
		i.kind = fpuLoad64
	case 128:
		i.kind = fpuLoad128
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
	// NOTE: currently only no-offset loads are supported. It is unlikely that we will
	// need offset loads, which are only available with post-index addressing anyway.
	i.kind = vecLoad1R
	i.rd = rd
	i.rn = rn
	i.u1 = uint64(arr)
}

func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) {
	i.kind = cSet
	i.rd = operandNR(rd)
	i.u1 = uint64(c)
	if mask {
		i.u2 = 1
	}
}

func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
	i.kind = cSel
	i.rd = rd
	i.rn = rn
	i.rm = rm
	i.u1 = uint64(c)
	if _64bit {
		i.u3 = 1
	}
}

func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
	i.kind = fpuCSel
	i.rd = rd
	i.rn = rn
	i.rm = rm
	i.u1 = uint64(c)
	if _64bit {
		i.u3 = 1
	}
}

func (i *instruction) asBr(target label) {
	if target == labelReturn {
		panic("BUG: call site should special case for returnLabel")
	}
	i.kind = br
	i.u1 = uint64(target)
}

func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) {
	i.kind = brTableSequence
	i.rn = operandNR(indexReg)
	i.targets = targets
}

func (i *instruction) brTableSequenceOffsetsResolved() {
	i.u3 = 1 // indicate that the offsets are resolved, for debugging.
}

func (i *instruction) brLabel() label {
	return label(i.u1)
}

// brOffsetResolve is called when the target label is resolved.
func (i *instruction) brOffsetResolve(offset int64) {
	i.u2 = uint64(offset)
	i.u3 = 1 // indicate that the offset is resolved, for debugging.
}

func (i *instruction) brOffset() int64 {
	return int64(i.u2)
}

// asCondBr encodes a conditional branch instruction. is64bit is only needed when the
// condition is a register condition rather than a flag condition.
func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
	i.kind = condBr
	i.u1 = c.asUint64()
	i.u2 = uint64(target)
	if is64bit {
		i.u3 = 1
	}
}

func (i *instruction) setCondBrTargets(target label) {
	i.u2 = uint64(target)
}

func (i *instruction) condBrLabel() label {
	return label(i.u2)
}

// condBrOffsetResolve is called when the target label is resolved.
func (i *instruction) condBrOffsetResolve(offset int64) {
	i.rd.data = uint64(offset)
	i.rd.data2 = 1 // indicate that the offset is resolved, for debugging.
}

// condBrOffsetResolved returns true if condBrOffsetResolve is already called.
func (i *instruction) condBrOffsetResolved() bool {
	return i.rd.data2 == 1
}

func (i *instruction) condBrOffset() int64 {
	return int64(i.rd.data)
}

func (i *instruction) condBrCond() cond {
	return cond(i.u1)
}

func (i *instruction) condBr64bit() bool {
	return i.u3 == 1
}

func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {
	i.kind = loadFpuConst32
	i.u1 = raw
	i.rd = operandNR(rd)
}

func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {
	i.kind = loadFpuConst64
	i.u1 = raw
	i.rd = operandNR(rd)
}

func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {
	i.kind = loadFpuConst128
	i.u1 = lo
	i.u2 = hi
	i.rd = operandNR(rd)
}

func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {
	i.kind = fpuCmp
	i.rn, i.rm = rn, rm
	if is64bit {
		i.u3 = 1
	}
}

func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) {
	i.kind = cCmpImm
	i.rn = rn
	i.rm.data = imm
	i.u1 = uint64(c)
	i.u2 = uint64(flag)
	if is64bit {
		i.u3 = 1
	}
}

// asALU sets up a basic ALU instruction.
func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
	switch rm.kind {
	case operandKindNR:
		i.kind = aluRRR
	case operandKindSR:
		i.kind = aluRRRShift
	case operandKindER:
		i.kind = aluRRRExtend
	case operandKindImm12:
		i.kind = aluRRImm12
	default:
		panic("BUG")
	}
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}
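
// Note (illustrative): the instruction kind is derived from the operand kind of rm, so
// the same setter yields the register, shifted-register, extended-register, or imm12
// form. For instance, passing an imm12 operand (operandImm12 is assumed to be the
// corresponding constructor in this package):
//
//	i.asALU(aluOpAdd, rd, rn, operandImm12(1, 0), true) // kind == aluRRImm12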

// asALURRRR sets up an ALU instruction with three register sources.
func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) {
	i.kind = aluRRRR
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra
	if dst64bit {
		i.u3 = 1
	}
}

// asALUShift sets up a shift-based ALU instruction.
func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
	switch rm.kind {
	case operandKindNR:
		i.kind = aluRRR // If the shift amount is a register, the instruction is encoded as a normal ALU instruction with two register operands.
	case operandKindShiftImm:
		i.kind = aluRRImmShift
	default:
		panic("BUG")
	}
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {
	i.kind = aluRRBitmaskImm
	i.u1 = uint64(aluOp)
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u2 = imm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asMovToFPSR(rn regalloc.VReg) {
	i.kind = movToFPSR
	i.rn = operandNR(rn)
}

func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {
	i.kind = movFromFPSR
	i.rd = operandNR(rd)
}

func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {
	i.kind = bitRR
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u1 = uint64(bitOp)
	if is64bit {
		i.u2 = 1
	}
}

func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) {
	i.kind = fpuRRR
	i.u1 = uint64(op)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) {
	i.kind = fpuRR
	i.u1 = uint64(op)
	i.rd, i.rn = rd, rn
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {
	i.kind = extend
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u1 = uint64(fromBits)
	i.u2 = uint64(toBits)
	if signed {
		i.u3 = 1
	}
}

func (i *instruction) asMove32(rd, rn regalloc.VReg) {
	i.kind = mov32
	i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {
	i.kind = mov64
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	return i
}

func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
	i.kind = fpuMov64
	i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {
	i.kind = fpuMov128
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	return i
}

func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
	i.kind = movToVec
	i.rd = rd
	i.rn = rn
	i.u1, i.u2 = uint64(arr), uint64(index)
}

func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) {
	if signed {
		i.kind = movFromVecSigned
	} else {
		i.kind = movFromVec
	}
	i.rd = rd
	i.rn = rn
	i.u1, i.u2 = uint64(arr), uint64(index)
}

func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) {
	i.kind = vecDup
	i.u1 = uint64(arr)
	i.rn, i.rd = rn, rd
}

func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) {
	i.kind = vecDupElement
	i.u1 = uint64(arr)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(index)
}

func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) {
	i.kind = vecExtract
	i.u1 = uint64(arr)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(index)
}

func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
	i.kind = vecMovElement
	i.u1 = uint64(arr)
	i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
	i.rn, i.rd = rn, rd
}

func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
	i.kind = vecMisc
	i.u1 = uint64(op)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
	i.kind = vecLanes
	i.u1 = uint64(op)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecShiftImm
	i.u1 = uint64(op)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
	switch nregs {
	case 0, 1:
		i.kind = vecTbl
	case 2:
		i.kind = vecTbl2
		if !rn.reg().IsRealReg() {
			panic("rn is not a RealReg")
		}
		if rn.realReg() == v31 {
			panic("rn cannot be v31")
		}
	default:
		panic(fmt.Sprintf("unsupported number of registers %d", nregs))
	}
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}
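
// Note: for the two-register table lookup (nregs == 2), the table is the register pair
// { rn, rn+1 }, which is why rn must be a real register other than v31 here and why
// Uses reports rn+1 as an extra use. For example, with rn == v29 the lookup reads
// { v29, v30 }.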

func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecPermute
	i.u1 = uint64(op)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecRRR
	i.u1 = uint64(op)
	i.rn, i.rd, i.rm = rn, rd, rm
	i.u2 = uint64(arr)
}

// asVecRRRRewrite encodes a vector instruction that rewrites the destination register.
// IMPORTANT: the destination register must already be defined before this instruction.
func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecRRRRewrite
	i.u1 = uint64(op)
	i.rn, i.rd, i.rm = rn, rd, rm
	i.u2 = uint64(arr)
}

func (i *instruction) IsCopy() bool {
	op := i.kind
	// We do not include mov32 because it is not a copy in the strict sense: it does not
	// preserve the upper 32 bits, and it is only used in the translation of IReduce.
	return op == mov64 || op == fpuMov64 || op == fpuMov128
}

// String implements fmt.Stringer.
func (i *instruction) String() (str string) {
	is64SizeBitToSize := func(u3 uint64) byte {
		if u3 == 0 {
			return 32
		}
		return 64
	}

	switch i.kind {
	case nop0:
		if i.u1 != 0 {
			l := label(i.u1)
			str = fmt.Sprintf("%s:", l)
		} else {
			str = "nop0"
		}
	case aluRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
			i.rm.format(size))
	case aluRRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
	case aluRRImm12:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
	case aluRRBitmaskImm:
		size := is64SizeBitToSize(i.u3)
		rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
		if size == 32 {
			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
		} else {
			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
		}
	case aluRRImmShift:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %#x",
			aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			i.rm.shiftImm(),
		)
	case aluRRRShift:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s",
			aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			i.rm.format(size),
		)
	case aluRRRExtend:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			// Regardless of the source size, the register is formatted in 32-bit.
			i.rm.format(32),
		)
	case bitRR:
		size := is64SizeBitToSize(i.u2)
		str = fmt.Sprintf("%s %s, %s",
			bitOp(i.u1),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
		)
	case uLoad8:
		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad8:
		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad16:
		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad16:
		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad32:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad32:
		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad64:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
	case store8:
		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
	case store16:
		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
	case store32:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
	case store64:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
	case storeP64:
		str = fmt.Sprintf("stp %s, %s, %s",
			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
	case loadP64:
		str = fmt.Sprintf("ldp %s, %s, %s",
			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
	case mov64:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegSized(i.rd.nr(), 64),
			formatVRegSized(i.rn.nr(), 64))
	case mov32:
		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
	case movZ:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case movN:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case movK:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case extend:
		fromBits, toBits := byte(i.u1), byte(i.u2)

		var signedStr string
		if i.u3 == 1 {
			signedStr = "s"
		} else {
			signedStr = "u"
		}
		var fromStr string
		switch fromBits {
		case 8:
			fromStr = "b"
		case 16:
			fromStr = "h"
		case 32:
			fromStr = "w"
		}
		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
	case cSel:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("csel %s, %s, %s, %s",
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			formatVRegSized(i.rm.nr(), size),
			condFlag(i.u1),
		)
	case cSet:
		if i.u2 != 0 {
			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
		} else {
			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
		}
	case cCmpImm:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
			formatVRegSized(i.rn.nr(), size), i.rm.data,
			i.u2&0b1111,
			condFlag(i.u1))
	case fpuMov64:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
	case fpuMov128:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
	case fpuMovFromVec:
		panic("TODO")
	case fpuRR:
		dstSz := is64SizeBitToSize(i.u3)
		srcSz := dstSz
		op := fpuUniOp(i.u1)
		switch op {
		case fpuUniOpCvt32To64:
			srcSz = 32
		case fpuUniOpCvt64To32:
			srcSz = 64
		}
		str = fmt.Sprintf("%s %s, %s", op.String(),
			formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
	case fpuRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
	case fpuRRI:
		panic("TODO")
	case fpuRRRR:
		panic("TODO")
	case fpuCmp:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("fcmp %s, %s",
			formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
	case fpuLoad32:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case fpuStore32:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
	case fpuLoad64:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
	case fpuStore64:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
	case fpuLoad128:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64))
	case fpuStore128:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64))
	case loadFpuConst32:
		str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1)))
	case loadFpuConst64:
		str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1))
	case loadFpuConst128:
		str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x",
			formatVRegSized(i.rd.nr(), 128), i.u1, i.u2)
	case fpuToInt:
		var op, src, dst string
		if signed := i.u1 == 1; signed {
			op = "fcvtzs"
		} else {
			op = "fcvtzu"
		}
		if src64 := i.u2 == 1; src64 {
			src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)
		} else {
			src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)
		}
		if dst64 := i.u3 == 1; dst64 {
			dst = formatVRegSized(i.rd.nr(), 64)
		} else {
			dst = formatVRegSized(i.rd.nr(), 32)
		}
		str = fmt.Sprintf("%s %s, %s", op, dst, src)

	case intToFpu:
		var op, src, dst string
		if signed := i.u1 == 1; signed {
			op = "scvtf"
		} else {
			op = "ucvtf"
		}
		if src64 := i.u2 == 1; src64 {
			src = formatVRegSized(i.rn.nr(), 64)
		} else {
			src = formatVRegSized(i.rn.nr(), 32)
		}
		if dst64 := i.u3 == 1; dst64 {
			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD)
		} else {
			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS)
		}
		str = fmt.Sprintf("%s %s, %s", op, dst, src)
	case fpuCSel:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("fcsel %s, %s, %s, %s",
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			formatVRegSized(i.rm.nr(), size),
			condFlag(i.u1),
		)
	case movToVec:
		var size byte
		arr := vecArrangement(i.u1)
		switch arr {
		case vecArrangementB, vecArrangementH, vecArrangementS:
			size = 32
		case vecArrangementD:
			size = 64
		default:
			panic("unsupported arrangement " + arr.String())
		}
		str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
	case movFromVec, movFromVecSigned:
		var size byte
		var opcode string
		arr := vecArrangement(i.u1)
		signed := i.kind == movFromVecSigned
		switch arr {
		case vecArrangementB, vecArrangementH, vecArrangementS:
			size = 32
			if signed {
				opcode = "smov"
			} else {
				opcode = "umov"
			}
		case vecArrangementD:
			size = 64
			if signed {
				opcode = "smov"
			} else {
				opcode = "mov"
			}
		default:
			panic("unsupported arrangement " + arr.String())
		}
		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
	case vecDup:
		str = fmt.Sprintf("dup %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegSized(i.rn.nr(), 64),
		)
	case vecDupElement:
		arr := vecArrangement(i.u1)
		str = fmt.Sprintf("dup %s, %s",
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
		)
	case vecDupFromFpu:
		panic("TODO")
	case vecExtract:
		str = fmt.Sprintf("ext %s, %s, %s, #%d",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
			uint32(i.u2),
		)
	case vecExtend:
		panic("TODO")
	case vecMovElement:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
		)
	case vecMiscNarrow:
		panic("TODO")
	case vecRRR, vecRRRRewrite:
		str = fmt.Sprintf("%s %s, %s, %s",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
			formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
		)
	case vecMisc:
		vop := vecOp(i.u1)
		if vop == vecOpCmeq0 {
			str = fmt.Sprintf("cmeq %s, %s, #0",
				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
		} else {
			str = fmt.Sprintf("%s %s, %s",
				vop,
				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
		}
	case vecLanes:
		arr := vecArrangement(i.u2)
		var destArr vecArrangement
		switch arr {
		case vecArrangement8B, vecArrangement16B:
			destArr = vecArrangementH
		case vecArrangement4H, vecArrangement8H:
			destArr = vecArrangementS
		case vecArrangement4S:
			destArr = vecArrangementD
		default:
			panic("invalid arrangement " + arr.String())
		}
		str = fmt.Sprintf("%s %s, %s",
			vecOp(i.u1),
			formatVRegWidthVec(i.rd.nr(), destArr),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone))
	case vecShiftImm:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("%s %s, %s, #%d",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
			i.rm.shiftImm())
	case vecTbl:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("tbl %s, { %s }, %s",
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
	case vecTbl2:
		arr := vecArrangement(i.u2)
		rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
		rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
		str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
			formatVRegVec(rd, arr, vecIndexNone),
			formatVRegVec(rn, vecArrangement16B, vecIndexNone),
			formatVRegVec(rn1, vecArrangement16B, vecIndexNone),
			formatVRegVec(rm, arr, vecIndexNone))
	case vecPermute:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("%s %s, %s, %s",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
	case movToFPSR:
		str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
	case movFromFPSR:
		str = fmt.Sprintf("mrs %s, fpsr", formatVRegSized(i.rd.nr(), 64))
	case call:
		if i.u2 > 0 {
			str = fmt.Sprintf("bl #%#x", i.u2)
		} else {
			str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
		}
	case callInd:
		str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64))
	case ret:
		str = "ret"
	case br:
		target := label(i.u1)
		if i.u3 != 0 {
			str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
		} else {
			str = fmt.Sprintf("b %s", target.String())
		}
	case condBr:
		size := is64SizeBitToSize(i.u3)
		c := cond(i.u1)
		target := label(i.u2)
		switch c.kind() {
		case condKindRegisterZero:
			if !i.condBrOffsetResolved() {
				str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String())
			} else {
				str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String())
			}
		case condKindRegisterNotZero:
			if offset := i.condBrOffset(); offset != 0 {
				str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String())
			} else {
				str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String())
			}
		case condKindCondFlagSet:
			if offset := i.condBrOffset(); offset != 0 {
				if target == labelInvalid {
					str = fmt.Sprintf("b.%s #%#x", c.flag(), offset)
				} else {
					str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String())
				}
			} else {
				str = fmt.Sprintf("b.%s %s", c.flag(), target.String())
			}
		}
	case adr:
		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
	case brTableSequence:
		if i.u3 == 0 { // The offsets haven't been resolved yet.
			labels := make([]string, len(i.targets))
			for index, l := range i.targets {
				labels[index] = label(l).String()
			}
			str = fmt.Sprintf("br_table_sequence %s, [%s]",
				formatVRegSized(i.rn.nr(), 64),
				strings.Join(labels, ", "),
			)
		} else {
			// See encodeBrTableSequence for the encoding.
			offsets := make([]string, len(i.targets))
			for index, offset := range i.targets {
				offsets[index] = fmt.Sprintf("%#x", int32(offset))
			}
			str = fmt.Sprintf(
				`adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`,
				formatVRegSized(i.rn.nr(), 64),
				formatVRegSized(tmpRegVReg, 64),
				offsets,
			)
		}
	case exitSequence:
		str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64))
	case udf:
		str = "udf"
	case emitSourceOffsetInfo:
		str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
	case vecLoad1R:
		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
	default:
		panic(i.kind)
	}
	return
}
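
// Example renderings (illustrative, assuming formatVRegSized prints real registers with
// their x/w/v names and labels render as L<n>): a 64-bit aluRRR add formats as
// "add x0, x1, x2", and an unresolved flag-based conditional branch as "b.ne L123".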
  1458  
  1459  func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
  1460  	i.kind = adr
  1461  	i.rd = operandNR(rd)
  1462  	i.u1 = uint64(offset)
  1463  }
  1464  
  1465  // TODO: delete unnecessary things.
  1466  const (
  1467  	// nop0 represents a no-op of zero size.
  1468  	nop0 instructionKind = iota + 1
  1469  	// aluRRR represents an ALU operation with two register sources and a register destination.
  1470  	aluRRR
  1471  	// aluRRRR represents an ALU operation with three register sources and a register destination.
  1472  	aluRRRR
  1473  	// aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination.
  1474  	aluRRImm12
  1475  	// aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination.
  1476  	aluRRBitmaskImm
  1477  	// aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination.
  1478  	aluRRImmShift
  1479  	// aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination.
  1480  	aluRRRShift
  1481  	// aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination.
  1482  	aluRRRExtend
  1483  	// bitRR represents a bit op instruction with a single register source.
  1484  	bitRR
  1485  	// uLoad8 represents an unsigned 8-bit load.
  1486  	uLoad8
  1487  	// sLoad8 represents a signed 8-bit load into 64-bit register.
  1488  	sLoad8
  1489  	// uLoad16 represents an unsigned 16-bit load into 64-bit register.
  1490  	uLoad16
  1491  	// sLoad16 represents a signed 16-bit load into 64-bit register.
  1492  	sLoad16
  1493  	// uLoad32 represents an unsigned 32-bit load into 64-bit register.
  1494  	uLoad32
  1495  	// sLoad32 represents a signed 32-bit load into 64-bit register.
  1496  	sLoad32
  1497  	// uLoad64 represents a 64-bit load.
  1498  	uLoad64
  1499  	// store8 represents an 8-bit store.
  1500  	store8
  1501  	// store16 represents a 16-bit store.
  1502  	store16
  1503  	// store32 represents a 32-bit store.
  1504  	store32
  1505  	// store64 represents a 64-bit store.
  1506  	store64
  1507  	// storeP64 represents a store of a pair of registers.
  1508  	storeP64
  1509  	// loadP64 represents a load of a pair of registers.
  1510  	loadP64
  1511  	// mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling.
  1512  	mov64
  1513  	// mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination.
  1514  	mov32
  1515  	// movZ represents a MOVZ with a 16-bit immediate.
  1516  	movZ
  1517  	// movN represents a MOVN with a 16-bit immediate.
  1518  	movN
  1519  	// movK represents a MOVK with a 16-bit immediate.
  1520  	movK
  1521  	// extend represents a sign- or zero-extend operation.
  1522  	extend
  1523  	// cSel represents a conditional-select operation.
  1524  	cSel
  1525  	// cSet represents a conditional-set operation.
  1526  	cSet
  1527  	// cCmpImm represents a conditional comparison with an immediate.
  1528  	cCmpImm
  1529  	// fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster.
  1530  	fpuMov64
  1531  	// fpuMov128 represents a vector register move.
  1532  	fpuMov128
  1533  	// fpuMovFromVec represents a move to scalar from a vector element.
  1534  	fpuMovFromVec
  1535  	// fpuRR represents a 1-op FPU instruction.
  1536  	fpuRR
  1537  	// fpuRRR represents a 2-op FPU instruction.
  1538  	fpuRRR
  1539  	// fpuRRI represents a 2-op FPU instruction with immediate value.
  1540  	fpuRRI
  1541  	// fpuRRRR represents a 3-op FPU instruction.
  1542  	fpuRRRR
  1543  	// fpuCmp represents a FPU comparison, either 32 or 64 bit.
  1544  	fpuCmp
  1545  	// fpuLoad32 represents a floating-point load, single-precision (32 bit).
  1546  	fpuLoad32
  1547  	// fpuStore32 represents a floating-point store, single-precision (32 bit).
  1548  	fpuStore32
  1549  	// fpuLoad64 represents a floating-point load, double-precision (64 bit).
  1550  	fpuLoad64
  1551  	// fpuStore64 represents a floating-point store, double-precision (64 bit).
  1552  	fpuStore64
  1553  	// fpuLoad128 represents a floating-point/vector load, 128 bit.
  1554  	fpuLoad128
  1555  	// fpuStore128 represents a floating-point/vector store, 128 bit.
  1556  	fpuStore128
  1557  	// loadFpuConst32 represents a load of a 32-bit floating-point constant.
  1558  	loadFpuConst32
  1559  	// loadFpuConst64 represents a load of a 64-bit floating-point constant.
  1560  	loadFpuConst64
  1561  	// loadFpuConst128 represents a load of a 128-bit floating-point constant.
  1562  	loadFpuConst128
  1563  	// vecLoad1R represents a load of one single-element structure that is replicated to all lanes of a vector.
  1564  	vecLoad1R
  1565  	// fpuToInt represents a conversion from FP to integer.
  1566  	fpuToInt
  1567  	// intToFpu represents a conversion from integer to FP.
  1568  	intToFpu
  1569  	// fpuCSel represents a 32/64-bit FP conditional select.
  1570  	fpuCSel
  1571  	// movToVec represents a move to a vector element from a GPR.
  1572  	movToVec
  1573  	// movFromVec represents an unsigned move from a vector element to a GPR.
  1574  	movFromVec
  1575  	// movFromVecSigned represents a signed move from a vector element to a GPR.
  1576  	movFromVecSigned
  1577  	// vecDup represents a duplication of general-purpose register to vector.
  1578  	vecDup
  1579  	// vecDupElement represents a duplication of a vector element to vector or scalar.
  1580  	vecDupElement
  1581  	// vecDupFromFpu represents a duplication of scalar to vector.
  1582  	vecDupFromFpu
  1583  	// vecExtract represents a vector extraction operation.
  1584  	vecExtract
  1585  	// vecExtend represents a vector extension operation.
  1586  	vecExtend
  1587  	// vecMovElement represents a move of a vector element to another vector element.
  1588  	vecMovElement
  1589  	// vecMiscNarrow represents a vector narrowing operation.
  1590  	vecMiscNarrow
  1591  	// vecRRR represents a vector ALU operation.
  1592  	vecRRR
  1593  	// vecRRRRewrite is exactly the same as vecRRR except that it rewrites the destination register.
  1594  	// For example, the BSL instruction rewrites the destination register, so the existing value influences the result.
  1595  	// Therefore, the "destination" register in vecRRRRewrite is treated as a "use", which makes the register outlive
  1596  	// the instruction, and the instruction has no "def" in the context of register allocation.
  1597  	vecRRRRewrite
  1598  	// vecMisc represents a vector two register miscellaneous instruction.
  1599  	vecMisc
  1600  	// vecLanes represents a vector instruction across lanes.
  1601  	vecLanes
  1602  	// vecShiftImm represents a SIMD scalar shift by immediate instruction.
  1603  	vecShiftImm
  1604  	// vecTbl represents a vector table lookup (single-register table).
  1605  	vecTbl
  1606  	// vecTbl2 represents a vector table lookup (two-register table).
  1607  	vecTbl2
  1608  	// vecPermute represents a vector permute instruction.
  1609  	vecPermute
  1610  	// movToFPSR represents a move to the FPSR.
  1611  	movToFPSR
  1612  	// movFromFPSR represents a move from the FPSR.
  1613  	movFromFPSR
  1614  	// call represents a machine call instruction.
  1615  	call
  1616  	// callInd represents a machine indirect-call instruction.
  1617  	callInd
  1618  	// ret represents a machine return instruction.
  1619  	ret
  1620  	// br represents an unconditional branch.
  1621  	br
  1622  	// condBr represents a conditional branch.
  1623  	condBr
  1624  	// adr represents computing the address of a memory location using a PC-relative offset.
  1625  	adr
  1626  	// brTableSequence represents a jump-table sequence.
  1627  	brTableSequence
  1628  	// exitSequence consists of multiple instructions, and exits the execution immediately.
  1629  	// See encodeExitSequence.
  1630  	exitSequence
  1631  	// udf is the undefined instruction. For debugging only.
  1632  	udf
  1633  
  1634  	// emitSourceOffsetInfo is a dummy instruction to emit source offset info.
  1635  	// The existence of this instruction does not affect the execution.
  1636  	emitSourceOffsetInfo
  1637  
  1638  	// ------------------- do not define below this line -------------------
  1639  	numInstructionKinds
  1640  )
  1641  
  1642  func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
  1643  	i.kind = emitSourceOffsetInfo
  1644  	i.u1 = uint64(l)
  1645  	return i
  1646  }
  1647  
  1648  func (i *instruction) sourceOffsetInfo() ssa.SourceOffset {
  1649  	return ssa.SourceOffset(i.u1)
  1650  }
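
// exampleSourceOffsetRoundTrip is an example-only helper (not part of the
// backend) sketching how the source offset survives the u1 packing unchanged.
func exampleSourceOffsetRoundTrip(i *instruction) bool {
	off := ssa.SourceOffset(1234)
	return i.asEmitSourceOffsetInfo(off).sourceOffsetInfo() == off // always true
}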
  1651  
  1652  func (i *instruction) asUDF() *instruction {
  1653  	i.kind = udf
  1654  	return i
  1655  }
  1656  
  1657  func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
  1658  	i.kind = fpuToInt
  1659  	i.rn = rn
  1660  	i.rd = rd
  1661  	if rdSigned {
  1662  		i.u1 = 1
  1663  	}
  1664  	if src64bit {
  1665  		i.u2 = 1
  1666  	}
  1667  	if dst64bit {
  1668  		i.u3 = 1
  1669  	}
  1670  }
  1671  
  1672  func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
  1673  	i.kind = intToFpu
  1674  	i.rn = rn
  1675  	i.rd = rd
  1676  	if rnSigned {
  1677  		i.u1 = 1
  1678  	}
  1679  	if src64bit {
  1680  		i.u2 = 1
  1681  	}
  1682  	if dst64bit {
  1683  		i.u3 = 1
  1684  	}
  1685  }
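
// The boolean modifiers above are packed into u1..u3 as 0/1, a convention that
// recurs throughout this file. As a minimal sketch (exampleFcvtzs is an
// example-only helper), building a signed conversion from a 64-bit float to a
// 64-bit integer, i.e. FCVTZS Xd, Dn, sets all three flags:
func exampleFcvtzs(i *instruction, rd, rn operand) *instruction {
	// rdSigned=true, src64bit=true, dst64bit=true => u1 = u2 = u3 = 1.
	i.asFpuToInt(rd, rn, true, true, true)
	return i
}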
  1686  
  1687  func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
  1688  	i.kind = exitSequence
  1689  	i.rn = operandNR(ctx)
  1690  	return i
  1691  }
  1692  
  1693  // aluOp determines the type of ALU operation. Instructions whose kind is one of
  1694  // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift, and aluRRRExtend
  1695  // use this type.
  1696  type aluOp int
  1697  
  1698  func (a aluOp) String() string {
  1699  	switch a {
  1700  	case aluOpAdd:
  1701  		return "add"
  1702  	case aluOpSub:
  1703  		return "sub"
  1704  	case aluOpOrr:
  1705  		return "orr"
  1706  	case aluOpAnd:
  1707  		return "and"
  1708  	case aluOpBic:
  1709  		return "bic"
  1710  	case aluOpEor:
  1711  		return "eor"
  1712  	case aluOpAddS:
  1713  		return "adds"
  1714  	case aluOpSubS:
  1715  		return "subs"
  1716  	case aluOpSMulH:
  1717  		return "smulh"
  1718  	case aluOpUMulH:
  1719  		return "umulh"
  1720  	case aluOpSDiv:
  1721  		return "sdiv"
  1722  	case aluOpUDiv:
  1723  		return "udiv"
  1724  	case aluOpRotR:
  1725  		return "ror"
  1726  	case aluOpLsr:
  1727  		return "lsr"
  1728  	case aluOpAsr:
  1729  		return "asr"
  1730  	case aluOpLsl:
  1731  		return "lsl"
  1732  	case aluOpMAdd:
  1733  		return "madd"
  1734  	case aluOpMSub:
  1735  		return "msub"
  1736  	}
  1737  	panic(int(a))
  1738  }
  1739  
  1740  const (
  1741  	// 32/64-bit Add.
  1742  	aluOpAdd aluOp = iota
  1743  	// 32/64-bit Subtract.
  1744  	aluOpSub
  1745  	// 32/64-bit Bitwise OR.
  1746  	aluOpOrr
  1747  	// 32/64-bit Bitwise AND.
  1748  	aluOpAnd
  1749  	// 32/64-bit Bitwise AND NOT.
  1750  	aluOpBic
  1751  	// 32/64-bit Bitwise XOR (Exclusive OR).
  1752  	aluOpEor
  1753  	// 32/64-bit Add setting flags.
  1754  	aluOpAddS
  1755  	// 32/64-bit Subtract setting flags.
  1756  	aluOpSubS
  1757  	// Signed multiply, high-word result.
  1758  	aluOpSMulH
  1759  	// Unsigned multiply, high-word result.
  1760  	aluOpUMulH
  1761  	// 64-bit Signed divide.
  1762  	aluOpSDiv
  1763  	// 64-bit Unsigned divide.
  1764  	aluOpUDiv
  1765  	// 32/64-bit Rotate right.
  1766  	aluOpRotR
  1767  	// 32/64-bit Logical shift right.
  1768  	aluOpLsr
  1769  	// 32/64-bit Arithmetic shift right.
  1770  	aluOpAsr
  1771  	// 32/64-bit Logical shift left.
  1772  	aluOpLsl
  1773  
  1774  	// aluOpMAdd (multiply-add) and aluOpMSub (multiply-subtract) are only applicable for aluRRRR.
  1775  	aluOpMAdd
  1776  	aluOpMSub
  1777  )
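
// For reference on the two aluRRRR-only operations above: on arm64, MADD
// computes rd = ra + rn*rm and MSUB computes rd = ra - rn*rm, which is why
// these operations need the extra "ra" source operand in the instruction
// struct.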
  1778  
  1779  // vecOp determines the type of vector operation. Instructions whose kind is one of
  1780  // the vector kinds, such as vecRRR, vecMisc, or vecLanes, use this type.
  1781  type vecOp int
  1782  
  1783  // String implements fmt.Stringer.
  1784  func (b vecOp) String() string {
  1785  	switch b {
  1786  	case vecOpCnt:
  1787  		return "cnt"
  1788  	case vecOpCmeq:
  1789  		return "cmeq"
  1790  	case vecOpCmgt:
  1791  		return "cmgt"
  1792  	case vecOpCmhi:
  1793  		return "cmhi"
  1794  	case vecOpCmge:
  1795  		return "cmge"
  1796  	case vecOpCmhs:
  1797  		return "cmhs"
  1798  	case vecOpFcmeq:
  1799  		return "fcmeq"
  1800  	case vecOpFcmgt:
  1801  		return "fcmgt"
  1802  	case vecOpFcmge:
  1803  		return "fcmge"
  1804  	case vecOpCmeq0:
  1805  		return "cmeq0"
  1806  	case vecOpUaddlv:
  1807  		return "uaddlv"
  1808  	case vecOpBit:
  1809  		return "bit"
  1810  	case vecOpBic:
  1811  		return "bic"
  1812  	case vecOpBsl:
  1813  		return "bsl"
  1814  	case vecOpNot:
  1815  		return "not"
  1816  	case vecOpAnd:
  1817  		return "and"
  1818  	case vecOpOrr:
  1819  		return "orr"
  1820  	case vecOpEOR:
  1821  		return "eor"
  1822  	case vecOpFadd:
  1823  		return "fadd"
  1824  	case vecOpAdd:
  1825  		return "add"
  1826  	case vecOpAddp:
  1827  		return "addp"
  1828  	case vecOpAddv:
  1829  		return "addv"
  1830  	case vecOpSub:
  1831  		return "sub"
  1832  	case vecOpFsub:
  1833  		return "fsub"
  1834  	case vecOpSmin:
  1835  		return "smin"
  1836  	case vecOpUmin:
  1837  		return "umin"
  1838  	case vecOpUminv:
  1839  		return "uminv"
  1840  	case vecOpSmax:
  1841  		return "smax"
  1842  	case vecOpUmax:
  1843  		return "umax"
  1844  	case vecOpUmaxp:
  1845  		return "umaxp"
  1846  	case vecOpUrhadd:
  1847  		return "urhadd"
  1848  	case vecOpFmul:
  1849  		return "fmul"
  1850  	case vecOpSqrdmulh:
  1851  		return "sqrdmulh"
  1852  	case vecOpMul:
  1853  		return "mul"
  1854  	case vecOpUmlal:
  1855  		return "umlal"
  1856  	case vecOpFdiv:
  1857  		return "fdiv"
  1858  	case vecOpFsqrt:
  1859  		return "fsqrt"
  1860  	case vecOpAbs:
  1861  		return "abs"
  1862  	case vecOpFabs:
  1863  		return "fabs"
  1864  	case vecOpNeg:
  1865  		return "neg"
  1866  	case vecOpFneg:
  1867  		return "fneg"
  1868  	case vecOpFrintp:
  1869  		return "frintp"
  1870  	case vecOpFrintm:
  1871  		return "frintm"
  1872  	case vecOpFrintn:
  1873  		return "frintn"
  1874  	case vecOpFrintz:
  1875  		return "frintz"
  1876  	case vecOpFcvtl:
  1877  		return "fcvtl"
  1878  	case vecOpFcvtn:
  1879  		return "fcvtn"
  1880  	case vecOpFcvtzu:
  1881  		return "fcvtzu"
  1882  	case vecOpFcvtzs:
  1883  		return "fcvtzs"
  1884  	case vecOpScvtf:
  1885  		return "scvtf"
  1886  	case vecOpUcvtf:
  1887  		return "ucvtf"
  1888  	case vecOpSqxtn:
  1889  		return "sqxtn"
  1890  	case vecOpUqxtn:
  1891  		return "uqxtn"
  1892  	case vecOpSqxtun:
  1893  		return "sqxtun"
  1894  	case vecOpRev64:
  1895  		return "rev64"
  1896  	case vecOpXtn:
  1897  		return "xtn"
  1898  	case vecOpShll:
  1899  		return "shll"
  1900  	case vecOpSshl:
  1901  		return "sshl"
  1902  	case vecOpSshll:
  1903  		return "sshll"
  1904  	case vecOpUshl:
  1905  		return "ushl"
  1906  	case vecOpUshll:
  1907  		return "ushll"
  1908  	case vecOpSshr:
  1909  		return "sshr"
  1910  	case vecOpZip1:
  1911  		return "zip1"
  1912  	case vecOpFmin:
  1913  		return "fmin"
  1914  	case vecOpFmax:
  1915  		return "fmax"
  1916  	}
  1917  	panic(int(b))
  1918  }
  1919  
  1920  const (
  1921  	vecOpCnt vecOp = iota
  1922  	vecOpCmeq0
  1923  	vecOpCmeq
  1924  	vecOpCmgt
  1925  	vecOpCmhi
  1926  	vecOpCmge
  1927  	vecOpCmhs
  1928  	vecOpFcmeq
  1929  	vecOpFcmgt
  1930  	vecOpFcmge
  1931  	vecOpUaddlv
  1932  	vecOpBit
  1933  	vecOpBic
  1934  	vecOpBsl
  1935  	vecOpNot
  1936  	vecOpAnd
  1937  	vecOpOrr
  1938  	vecOpEOR
  1939  	vecOpAdd
  1940  	vecOpFadd
  1941  	vecOpAddv
  1942  	vecOpSqadd
  1943  	vecOpUqadd
  1944  	vecOpAddp
  1945  	vecOpSub
  1946  	vecOpFsub
  1947  	vecOpSqsub
  1948  	vecOpUqsub
  1949  	vecOpSmin
  1950  	vecOpUmin
  1951  	vecOpUminv
  1952  	vecOpFmin
  1953  	vecOpSmax
  1954  	vecOpUmax
  1955  	vecOpUmaxp
  1956  	vecOpFmax
  1957  	vecOpUrhadd
  1958  	vecOpMul
  1959  	vecOpFmul
  1960  	vecOpSqrdmulh
  1961  	vecOpUmlal
  1962  	vecOpFdiv
  1963  	vecOpFsqrt
  1964  	vecOpAbs
  1965  	vecOpFabs
  1966  	vecOpNeg
  1967  	vecOpFneg
  1968  	vecOpFrintm
  1969  	vecOpFrintn
  1970  	vecOpFrintp
  1971  	vecOpFrintz
  1972  	vecOpFcvtl
  1973  	vecOpFcvtn
  1974  	vecOpFcvtzs
  1975  	vecOpFcvtzu
  1976  	vecOpScvtf
  1977  	vecOpUcvtf
  1978  	vecOpSqxtn
  1979  	vecOpSqxtun
  1980  	vecOpUqxtn
  1981  	vecOpRev64
  1982  	vecOpXtn
  1983  	vecOpShll
  1984  	vecOpSshl
  1985  	vecOpSshll
  1986  	vecOpUshl
  1987  	vecOpUshll
  1988  	vecOpSshr
  1989  	vecOpZip1
  1990  )
  1991  
  1992  // bitOp determines the type of bitwise operation. Instructions whose kind is
  1993  // bitRR use this type.
  1994  type bitOp int
  1995  
  1996  // String implements fmt.Stringer.
  1997  func (b bitOp) String() string {
  1998  	switch b {
  1999  	case bitOpRbit:
  2000  		return "rbit"
  2001  	case bitOpClz:
  2002  		return "clz"
  2003  	}
  2004  	panic(int(b))
  2005  }
  2006  
  2007  const (
  2008  	// 32/64-bit Rbit.
  2009  	bitOpRbit bitOp = iota
  2010  	// 32/64-bit Clz.
  2011  	bitOpClz
  2012  )
  2013  
  2014  // fpuUniOp represents a unary floating-point unit (FPU) operation.
  2015  type fpuUniOp byte
  2016  
  2017  const (
  2018  	fpuUniOpNeg fpuUniOp = iota
  2019  	fpuUniOpCvt32To64
  2020  	fpuUniOpCvt64To32
  2021  	fpuUniOpSqrt
  2022  	fpuUniOpRoundPlus
  2023  	fpuUniOpRoundMinus
  2024  	fpuUniOpRoundZero
  2025  	fpuUniOpRoundNearest
  2026  	fpuUniOpAbs
  2027  )
  2028  
  2029  // String implements fmt.Stringer.
  2030  func (f fpuUniOp) String() string {
  2031  	switch f {
  2032  	case fpuUniOpNeg:
  2033  		return "fneg"
  2034  	case fpuUniOpCvt32To64:
  2035  		return "fcvt"
  2036  	case fpuUniOpCvt64To32:
  2037  		return "fcvt"
  2038  	case fpuUniOpSqrt:
  2039  		return "fsqrt"
  2040  	case fpuUniOpRoundPlus:
  2041  		return "frintp"
  2042  	case fpuUniOpRoundMinus:
  2043  		return "frintm"
  2044  	case fpuUniOpRoundZero:
  2045  		return "frintz"
  2046  	case fpuUniOpRoundNearest:
  2047  		return "frintn"
  2048  	case fpuUniOpAbs:
  2049  		return "fabs"
  2050  	}
  2051  	panic(int(f))
  2052  }
  2053  
  2054  // fpuBinOp represents a binary floating-point unit (FPU) operation.
  2055  type fpuBinOp byte
  2056  
  2057  const (
  2058  	fpuBinOpAdd fpuBinOp = iota
  2059  	fpuBinOpSub
  2060  	fpuBinOpMul
  2061  	fpuBinOpDiv
  2062  	fpuBinOpMax
  2063  	fpuBinOpMin
  2064  )
  2065  
  2066  // String implements fmt.Stringer.
  2067  func (f fpuBinOp) String() string {
  2068  	switch f {
  2069  	case fpuBinOpAdd:
  2070  		return "fadd"
  2071  	case fpuBinOpSub:
  2072  		return "fsub"
  2073  	case fpuBinOpMul:
  2074  		return "fmul"
  2075  	case fpuBinOpDiv:
  2076  		return "fdiv"
  2077  	case fpuBinOpMax:
  2078  		return "fmax"
  2079  	case fpuBinOpMin:
  2080  		return "fmin"
  2081  	}
  2082  	panic(int(f))
  2083  }
  2084  
  2085  // extMode represents the mode of a register operand extension.
  2086  // For example, aluRRRExtend instructions need this info to determine the extensions.
  2087  type extMode byte
  2088  
  2089  const (
  2090  	extModeNone extMode = iota
  2091  	// extModeZeroExtend32 represents a zero-extension to 32 bits if the original bit size is less than 32.
  2092  	extModeZeroExtend32
  2093  	// extModeSignExtend32 represents a sign-extension to 32 bits if the original bit size is less than 32.
  2094  	extModeSignExtend32
  2095  	// extModeZeroExtend64 represents a zero-extension to 64 bits if the original bit size is less than 64.
  2096  	extModeZeroExtend64
  2097  	// extModeSignExtend64 represents a sign-extension to 64 bits if the original bit size is less than 64.
  2098  	extModeSignExtend64
  2099  )
  2100  
  2101  func (e extMode) bits() byte {
  2102  	switch e {
  2103  	case extModeZeroExtend32, extModeSignExtend32:
  2104  		return 32
  2105  	case extModeZeroExtend64, extModeSignExtend64:
  2106  		return 64
  2107  	default:
  2108  		return 0
  2109  	}
  2110  }
  2111  
  2112  func (e extMode) signed() bool {
  2113  	switch e {
  2114  	case extModeSignExtend32, extModeSignExtend64:
  2115  		return true
  2116  	default:
  2117  		return false
  2118  	}
  2119  }
  2120  
  2121  func extModeOf(t ssa.Type, signed bool) extMode {
  2122  	switch t.Bits() {
  2123  	case 32:
  2124  		if signed {
  2125  			return extModeSignExtend32
  2126  		}
  2127  		return extModeZeroExtend32
  2128  	case 64:
  2129  		if signed {
  2130  			return extModeSignExtend64
  2131  		}
  2132  		return extModeZeroExtend64
  2133  	default:
  2134  		panic("TODO? do we need narrower than 32 bits?")
  2135  	}
  2136  }
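
// exampleExtModeOf is an example-only helper (not part of the backend)
// illustrating the mapping: a signed 32-bit type selects extModeSignExtend32,
// which reports 32 bits and signedness.
func exampleExtModeOf() (byte, bool) {
	m := extModeOf(ssa.TypeI32, true) // extModeSignExtend32
	return m.bits(), m.signed()       // 32, true
}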
  2137  
  2138  type extendOp byte
  2139  
  2140  const (
  2141  	extendOpUXTB extendOp = 0b000
  2142  	extendOpUXTH extendOp = 0b001
  2143  	extendOpUXTW extendOp = 0b010
  2144  	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
  2145  	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
  2146  	extendOpUXTX extendOp = 0b011
  2147  	extendOpSXTB extendOp = 0b100
  2148  	extendOpSXTH extendOp = 0b101
  2149  	extendOpSXTW extendOp = 0b110
  2150  	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
  2151  	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
  2152  	extendOpSXTX extendOp = 0b111
  2153  	extendOpNone extendOp = 0xff
  2154  )
  2155  
  2156  func (e extendOp) srcBits() byte {
  2157  	switch e {
  2158  	case extendOpUXTB, extendOpSXTB:
  2159  		return 8
  2160  	case extendOpUXTH, extendOpSXTH:
  2161  		return 16
  2162  	case extendOpUXTW, extendOpSXTW:
  2163  		return 32
  2164  	case extendOpUXTX, extendOpSXTX:
  2165  		return 64
  2166  	}
  2167  	panic(int(e))
  2168  }
  2169  
  2170  func (e extendOp) String() string {
  2171  	switch e {
  2172  	case extendOpUXTB:
  2173  		return "UXTB"
  2174  	case extendOpUXTH:
  2175  		return "UXTH"
  2176  	case extendOpUXTW:
  2177  		return "UXTW"
  2178  	case extendOpUXTX:
  2179  		return "UXTX"
  2180  	case extendOpSXTB:
  2181  		return "SXTB"
  2182  	case extendOpSXTH:
  2183  		return "SXTH"
  2184  	case extendOpSXTW:
  2185  		return "SXTW"
  2186  	case extendOpSXTX:
  2187  		return "SXTX"
  2188  	}
  2189  	panic(int(e))
  2190  }
  2191  
  2192  func extendOpFrom(signed bool, from byte) extendOp {
  2193  	switch from {
  2194  	case 8:
  2195  		if signed {
  2196  			return extendOpSXTB
  2197  		}
  2198  		return extendOpUXTB
  2199  	case 16:
  2200  		if signed {
  2201  			return extendOpSXTH
  2202  		}
  2203  		return extendOpUXTH
  2204  	case 32:
  2205  		if signed {
  2206  			return extendOpSXTW
  2207  		}
  2208  		return extendOpUXTW
  2209  	case 64:
  2210  		if signed {
  2211  			return extendOpSXTX
  2212  		}
  2213  		return extendOpUXTX
  2214  	}
  2215  	panic("invalid extendOpFrom")
  2216  }
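
// exampleExtendOpRoundTrip is an example-only helper sketching the round trip
// between extendOpFrom and srcBits: a signed extension from 16 bits selects
// SXTH, which in turn reports a 16-bit source.
func exampleExtendOpRoundTrip() (extendOp, byte) {
	op := extendOpFrom(true, 16) // extendOpSXTH
	return op, op.srcBits()      // SXTH, 16
}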
  2217  
  2218  type shiftOp byte
  2219  
  2220  const (
  2221  	shiftOpLSL shiftOp = 0b00
  2222  	shiftOpLSR shiftOp = 0b01
  2223  	shiftOpASR shiftOp = 0b10
  2224  	shiftOpROR shiftOp = 0b11
  2225  )
  2226  
  2227  func (s shiftOp) String() string {
  2228  	switch s {
  2229  	case shiftOpLSL:
  2230  		return "lsl"
  2231  	case shiftOpLSR:
  2232  		return "lsr"
  2233  	case shiftOpASR:
  2234  		return "asr"
  2235  	case shiftOpROR:
  2236  		return "ror"
  2237  	}
  2238  	panic(int(s))
  2239  }
  2240  
  2241  const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
  2242  
  2243  // size returns the size of the instruction in encoded bytes.
  2244  func (i *instruction) size() int64 {
  2245  	switch i.kind {
  2246  	case exitSequence:
  2247  		return exitSequenceSize // 6 instructions as in encodeExitSequence.
  2248  	case nop0:
  2249  		return 0
  2250  	case emitSourceOffsetInfo:
  2251  		return 0
  2252  	case loadFpuConst32:
  2253  		if i.u1 == 0 {
  2254  			return 4 // zero loading can be encoded as a single instruction.
  2255  		}
  2256  		return 4 + 4 + 4
  2257  	case loadFpuConst64:
  2258  		if i.u1 == 0 {
  2259  			return 4 // zero loading can be encoded as a single instruction.
  2260  		}
  2261  		return 4 + 4 + 8
  2262  	case loadFpuConst128:
  2263  		if i.u1 == 0 && i.u2 == 0 {
  2264  			return 4 // zero loading can be encoded as a single instruction.
  2265  		}
  2266  		return 4 + 4 + 16
  2267  	case brTableSequence:
  2268  		return 4*4 + int64(len(i.targets))*4
  2269  	default:
  2270  		return 4
  2271  	}
  2272  }
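
// Since instructions are doubly linked via prev/next, the encoded size of a
// whole sequence follows by summation; a sketch (exampleTotalSize is an
// example-only helper, not part of the backend):
func exampleTotalSize(begin *instruction) (total int64) {
	for cur := begin; cur != nil; cur = cur.next {
		total += cur.size()
	}
	return total
}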
  2273  
  2274  // vecArrangement is the arrangement of data within a vector register.
  2275  type vecArrangement byte
  2276  
  2277  const (
  2278  	// vecArrangementNone is an arrangement indicating that no arrangement is specified.
  2279  	vecArrangementNone vecArrangement = iota
  2280  	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
  2281  	vecArrangement8B
  2282  	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
  2283  	vecArrangement16B
  2284  	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
  2285  	vecArrangement4H
  2286  	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
  2287  	vecArrangement8H
  2288  	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
  2289  	vecArrangement2S
  2290  	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
  2291  	vecArrangement4S
  2292  	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
  2293  	vecArrangement1D
  2294  	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
  2295  	vecArrangement2D
  2296  
  2297  	// Assign each vector size specifier a vector arrangement ID.
  2298  	// An instruction can have either an arrangement or a size specifier, but not both,
  2299  	// so storing either in the same field simplifies the internal representation of
  2300  	// vector instructions.
  2301  
  2302  	// vecArrangementB is a size specifier of byte
  2303  	vecArrangementB
  2304  	// vecArrangementH is a size specifier of half word (16-bit)
  2305  	vecArrangementH
  2306  	// vecArrangementS is a size specifier of single word (32-bit)
  2307  	vecArrangementS
  2308  	// vecArrangementD is a size specifier of double word (64-bit)
  2309  	vecArrangementD
  2310  	// vecArrangementQ is a size specifier of the entire vector (128-bit)
  2311  	vecArrangementQ
  2312  )
  2313  
  2314  // String implements fmt.Stringer
  2315  func (v vecArrangement) String() (ret string) {
  2316  	switch v {
  2317  	case vecArrangement8B:
  2318  		ret = "8B"
  2319  	case vecArrangement16B:
  2320  		ret = "16B"
  2321  	case vecArrangement4H:
  2322  		ret = "4H"
  2323  	case vecArrangement8H:
  2324  		ret = "8H"
  2325  	case vecArrangement2S:
  2326  		ret = "2S"
  2327  	case vecArrangement4S:
  2328  		ret = "4S"
  2329  	case vecArrangement1D:
  2330  		ret = "1D"
  2331  	case vecArrangement2D:
  2332  		ret = "2D"
  2333  	case vecArrangementB:
  2334  		ret = "B"
  2335  	case vecArrangementH:
  2336  		ret = "H"
  2337  	case vecArrangementS:
  2338  		ret = "S"
  2339  	case vecArrangementD:
  2340  		ret = "D"
  2341  	case vecArrangementQ:
  2342  		ret = "Q"
  2343  	case vecArrangementNone:
  2344  		ret = "none"
  2345  	default:
  2346  		panic(v)
  2347  	}
  2348  	return
  2349  }
  2350  
  2351  // vecIndex is the index of an element of a vector register
  2352  type vecIndex byte
  2353  
  2354  // vecIndexNone indicates no vector index specified.
  2355  const vecIndexNone = ^vecIndex(0)
  2356  
  2357  func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
  2358  	switch lane {
  2359  	case ssa.VecLaneI8x16:
  2360  		return vecArrangement16B
  2361  	case ssa.VecLaneI16x8:
  2362  		return vecArrangement8H
  2363  	case ssa.VecLaneI32x4:
  2364  		return vecArrangement4S
  2365  	case ssa.VecLaneI64x2:
  2366  		return vecArrangement2D
  2367  	case ssa.VecLaneF32x4:
  2368  		return vecArrangement4S
  2369  	case ssa.VecLaneF64x2:
  2370  		return vecArrangement2D
  2371  	default:
  2372  		panic(lane)
  2373  	}
  2374  }
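
// Note that integer and floating-point lanes of the same shape share an
// arrangement; exampleSharedArrangement is an example-only helper
// demonstrating this for the 32x4 lanes.
func exampleSharedArrangement() bool {
	// Both map to vecArrangement4S: the arrangement encodes only lane width
	// and count, not whether lanes are integer or floating-point.
	return ssaLaneToArrangement(ssa.VecLaneI32x4) == ssaLaneToArrangement(ssa.VecLaneF32x4)
}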