github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go (about)

     1  package arm64
     2  
     3  import (
     4  	"encoding/hex"
     5  	"fmt"
     6  	"strings"
     7  	"testing"
     8  
     9  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
    10  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
    11  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
    12  	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
    13  	"github.com/tetratelabs/wazero/internal/testing/require"
    14  )
    15  
    16  func TestMachine_LowerConditionalBranch(t *testing.T) {
    17  	cmpInSameGroupFromParams := func(
    18  		brz bool, intCond ssa.IntegerCmpCond, floatCond ssa.FloatCmpCond,
    19  		ctx *mockCompiler, builder ssa.Builder, m *machine,
    20  	) (instr *ssa.Instruction, verify func(t *testing.T)) {
    21  		m.executableContext.StartLoweringFunction(10)
    22  		entry := builder.CurrentBlock()
    23  		isInt := intCond != ssa.IntegerCmpCondInvalid
    24  
    25  		var val1, val2 ssa.Value
    26  		if isInt {
    27  			val1 = entry.AddParam(builder, ssa.TypeI64)
    28  			val2 = entry.AddParam(builder, ssa.TypeI64)
    29  			ctx.vRegMap[val1], ctx.vRegMap[val2] = regToVReg(x1).SetRegType(regalloc.RegTypeInt), regToVReg(x2).SetRegType(regalloc.RegTypeInt)
    30  		} else {
    31  			val1 = entry.AddParam(builder, ssa.TypeF64)
    32  			val2 = entry.AddParam(builder, ssa.TypeF64)
    33  			ctx.vRegMap[val1], ctx.vRegMap[val2] = regToVReg(v1).SetRegType(regalloc.RegTypeFloat), regToVReg(v2).SetRegType(regalloc.RegTypeFloat)
    34  		}
    35  
    36  		var cmpInstr *ssa.Instruction
    37  		if isInt {
    38  			cmpInstr = builder.AllocateInstruction()
    39  			cmpInstr.AsIcmp(val1, val2, intCond)
    40  			builder.InsertInstruction(cmpInstr)
    41  		} else {
    42  			cmpInstr = builder.AllocateInstruction()
    43  			cmpInstr.AsFcmp(val1, val2, floatCond)
    44  			builder.InsertInstruction(cmpInstr)
    45  		}
    46  
    47  		cmpVal := cmpInstr.Return()
    48  		ctx.vRegMap[cmpVal] = 3
    49  
    50  		ctx.definitions[val1] = &backend.SSAValueDefinition{BlkParamVReg: ctx.vRegMap[val1], BlockParamValue: val1}
    51  		ctx.definitions[val2] = &backend.SSAValueDefinition{BlkParamVReg: ctx.vRegMap[val2], BlockParamValue: val2}
    52  		ctx.definitions[cmpVal] = &backend.SSAValueDefinition{Instr: cmpInstr}
    53  		b := builder.AllocateInstruction()
    54  		if brz {
    55  			b.AsBrz(cmpVal, ssa.ValuesNil, builder.AllocateBasicBlock())
    56  		} else {
    57  			b.AsBrnz(cmpVal, ssa.ValuesNil, builder.AllocateBasicBlock())
    58  		}
    59  		builder.InsertInstruction(b)
    60  		return b, func(t *testing.T) {
    61  			require.True(t, cmpInstr.Lowered())
    62  		}
    63  	}
    64  
    65  	icmpInSameGroupFromParamAndImm12 := func(brz bool, ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
    66  		m.executableContext.StartLoweringFunction(10)
    67  		entry := builder.CurrentBlock()
    68  		v1 := entry.AddParam(builder, ssa.TypeI32)
    69  
    70  		iconst := builder.AllocateInstruction()
    71  		iconst.AsIconst32(0x4d2)
    72  		builder.InsertInstruction(iconst)
    73  		v2 := iconst.Return()
    74  
    75  		// Constant can be referenced from different groups because we inline it.
    76  		builder.SetCurrentBlock(builder.AllocateBasicBlock())
    77  
    78  		icmp := builder.AllocateInstruction()
    79  		icmp.AsIcmp(v1, v2, ssa.IntegerCmpCondEqual)
    80  		builder.InsertInstruction(icmp)
    81  		icmpVal := icmp.Return()
    82  		ctx.definitions[v1] = &backend.SSAValueDefinition{BlkParamVReg: intToVReg(1), BlockParamValue: v1}
    83  		ctx.definitions[v2] = &backend.SSAValueDefinition{Instr: iconst}
    84  		ctx.definitions[icmpVal] = &backend.SSAValueDefinition{Instr: icmp}
    85  		ctx.vRegMap[v1], ctx.vRegMap[v2], ctx.vRegMap[icmpVal] = intToVReg(1), intToVReg(2), intToVReg(3)
    86  		b := builder.AllocateInstruction()
    87  		if brz {
    88  			b.AsBrz(icmpVal, ssa.ValuesNil, builder.AllocateBasicBlock())
    89  		} else {
    90  			b.AsBrnz(icmpVal, ssa.ValuesNil, builder.AllocateBasicBlock())
    91  		}
    92  		builder.InsertInstruction(b)
    93  		return b, func(t *testing.T) {
    94  			require.True(t, icmp.Lowered())
    95  		}
    96  	}
    97  
    98  	for _, tc := range []struct {
    99  		name         string
   100  		setup        func(*mockCompiler, ssa.Builder, *machine) (instr *ssa.Instruction, verify func(t *testing.T))
   101  		instructions []string
   102  	}{
   103  		{
   104  			name: "icmp in different group",
   105  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   106  				m.executableContext.StartLoweringFunction(10)
   107  				entry := builder.CurrentBlock()
   108  				v1, v2 := entry.AddParam(builder, ssa.TypeI64), entry.AddParam(builder, ssa.TypeI64)
   109  
   110  				icmp := builder.AllocateInstruction()
   111  				icmp.AsIcmp(v1, v2, ssa.IntegerCmpCondEqual)
   112  				builder.InsertInstruction(icmp)
   113  				icmpVal := icmp.Return()
   114  				ctx.definitions[icmpVal] = &backend.SSAValueDefinition{Instr: icmp}
   115  				ctx.vRegMap[v1], ctx.vRegMap[v2], ctx.vRegMap[icmpVal] = intToVReg(1), intToVReg(2), intToVReg(3)
   116  
   117  				brz := builder.AllocateInstruction()
   118  				brz.AsBrz(icmpVal, ssa.ValuesNil, builder.AllocateBasicBlock())
   119  				builder.InsertInstruction(brz)
   120  
   121  				// Indicate that currently compiling in the different group.
   122  				ctx.currentGID = 1000
   123  				return brz, func(t *testing.T) {
   124  					require.False(t, icmp.Lowered())
   125  				}
   126  			},
   127  			instructions: []string{"cbz w3?, (L1)"},
   128  		},
   129  		{
   130  			name: "brz / icmp in the same group / params",
   131  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   132  				return cmpInSameGroupFromParams(true, ssa.IntegerCmpCondUnsignedGreaterThan, ssa.FloatCmpCondInvalid, ctx, builder, m)
   133  			},
   134  			instructions: []string{
   135  				"subs xzr, x1, x2",
   136  				"b.ls L1",
   137  			},
   138  		},
   139  		{
   140  			name: "brnz / icmp in the same group / params",
   141  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   142  				return cmpInSameGroupFromParams(false, ssa.IntegerCmpCondEqual, ssa.FloatCmpCondInvalid, ctx, builder, m)
   143  			},
   144  			instructions: []string{
   145  				"subs xzr, x1, x2",
   146  				"b.eq L1",
   147  			},
   148  		},
   149  		{
   150  			name: "brz / fcmp in the same group / params",
   151  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   152  				return cmpInSameGroupFromParams(true, ssa.IntegerCmpCondInvalid, ssa.FloatCmpCondEqual, ctx, builder, m)
   153  			},
   154  			instructions: []string{
   155  				"fcmp d1, d2",
   156  				"b.ne L1",
   157  			},
   158  		},
   159  		{
   160  			name: "brnz / fcmp in the same group / params",
   161  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   162  				return cmpInSameGroupFromParams(false, ssa.IntegerCmpCondInvalid, ssa.FloatCmpCondGreaterThan, ctx, builder, m)
   163  			},
   164  			instructions: []string{
   165  				"fcmp d1, d2",
   166  				"b.gt L1",
   167  			},
   168  		},
   169  		{
   170  			name: "brz / icmp in the same group / params",
   171  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   172  				return icmpInSameGroupFromParamAndImm12(true, ctx, builder, m)
   173  			},
   174  			instructions: []string{
   175  				"subs wzr, w1?, #0x4d2",
   176  				"b.ne L1",
   177  			},
   178  		},
   179  		{
   180  			name: "brz / icmp in the same group / params",
   181  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   182  				return icmpInSameGroupFromParamAndImm12(false, ctx, builder, m)
   183  			},
   184  			instructions: []string{
   185  				"subs wzr, w1?, #0x4d2",
   186  				"b.eq L1",
   187  			},
   188  		},
   189  	} {
   190  		t.Run(tc.name, func(t *testing.T) {
   191  			ctx, b, m := newSetupWithMockContext()
   192  			instr, verify := tc.setup(ctx, b, m)
   193  			m.LowerConditionalBranch(instr)
   194  			verify(t)
   195  			require.Equal(t, strings.Join(tc.instructions, "\n"),
   196  				formatEmittedInstructionsInCurrentBlock(m))
   197  		})
   198  	}
   199  }
   200  
   201  func TestMachine_LowerSingleBranch(t *testing.T) {
   202  	for _, tc := range []struct {
   203  		name         string
   204  		setup        func(*mockCompiler, ssa.Builder, *machine) (instr *ssa.Instruction)
   205  		instructions []string
   206  	}{
   207  		{
   208  			name: "jump-fallthrough",
   209  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   210  				jump := builder.AllocateInstruction()
   211  				jump.AsJump(ssa.ValuesNil, builder.AllocateBasicBlock())
   212  				builder.InsertInstruction(jump)
   213  				jump.AsFallthroughJump()
   214  				return jump
   215  			},
   216  			instructions: []string{}, // Fallthrough jump should be optimized out.
   217  		},
   218  		{
   219  			name: "b",
   220  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   221  				m.executableContext.StartLoweringFunction(10)
   222  				jump := builder.AllocateInstruction()
   223  				jump.AsJump(ssa.ValuesNil, builder.AllocateBasicBlock())
   224  				builder.InsertInstruction(jump)
   225  				return jump
   226  			},
   227  			instructions: []string{"b L1"},
   228  		},
   229  		{
   230  			name: "ret",
   231  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   232  				m.executableContext.StartLoweringFunction(10)
   233  				jump := builder.AllocateInstruction()
   234  				jump.AsJump(ssa.ValuesNil, builder.ReturnBlock())
   235  				builder.InsertInstruction(jump)
   236  				return jump
   237  			},
   238  			// Jump which targets the return block should be translated as "ret".
   239  			instructions: []string{"ret"},
   240  		},
   241  	} {
   242  		tc := tc
   243  		t.Run(tc.name, func(t *testing.T) {
   244  			ctx, b, m := newSetupWithMockContext()
   245  			instr := tc.setup(ctx, b, m)
   246  			m.LowerSingleBranch(instr)
   247  			require.Equal(t, strings.Join(tc.instructions, "\n"), formatEmittedInstructionsInCurrentBlock(m))
   248  		})
   249  	}
   250  }
   251  
   252  func TestMachine_InsertMove(t *testing.T) {
   253  	for _, tc := range []struct {
   254  		name        string
   255  		src, dst    regalloc.VReg
   256  		typ         ssa.Type
   257  		instruction string
   258  	}{
   259  		{
   260  			name:        "int",
   261  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeInt),
   262  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeInt),
   263  			instruction: "mov x1?, x2?",
   264  			typ:         ssa.TypeI64,
   265  		},
   266  		{
   267  			name:        "float",
   268  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeFloat),
   269  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeFloat),
   270  			instruction: "mov v1?.8b, v2?.8b",
   271  			typ:         ssa.TypeF64,
   272  		},
   273  		{
   274  			name:        "vector",
   275  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeFloat),
   276  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeFloat),
   277  			instruction: "mov v1?.16b, v2?.16b",
   278  			typ:         ssa.TypeV128,
   279  		},
   280  	} {
   281  		t.Run(tc.name, func(t *testing.T) {
   282  			_, _, m := newSetupWithMockContext()
   283  			m.InsertMove(tc.src, tc.dst, tc.typ)
   284  			require.Equal(t, tc.instruction, formatEmittedInstructionsInCurrentBlock(m))
   285  		})
   286  	}
   287  }
   288  
   289  func TestMachine_lowerIDiv(t *testing.T) {
   290  	for _, tc := range []struct {
   291  		name   string
   292  		_64bit bool
   293  		signed bool
   294  		exp    string
   295  	}{
   296  		{
   297  			name: "32bit unsigned", _64bit: false, signed: false,
   298  			exp: `
   299  udiv w1?, w2?, w3?
   300  mov x1?, x65535?
   301  cbnz w3?, L1
   302  movz x2?, #0xa, lsl 0
   303  str w2?, [x1?]
   304  mov x3?, sp
   305  str x3?, [x1?, #0x38]
   306  adr x4?, #0x0
   307  str x4?, [x1?, #0x30]
   308  exit_sequence x1?
   309  L1:
   310  `,
   311  		},
   312  		{name: "32bit signed", _64bit: false, signed: true, exp: `
   313  sdiv w1?, w2?, w3?
   314  mov x1?, x65535?
   315  cbnz w3?, L1
   316  movz x2?, #0xa, lsl 0
   317  str w2?, [x1?]
   318  mov x3?, sp
   319  str x3?, [x1?, #0x38]
   320  adr x4?, #0x0
   321  str x4?, [x1?, #0x30]
   322  exit_sequence x1?
   323  L1:
   324  adds wzr, w3?, #0x1
   325  ccmp w2?, #0x1, #0x0, eq
   326  mov x5?, x65535?
   327  b.vc L2
   328  movz x6?, #0xb, lsl 0
   329  str w6?, [x5?]
   330  mov x7?, sp
   331  str x7?, [x5?, #0x38]
   332  adr x8?, #0x0
   333  str x8?, [x5?, #0x30]
   334  exit_sequence x5?
   335  L2:
   336  `},
   337  		{name: "64bit unsigned", _64bit: true, signed: false, exp: `
   338  udiv x1?, x2?, x3?
   339  mov x1?, x65535?
   340  cbnz x3?, L1
   341  movz x2?, #0xa, lsl 0
   342  str w2?, [x1?]
   343  mov x3?, sp
   344  str x3?, [x1?, #0x38]
   345  adr x4?, #0x0
   346  str x4?, [x1?, #0x30]
   347  exit_sequence x1?
   348  L1:
   349  `},
   350  		{name: "64bit signed", _64bit: true, signed: true, exp: `
   351  sdiv x1?, x2?, x3?
   352  mov x1?, x65535?
   353  cbnz x3?, L1
   354  movz x2?, #0xa, lsl 0
   355  str w2?, [x1?]
   356  mov x3?, sp
   357  str x3?, [x1?, #0x38]
   358  adr x4?, #0x0
   359  str x4?, [x1?, #0x30]
   360  exit_sequence x1?
   361  L1:
   362  adds xzr, x3?, #0x1
   363  ccmp x2?, #0x1, #0x0, eq
   364  mov x5?, x65535?
   365  b.vc L2
   366  movz x6?, #0xb, lsl 0
   367  str w6?, [x5?]
   368  mov x7?, sp
   369  str x7?, [x5?, #0x38]
   370  adr x8?, #0x0
   371  str x8?, [x5?, #0x30]
   372  exit_sequence x5?
   373  L2:
   374  `},
   375  	} {
   376  		t.Run(tc.name, func(t *testing.T) {
   377  			execCtx := regalloc.VReg(0xffff).SetRegType(regalloc.RegTypeInt)
   378  			rd, rn, rm := regalloc.VReg(1).SetRegType(regalloc.RegTypeInt),
   379  				regalloc.VReg(2).SetRegType(regalloc.RegTypeInt),
   380  				regalloc.VReg(3).SetRegType(regalloc.RegTypeInt)
   381  			mc, _, m := newSetupWithMockContext()
   382  			mc.typeOf = map[regalloc.VRegID]ssa.Type{execCtx.ID(): ssa.TypeI64, 2: ssa.TypeI64, 3: ssa.TypeI64}
   383  			m.lowerIDiv(execCtx, operandNR(rd), operandNR(rn), operandNR(rm), tc._64bit, tc.signed)
   384  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   385  		})
   386  	}
   387  }
   388  
   389  func TestMachine_exitWithCode(t *testing.T) {
   390  	_, _, m := newSetupWithMockContext()
   391  	m.lowerExitWithCode(x1VReg, wazevoapi.ExitCodeGrowStack)
   392  	m.executableContext.FlushPendingInstructions()
   393  	m.encode(m.executableContext.PerBlockHead)
   394  	require.Equal(t, `
   395  movz x1?, #0x1, lsl 0
   396  str w1?, [x1]
   397  mov x2?, sp
   398  str x2?, [x1, #0x38]
   399  adr x3?, #0x0
   400  str x3?, [x1, #0x30]
   401  exit_sequence x1
   402  `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   403  }
   404  
   405  func TestMachine_lowerFpuToInt(t *testing.T) {
   406  	for _, tc := range []struct {
   407  		name        string
   408  		nontrapping bool
   409  		expectedAsm string
   410  	}{
   411  		{
   412  			name:        "trapping",
   413  			nontrapping: false,
   414  			expectedAsm: `
   415  msr fpsr, xzr
   416  fcvtzu w1, s2
   417  mrs x1? fpsr
   418  mov x2?, x15
   419  mov x3?, d2
   420  subs xzr, x1?, #0x1
   421  b.ne L2
   422  fcmp w3?, w3?
   423  mov x4?, x2?
   424  b.vc L1
   425  movz x5?, #0xc, lsl 0
   426  str w5?, [x4?]
   427  mov x6?, sp
   428  str x6?, [x4?, #0x38]
   429  adr x7?, #0x0
   430  str x7?, [x4?, #0x30]
   431  exit_sequence x4?
   432  L1:
   433  movz x8?, #0xb, lsl 0
   434  str w8?, [x2?]
   435  mov x9?, sp
   436  str x9?, [x2?, #0x38]
   437  adr x10?, #0x0
   438  str x10?, [x2?, #0x30]
   439  exit_sequence x2?
   440  L2:
   441  `,
   442  		},
   443  		{
   444  			name:        "nontrapping",
   445  			nontrapping: true,
   446  			expectedAsm: `
   447  fcvtzu w1, s2
   448  `,
   449  		},
   450  	} {
   451  		t.Run(tc.name, func(t *testing.T) {
   452  			mc, _, m := newSetupWithMockContext()
   453  			mc.typeOf = map[regalloc.VRegID]ssa.Type{v2VReg.ID(): ssa.TypeI64, x15VReg.ID(): ssa.TypeI64}
   454  			m.lowerFpuToInt(operandNR(x1VReg), operandNR(v2VReg), x15VReg, false, false, false, tc.nontrapping)
   455  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   456  
   457  			m.executableContext.FlushPendingInstructions()
   458  			m.encode(m.executableContext.PerBlockHead)
   459  		})
   460  	}
   461  }
   462  
   463  func TestMachine_lowerVIMul(t *testing.T) {
   464  	for _, tc := range []struct {
   465  		name          string
   466  		expectedAsm   string
   467  		arrangement   vecArrangement
   468  		expectedBytes string
   469  	}{
   470  		{
   471  			name:        "2D",
   472  			arrangement: vecArrangement2D,
   473  			expectedAsm: `
   474  rev64 v2?.4s, x15.4s
   475  mul v2?.4s, v2?.4s, x2.4s
   476  xtn v1?.2s, x2.2s
   477  addp v2?.4s, v2?.4s, v2?.4s
   478  xtn v3?.2s, x15.2s
   479  shll v4?.2s, v2?.2s
   480  umlal v4?.2s, v3?.2s, v1?.2s
   481  mov x1.16b, v4?.16b
   482  `,
   483  			expectedBytes: "e009a04e009ca24e4028a10e00bca04ee029a10e0038a12e0080a02e011ca04e",
   484  		},
   485  		{
   486  			name:        "8B",
   487  			arrangement: vecArrangement8B,
   488  			expectedAsm: `
   489  mul x1.8b, x2.8b, x15.8b
   490  `,
   491  			expectedBytes: "419c2f0e",
   492  		},
   493  		{
   494  			name:        "16B",
   495  			arrangement: vecArrangement16B,
   496  			expectedAsm: `
   497  mul x1.16b, x2.16b, x15.16b
   498  `,
   499  			expectedBytes: "419c2f4e",
   500  		},
   501  		{
   502  			name:        "4H",
   503  			arrangement: vecArrangement4H,
   504  			expectedAsm: `
   505  mul x1.4h, x2.4h, x15.4h
   506  `,
   507  			expectedBytes: "419c6f0e",
   508  		},
   509  		{
   510  			name:        "8H",
   511  			arrangement: vecArrangement8H,
   512  			expectedAsm: `
   513  mul x1.8h, x2.8h, x15.8h
   514  `,
   515  			expectedBytes: "419c6f4e",
   516  		},
   517  		{
   518  			name:        "2S",
   519  			arrangement: vecArrangement2S,
   520  			expectedAsm: `
   521  mul x1.2s, x2.2s, x15.2s
   522  `,
   523  			expectedBytes: "419caf0e",
   524  		},
   525  		{
   526  			name:        "4S",
   527  			arrangement: vecArrangement4S,
   528  			expectedAsm: `
   529  mul x1.4s, x2.4s, x15.4s
   530  `,
   531  			expectedBytes: "419caf4e",
   532  		},
   533  	} {
   534  		t.Run(tc.name, func(t *testing.T) {
   535  			_, _, m := newSetupWithMockContext()
   536  			m.lowerVIMul(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement)
   537  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   538  
   539  			m.executableContext.FlushPendingInstructions()
   540  			m.encode(m.executableContext.PerBlockHead)
   541  			buf := m.compiler.Buf()
   542  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   543  		})
   544  	}
   545  }
   546  
   547  func TestMachine_lowerVcheckTrue(t *testing.T) {
   548  	for _, tc := range []struct {
   549  		name          string
   550  		op            ssa.Opcode
   551  		expectedAsm   string
   552  		arrangement   vecArrangement
   553  		expectedBytes string
   554  	}{
   555  		{
   556  			name: "anyTrue",
   557  			op:   ssa.OpcodeVanyTrue,
   558  			expectedAsm: `
   559  umaxp v1?.16b, x1.16b, x1.16b
   560  mov x15, v1?.d[0]
   561  ccmp x15, #0x0, #0x0, al
   562  cset x15, ne
   563  `,
   564  			expectedBytes: "20a4216e0f3c084ee0e940faef079f9a",
   565  		},
   566  		{
   567  			name:        "allTrue 2D",
   568  			op:          ssa.OpcodeVallTrue,
   569  			arrangement: vecArrangement2D,
   570  			expectedAsm: `
   571  cmeq v1?.2d, x1.2d, #0
   572  addp v1?.2d, v1?.2d, v1?.2d
   573  fcmp d1?, d1?
   574  cset x15, eq
   575  `,
   576  			expectedBytes: "2098e04e00bce04e0020601eef179f9a",
   577  		},
   578  		{
   579  			name:        "allTrue 8B",
   580  			arrangement: vecArrangement8B,
   581  			op:          ssa.OpcodeVallTrue,
   582  			expectedAsm: `
   583  uminv h1?, x1.8b
   584  mov x15, v1?.d[0]
   585  ccmp x15, #0x0, #0x0, al
   586  cset x15, ne
   587  `,
   588  			expectedBytes: "20a8312e0f3c084ee0e940faef079f9a",
   589  		},
   590  		{
   591  			name:        "allTrue 16B",
   592  			arrangement: vecArrangement16B,
   593  			op:          ssa.OpcodeVallTrue,
   594  			expectedAsm: `
   595  uminv h1?, x1.16b
   596  mov x15, v1?.d[0]
   597  ccmp x15, #0x0, #0x0, al
   598  cset x15, ne
   599  `,
   600  			expectedBytes: "20a8316e0f3c084ee0e940faef079f9a",
   601  		},
   602  		{
   603  			name:        "allTrue 4H",
   604  			arrangement: vecArrangement4H,
   605  			op:          ssa.OpcodeVallTrue,
   606  			expectedAsm: `
   607  uminv s1?, x1.4h
   608  mov x15, v1?.d[0]
   609  ccmp x15, #0x0, #0x0, al
   610  cset x15, ne
   611  `,
   612  			expectedBytes: "20a8712e0f3c084ee0e940faef079f9a",
   613  		},
   614  		{
   615  			name:        "allTrue 8H",
   616  			arrangement: vecArrangement8H,
   617  			op:          ssa.OpcodeVallTrue,
   618  			expectedAsm: `
   619  uminv s1?, x1.8h
   620  mov x15, v1?.d[0]
   621  ccmp x15, #0x0, #0x0, al
   622  cset x15, ne
   623  `,
   624  			expectedBytes: "20a8716e0f3c084ee0e940faef079f9a",
   625  		},
   626  		{
   627  			name:        "allTrue 4S",
   628  			arrangement: vecArrangement4S,
   629  			op:          ssa.OpcodeVallTrue,
   630  			expectedAsm: `
   631  uminv d1?, x1.4s
   632  mov x15, v1?.d[0]
   633  ccmp x15, #0x0, #0x0, al
   634  cset x15, ne
   635  `,
   636  			expectedBytes: "20a8b16e0f3c084ee0e940faef079f9a",
   637  		},
   638  	} {
   639  		t.Run(tc.name, func(t *testing.T) {
   640  			_, _, m := newSetupWithMockContext()
   641  			m.lowerVcheckTrue(tc.op, operandNR(x1VReg), operandNR(x15VReg), tc.arrangement)
   642  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   643  
   644  			m.executableContext.FlushPendingInstructions()
   645  			m.encode(m.executableContext.PerBlockHead)
   646  			buf := m.compiler.Buf()
   647  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   648  		})
   649  	}
   650  }
   651  
   652  func TestMachine_lowerVhighBits(t *testing.T) {
   653  	for _, tc := range []struct {
   654  		name          string
   655  		expectedAsm   string
   656  		arrangement   vecArrangement
   657  		expectedBytes string
   658  	}{
   659  		{
   660  			name:        "16B",
   661  			arrangement: vecArrangement16B,
   662  			expectedAsm: `
   663  sshr v3?.16b, x1.16b, #7
   664  movz x1?, #0x201, lsl 0
   665  movk x1?, #0x804, lsl 16
   666  movk x1?, #0x2010, lsl 32
   667  movk x1?, #0x8040, lsl 48
   668  dup v2?.2d, x1?
   669  and v3?.16b, v3?.16b, v2?.16b
   670  ext v2?.16b, v3?.16b, v3?.16b, #8
   671  zip1 v2?.16b, v3?.16b, v2?.16b
   672  addv s2?, v2?.8h
   673  umov w15, v2?.h[0]
   674  `,
   675  			expectedBytes: "2004094f204080d28000a1f20002c4f20008f0f2000c084e001c204e0040006e0038004e00b8714e0f3c020e",
   676  		},
   677  		{
   678  			name:        "8H",
   679  			arrangement: vecArrangement8H,
   680  			expectedAsm: `
   681  sshr v3?.8h, x1.8h, #15
   682  movz x1?, #0x1, lsl 0
   683  movk x1?, #0x2, lsl 16
   684  movk x1?, #0x4, lsl 32
   685  movk x1?, #0x8, lsl 48
   686  dup v2?.2d, x1?
   687  lsl x1?, x1?, 0x4
   688  ins v2?.d[1], x1?
   689  and v2?.16b, v3?.16b, v2?.16b
   690  addv s2?, v2?.8h
   691  umov w15, v2?.h[0]
   692  `,
   693  			expectedBytes: "2004114f200080d24000a0f28000c0f20001e0f2000c084e00ec7cd3001c184e001c204e00b8714e0f3c020e",
   694  		},
   695  		{
   696  			name:        "4S",
   697  			arrangement: vecArrangement4S,
   698  			expectedAsm: `
   699  sshr v3?.4s, x1.4s, #31
   700  movz x1?, #0x1, lsl 0
   701  movk x1?, #0x2, lsl 32
   702  dup v2?.2d, x1?
   703  lsl x1?, x1?, 0x2
   704  ins v2?.d[1], x1?
   705  and v2?.16b, v3?.16b, v2?.16b
   706  addv d2?, v2?.4s
   707  umov w15, v2?.s[0]
   708  `,
   709  			expectedBytes: "2004214f200080d24000c0f2000c084e00f47ed3001c184e001c204e00b8b14e0f3c040e",
   710  		},
   711  		{
   712  			name:        "2D",
   713  			arrangement: vecArrangement2D,
   714  			expectedAsm: `
   715  mov x15, x1.d[0]
   716  mov x1?, x1.d[1]
   717  lsr x1?, x1?, 0x3f
   718  lsr x15, x15, 0x3f
   719  add w15, w15, w1?, lsl #1
   720  `,
   721  			expectedBytes: "2f3c084e203c184e00fc7fd3effd7fd3ef05000b",
   722  		},
   723  	} {
   724  		t.Run(tc.name, func(t *testing.T) {
   725  			_, _, m := newSetupWithMockContext()
   726  			m.lowerVhighBits(operandNR(x1VReg), operandNR(x15VReg), tc.arrangement)
   727  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   728  
   729  			m.executableContext.FlushPendingInstructions()
   730  			m.encode(m.executableContext.PerBlockHead)
   731  			buf := m.compiler.Buf()
   732  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   733  		})
   734  	}
   735  }
   736  
   737  func TestMachine_lowerShuffle(t *testing.T) {
   738  	for _, tc := range []struct {
   739  		name          string
   740  		lanes         []uint64
   741  		expectedAsm   string
   742  		expectedBytes string
   743  	}{
   744  		{
   745  			name:  "lanes 0..15",
   746  			lanes: []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
   747  			expectedAsm: `
   748  mov v29.16b, x2.16b
   749  mov v30.16b, x15.16b
   750  ldr q1?, #8; b 32; data.v128  0706050403020100 0f0e0d0c0b0a0908
   751  tbl x1.16b, { v29.16b, v30.16b }, v1?.16b
   752  `,
   753  			expectedBytes: "5d1ca24efe1daf4e4000009c05000014000102030405060708090a0b0c0d0e0fa123004e",
   754  		},
   755  		{
   756  			name:  "lanes 0101...",
   757  			lanes: []uint64{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
   758  			expectedAsm: `
   759  mov v29.16b, x2.16b
   760  mov v30.16b, x15.16b
   761  ldr q1?, #8; b 32; data.v128  0100010001000100 0100010001000100
   762  tbl x1.16b, { v29.16b, v30.16b }, v1?.16b
   763  `,
   764  			expectedBytes: "5d1ca24efe1daf4e4000009c0500001400010001000100010001000100010001a123004e",
   765  		},
   766  	} {
   767  		t.Run(tc.name, func(t *testing.T) {
   768  			_, _, m := newSetupWithMockContext()
   769  			lanes := tc.lanes
   770  
   771  			// Encode the 16 bytes as 8 bytes in u1, and 8 bytes in u2.
   772  			lane1 := lanes[7]<<56 | lanes[6]<<48 | lanes[5]<<40 | lanes[4]<<32 | lanes[3]<<24 | lanes[2]<<16 | lanes[1]<<8 | lanes[0]
   773  			lane2 := lanes[15]<<56 | lanes[14]<<48 | lanes[13]<<40 | lanes[12]<<32 | lanes[11]<<24 | lanes[10]<<16 | lanes[9]<<8 | lanes[8]
   774  
   775  			m.lowerShuffle(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), lane1, lane2)
   776  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   777  
   778  			m.executableContext.FlushPendingInstructions()
   779  			m.encode(m.executableContext.PerBlockHead)
   780  			buf := m.compiler.Buf()
   781  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   782  		})
   783  	}
   784  }
   785  
   786  func TestMachine_lowerVShift(t *testing.T) {
   787  	for _, tc := range []struct {
   788  		name          string
   789  		expectedAsm   string
   790  		op            ssa.Opcode
   791  		arrangement   vecArrangement
   792  		expectedBytes string
   793  	}{
   794  		{
   795  			name:        "VIshl",
   796  			op:          ssa.OpcodeVIshl,
   797  			arrangement: vecArrangement16B,
   798  			expectedAsm: `
   799  and x1?, x15, #0x7
   800  dup v2?.16b, x1?
   801  sshl x1.16b, x2.16b, v2?.16b
   802  `,
   803  			expectedBytes: "e0094092000c014e4144204e",
   804  		},
   805  		{
   806  			name:        "VSshr",
   807  			op:          ssa.OpcodeVSshr,
   808  			arrangement: vecArrangement16B,
   809  			expectedAsm: `
   810  and x1?, x15, #0x7
   811  sub x1?, xzr, x1?
   812  dup v2?.16b, x1?
   813  sshl x1.16b, x2.16b, v2?.16b
   814  `,
   815  			expectedBytes: "e0094092e00300cb000c014e4144204e",
   816  		},
   817  		{
   818  			name:        "VUshr",
   819  			op:          ssa.OpcodeVUshr,
   820  			arrangement: vecArrangement16B,
   821  			expectedAsm: `
   822  and x1?, x15, #0x7
   823  sub x1?, xzr, x1?
   824  dup v2?.16b, x1?
   825  ushl x1.16b, x2.16b, v2?.16b
   826  `,
   827  			expectedBytes: "e0094092e00300cb000c014e4144206e",
   828  		},
   829  	} {
   830  		t.Run(tc.name, func(t *testing.T) {
   831  			_, _, m := newSetupWithMockContext()
   832  			m.lowerVShift(tc.op, operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement)
   833  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   834  
   835  			m.executableContext.FlushPendingInstructions()
   836  			m.encode(m.executableContext.PerBlockHead)
   837  			buf := m.compiler.Buf()
   838  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   839  		})
   840  	}
   841  }
   842  
   843  func TestMachine_lowerSelectVec(t *testing.T) {
   844  	_, _, m := newSetupWithMockContext()
   845  	c := operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
   846  	rn := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   847  	rm := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   848  	rd := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   849  
   850  	require.Equal(t, 1, int(c.reg().ID()))
   851  	require.Equal(t, 2, int(rn.reg().ID()))
   852  	require.Equal(t, 3, int(rm.reg().ID()))
   853  	require.Equal(t, 4, int(rd.reg().ID()))
   854  
   855  	m.lowerSelectVec(c, rn, rm, rd)
   856  	require.Equal(t, `
   857  subs wzr, w1?, wzr
   858  csetm x5?, ne
   859  dup v6?.2d, x5?
   860  bsl v6?.16b, v2?.16b, v3?.16b
   861  mov v4?.16b, v6?.16b
   862  `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   863  }
   864  
   865  func TestMachine_lowerFcopysign(t *testing.T) {
   866  	for _, tc := range []struct {
   867  		_64bit bool
   868  		exp    string
   869  	}{
   870  		{
   871  			_64bit: false,
   872  			exp: `
   873  movz w1?, #0x8000, lsl 16
   874  ins v2?.s[0], w1?
   875  mov v6?.8b, v3?.8b
   876  bit v6?.8b, v4?.8b, v2?.8b
   877  mov v5?.8b, v6?.8b
   878  `,
   879  		},
   880  		{
   881  			_64bit: true,
   882  			exp: `
   883  movz x1?, #0x8000, lsl 48
   884  ins v2?.d[0], x1?
   885  mov v6?.8b, v3?.8b
   886  bit v6?.8b, v4?.8b, v2?.8b
   887  mov v5?.8b, v6?.8b
   888  `,
   889  		},
   890  	} {
   891  		t.Run(fmt.Sprintf("64bit=%v", tc._64bit), func(t *testing.T) {
   892  			_, _, m := newSetupWithMockContext()
   893  			var typ, ftyp ssa.Type
   894  			if tc._64bit {
   895  				typ = ssa.TypeI64
   896  				ftyp = ssa.TypeF64
   897  			} else {
   898  				typ = ssa.TypeI32
   899  				ftyp = ssa.TypeF32
   900  			}
   901  			tmpI := operandNR(m.compiler.AllocateVReg(typ))
   902  			tmpF := operandNR(m.compiler.AllocateVReg(ftyp))
   903  			rn := operandNR(m.compiler.AllocateVReg(ftyp))
   904  			rm := operandNR(m.compiler.AllocateVReg(ftyp))
   905  			rd := operandNR(m.compiler.AllocateVReg(ftyp))
   906  
   907  			require.Equal(t, 1, int(tmpI.reg().ID()))
   908  			require.Equal(t, 2, int(tmpF.reg().ID()))
   909  			require.Equal(t, 3, int(rn.reg().ID()))
   910  			require.Equal(t, 4, int(rm.reg().ID()))
   911  			require.Equal(t, 5, int(rd.reg().ID()))
   912  
   913  			m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, tc._64bit)
   914  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   915  		})
   916  	}
   917  }
   918  
   919  func TestMachine_lowerRotl(t *testing.T) {
   920  	for _, tc := range []struct {
   921  		_64bit bool
   922  		exp    string
   923  	}{
   924  		{
   925  			_64bit: false,
   926  			exp: `
   927  sub w1?, wzr, w3?
   928  ror w4?, w2?, w1?
   929  `,
   930  		},
   931  		{
   932  			_64bit: true,
   933  			exp: `
   934  sub x1?, xzr, x3?
   935  ror x4?, x2?, x1?
   936  `,
   937  		},
   938  	} {
   939  		t.Run(fmt.Sprintf("64bit=%v", tc._64bit), func(t *testing.T) {
   940  			_, _, m := newSetupWithMockContext()
   941  			var typ ssa.Type
   942  			if tc._64bit {
   943  				typ = ssa.TypeI64
   944  			} else {
   945  				typ = ssa.TypeI32
   946  			}
   947  			tmpI := operandNR(m.compiler.AllocateVReg(typ))
   948  			rn := operandNR(m.compiler.AllocateVReg(typ))
   949  			rm := operandNR(m.compiler.AllocateVReg(typ))
   950  			rd := operandNR(m.compiler.AllocateVReg(typ))
   951  
   952  			require.Equal(t, 1, int(tmpI.reg().ID()))
   953  			require.Equal(t, 2, int(rn.reg().ID()))
   954  			require.Equal(t, 3, int(rm.reg().ID()))
   955  			require.Equal(t, 4, int(rd.reg().ID()))
   956  
   957  			m.lowerRotlImpl(rd, rn, rm, tmpI, tc._64bit)
   958  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   959  		})
   960  	}
   961  }
   962  
   963  func TestMachine_lowerAtomicRmw(t *testing.T) {
   964  	tests := []struct {
   965  		name      string
   966  		op        atomicRmwOp
   967  		negateArg bool
   968  		flipArg   bool
   969  		_64bit    bool
   970  		size      uint64
   971  		exp       string
   972  	}{
   973  		{
   974  			name: "add 32",
   975  			op:   atomicRmwOpAdd,
   976  			size: 4,
   977  			exp: `
   978  ldaddal w3?, w4?, x2?
   979  `,
   980  		},
   981  		{
   982  			name: "add 32_16u",
   983  			op:   atomicRmwOpAdd,
   984  			size: 2,
   985  			exp: `
   986  ldaddalh w3?, w4?, x2?
   987  `,
   988  		},
   989  		{
   990  			name: "add 32_8u",
   991  			op:   atomicRmwOpAdd,
   992  			size: 1,
   993  			exp: `
   994  ldaddalb w3?, w4?, x2?
   995  `,
   996  		},
   997  		{
   998  			name:   "add 64",
   999  			op:     atomicRmwOpAdd,
  1000  			size:   8,
  1001  			_64bit: true,
  1002  			exp: `
  1003  ldaddal x3?, x4?, x2?
  1004  `,
  1005  		},
  1006  		{
  1007  			name:   "add 64_32u",
  1008  			op:     atomicRmwOpAdd,
  1009  			size:   4,
  1010  			_64bit: true,
  1011  			exp: `
  1012  ldaddal w3?, w4?, x2?
  1013  `,
  1014  		},
  1015  		{
  1016  			name:   "add 64_16u",
  1017  			op:     atomicRmwOpAdd,
  1018  			size:   2,
  1019  			_64bit: true,
  1020  			exp: `
  1021  ldaddalh w3?, w4?, x2?
  1022  `,
  1023  		},
  1024  		{
  1025  			name:   "add 64_8u",
  1026  			op:     atomicRmwOpAdd,
  1027  			size:   1,
  1028  			_64bit: true,
  1029  			exp: `
  1030  ldaddalb w3?, w4?, x2?
  1031  `,
  1032  		},
  1033  		{
  1034  			name:      "sub 32",
  1035  			op:        atomicRmwOpAdd,
  1036  			negateArg: true,
  1037  			size:      4,
  1038  			exp: `
  1039  sub w1?, wzr, w3?
  1040  ldaddal w1?, w4?, x2?
  1041  `,
  1042  		},
  1043  		{
  1044  			name:      "sub 32_16u",
  1045  			op:        atomicRmwOpAdd,
  1046  			negateArg: true,
  1047  			size:      2,
  1048  			exp: `
  1049  sub w1?, wzr, w3?
  1050  ldaddalh w1?, w4?, x2?
  1051  `,
  1052  		},
  1053  		{
  1054  			name:      "sub 32_8u",
  1055  			op:        atomicRmwOpAdd,
  1056  			negateArg: true,
  1057  			size:      1,
  1058  			exp: `
  1059  sub w1?, wzr, w3?
  1060  ldaddalb w1?, w4?, x2?
  1061  `,
  1062  		},
  1063  		{
  1064  			name:      "sub 64",
  1065  			op:        atomicRmwOpAdd,
  1066  			negateArg: true,
  1067  			size:      8,
  1068  			_64bit:    true,
  1069  			exp: `
  1070  sub x1?, xzr, x3?
  1071  ldaddal x1?, x4?, x2?
  1072  `,
  1073  		},
  1074  		{
  1075  			name:      "sub 64_32u",
  1076  			op:        atomicRmwOpAdd,
  1077  			negateArg: true,
  1078  			size:      4,
  1079  			_64bit:    true,
  1080  			exp: `
  1081  sub x1?, xzr, x3?
  1082  ldaddal w1?, w4?, x2?
  1083  `,
  1084  		},
  1085  		{
  1086  			name:      "sub 64_16u",
  1087  			op:        atomicRmwOpAdd,
  1088  			negateArg: true,
  1089  			size:      2,
  1090  			_64bit:    true,
  1091  			exp: `
  1092  sub x1?, xzr, x3?
  1093  ldaddalh w1?, w4?, x2?
  1094  `,
  1095  		},
  1096  		{
  1097  			name:      "sub 64_8u",
  1098  			op:        atomicRmwOpAdd,
  1099  			negateArg: true,
  1100  			size:      1,
  1101  			_64bit:    true,
  1102  			exp: `
  1103  sub x1?, xzr, x3?
  1104  ldaddalb w1?, w4?, x2?
  1105  `,
  1106  		},
  1107  		{
  1108  			name:    "and 32",
  1109  			op:      atomicRmwOpClr,
  1110  			flipArg: true,
  1111  			size:    4,
  1112  			exp: `
  1113  orn w1?, wzr, w3?
  1114  ldclral w1?, w4?, x2?
  1115  `,
  1116  		},
  1117  		{
  1118  			name:    "and 32_16u",
  1119  			op:      atomicRmwOpClr,
  1120  			flipArg: true,
  1121  			size:    2,
  1122  			exp: `
  1123  orn w1?, wzr, w3?
  1124  ldclralh w1?, w4?, x2?
  1125  `,
  1126  		},
  1127  		{
  1128  			name:    "and 32_8u",
  1129  			op:      atomicRmwOpClr,
  1130  			flipArg: true,
  1131  			size:    1,
  1132  			exp: `
  1133  orn w1?, wzr, w3?
  1134  ldclralb w1?, w4?, x2?
  1135  `,
  1136  		},
  1137  		{
  1138  			name:    "and 64",
  1139  			op:      atomicRmwOpClr,
  1140  			flipArg: true,
  1141  			size:    8,
  1142  			_64bit:  true,
  1143  			exp: `
  1144  orn x1?, xzr, x3?
  1145  ldclral x1?, x4?, x2?
  1146  `,
  1147  		},
  1148  		{
  1149  			name:    "and 64_32u",
  1150  			op:      atomicRmwOpClr,
  1151  			flipArg: true,
  1152  			size:    4,
  1153  			_64bit:  true,
  1154  			exp: `
  1155  orn x1?, xzr, x3?
  1156  ldclral w1?, w4?, x2?
  1157  `,
  1158  		},
  1159  		{
  1160  			name:    "and 64_16u",
  1161  			op:      atomicRmwOpClr,
  1162  			flipArg: true,
  1163  			size:    2,
  1164  			_64bit:  true,
  1165  			exp: `
  1166  orn x1?, xzr, x3?
  1167  ldclralh w1?, w4?, x2?
  1168  `,
  1169  		},
  1170  		{
  1171  			name:    "and 64_8u",
  1172  			op:      atomicRmwOpClr,
  1173  			flipArg: true,
  1174  			size:    1,
  1175  			_64bit:  true,
  1176  			exp: `
  1177  orn x1?, xzr, x3?
  1178  ldclralb w1?, w4?, x2?
  1179  `,
  1180  		},
  1181  		{
  1182  			name: "or 32",
  1183  			op:   atomicRmwOpSet,
  1184  			size: 4,
  1185  			exp: `
  1186  ldsetal w3?, w4?, x2?
  1187  `,
  1188  		},
  1189  		{
  1190  			name: "or 32_16u",
  1191  			op:   atomicRmwOpSet,
  1192  			size: 2,
  1193  			exp: `
  1194  ldsetalh w3?, w4?, x2?
  1195  `,
  1196  		},
  1197  		{
  1198  			name: "or 32_8u",
  1199  			op:   atomicRmwOpSet,
  1200  			size: 1,
  1201  			exp: `
  1202  ldsetalb w3?, w4?, x2?
  1203  `,
  1204  		},
  1205  		{
  1206  			name:   "or 64",
  1207  			op:     atomicRmwOpSet,
  1208  			size:   8,
  1209  			_64bit: true,
  1210  			exp: `
  1211  ldsetal x3?, x4?, x2?
  1212  `,
  1213  		},
  1214  		{
  1215  			name:   "or 64_32u",
  1216  			op:     atomicRmwOpSet,
  1217  			size:   4,
  1218  			_64bit: true,
  1219  			exp: `
  1220  ldsetal w3?, w4?, x2?
  1221  `,
  1222  		},
  1223  		{
  1224  			name:   "or 64_16u",
  1225  			op:     atomicRmwOpSet,
  1226  			size:   2,
  1227  			_64bit: true,
  1228  			exp: `
  1229  ldsetalh w3?, w4?, x2?
  1230  `,
  1231  		},
  1232  		{
  1233  			name:   "or 64_8u",
  1234  			op:     atomicRmwOpSet,
  1235  			size:   1,
  1236  			_64bit: true,
  1237  			exp: `
  1238  ldsetalb w3?, w4?, x2?
  1239  `,
  1240  		},
  1241  		{
  1242  			name: "xor 32",
  1243  			op:   atomicRmwOpEor,
  1244  			size: 4,
  1245  			exp: `
  1246  ldeoral w3?, w4?, x2?
  1247  `,
  1248  		},
  1249  		{
  1250  			name: "xor 32_16u",
  1251  			op:   atomicRmwOpEor,
  1252  			size: 2,
  1253  			exp: `
  1254  ldeoralh w3?, w4?, x2?
  1255  `,
  1256  		},
  1257  		{
  1258  			name: "xor 32_8u",
  1259  			op:   atomicRmwOpEor,
  1260  			size: 1,
  1261  			exp: `
  1262  ldeoralb w3?, w4?, x2?
  1263  `,
  1264  		},
  1265  		{
  1266  			name:   "xor 64",
  1267  			op:     atomicRmwOpEor,
  1268  			size:   8,
  1269  			_64bit: true,
  1270  			exp: `
  1271  ldeoral x3?, x4?, x2?
  1272  `,
  1273  		},
  1274  		{
  1275  			name:   "xor 64_32u",
  1276  			op:     atomicRmwOpEor,
  1277  			size:   4,
  1278  			_64bit: true,
  1279  			exp: `
  1280  ldeoral w3?, w4?, x2?
  1281  `,
  1282  		},
  1283  		{
  1284  			name:   "xor 64_16u",
  1285  			op:     atomicRmwOpEor,
  1286  			size:   2,
  1287  			_64bit: true,
  1288  			exp: `
  1289  ldeoralh w3?, w4?, x2?
  1290  `,
  1291  		},
  1292  		{
  1293  			name:   "xor 64_8u",
  1294  			op:     atomicRmwOpEor,
  1295  			size:   1,
  1296  			_64bit: true,
  1297  			exp: `
  1298  ldeoralb w3?, w4?, x2?
  1299  `,
  1300  		},
  1301  		{
  1302  			name: "xchg 32",
  1303  			op:   atomicRmwOpSwp,
  1304  			size: 4,
  1305  			exp: `
  1306  swpal w3?, w4?, x2?
  1307  `,
  1308  		},
  1309  		{
  1310  			name: "xchg 32_16u",
  1311  			op:   atomicRmwOpSwp,
  1312  			size: 2,
  1313  			exp: `
  1314  swpalh w3?, w4?, x2?
  1315  `,
  1316  		},
  1317  		{
  1318  			name: "xchg 32_8u",
  1319  			op:   atomicRmwOpSwp,
  1320  			size: 1,
  1321  			exp: `
  1322  swpalb w3?, w4?, x2?
  1323  `,
  1324  		},
  1325  		{
  1326  			name:   "xchg 64",
  1327  			op:     atomicRmwOpSwp,
  1328  			size:   8,
  1329  			_64bit: true,
  1330  			exp: `
  1331  swpal x3?, x4?, x2?
  1332  `,
  1333  		},
  1334  		{
  1335  			name:   "xchg 64_32u",
  1336  			op:     atomicRmwOpSwp,
  1337  			size:   4,
  1338  			_64bit: true,
  1339  			exp: `
  1340  swpal w3?, w4?, x2?
  1341  `,
  1342  		},
  1343  		{
  1344  			name:   "xchg 64_16u",
  1345  			op:     atomicRmwOpSwp,
  1346  			size:   2,
  1347  			_64bit: true,
  1348  			exp: `
  1349  swpalh w3?, w4?, x2?
  1350  `,
  1351  		},
  1352  		{
  1353  			name:   "xchg 64_8u",
  1354  			op:     atomicRmwOpSwp,
  1355  			size:   1,
  1356  			_64bit: true,
  1357  			exp: `
  1358  swpalb w3?, w4?, x2?
  1359  `,
  1360  		},
  1361  	}
  1362  
  1363  	for _, tc := range tests {
  1364  		tc := tc
  1365  		t.Run(tc.name, func(t *testing.T) {
  1366  			_, _, m := newSetupWithMockContext()
  1367  			var typ ssa.Type
  1368  			if tc._64bit {
  1369  				typ = ssa.TypeI64
  1370  			} else {
  1371  				typ = ssa.TypeI32
  1372  			}
  1373  			tmp := operandNR(m.compiler.AllocateVReg(typ))
  1374  			rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
  1375  			rs := operandNR(m.compiler.AllocateVReg(typ))
  1376  			rt := operandNR(m.compiler.AllocateVReg(typ))
  1377  
  1378  			require.Equal(t, 1, int(tmp.reg().ID()))
  1379  			require.Equal(t, 2, int(rn.reg().ID()))
  1380  			require.Equal(t, 3, int(rs.reg().ID()))
  1381  			require.Equal(t, 4, int(rt.reg().ID()))
  1382  
  1383  			m.lowerAtomicRmwImpl(tc.op, rn, rs, rt, tmp, tc.size, tc.negateArg, tc.flipArg, tc._64bit)
  1384  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
  1385  		})
  1386  	}
  1387  }
  1388  
  1389  func TestMachine_lowerAtomicCas(t *testing.T) {
  1390  	tests := []struct {
  1391  		name   string
  1392  		_64bit bool
  1393  		size   uint64
  1394  		exp    string
  1395  	}{
  1396  		{
  1397  			name: "cas 32",
  1398  			size: 4,
  1399  			exp: `
  1400  casal w2?, w3?, x1?
  1401  `,
  1402  		},
  1403  		{
  1404  			name: "cas 32_16u",
  1405  			size: 2,
  1406  			exp: `
  1407  casalh w2?, w3?, x1?
  1408  `,
  1409  		},
  1410  		{
  1411  			name: "cas 32_8u",
  1412  			size: 1,
  1413  			exp: `
  1414  casalb w2?, w3?, x1?
  1415  `,
  1416  		},
  1417  		{
  1418  			name:   "cas 64",
  1419  			size:   8,
  1420  			_64bit: true,
  1421  			exp: `
  1422  casal x2?, x3?, x1?
  1423  `,
  1424  		},
  1425  		{
  1426  			name:   "cas 64_32u",
  1427  			size:   4,
  1428  			_64bit: true,
  1429  			exp: `
  1430  casal w2?, w3?, x1?
  1431  `,
  1432  		},
  1433  		{
  1434  			name:   "cas 64_16u",
  1435  			size:   2,
  1436  			_64bit: true,
  1437  			exp: `
  1438  casalh w2?, w3?, x1?
  1439  `,
  1440  		},
  1441  		{
  1442  			name:   "cas 64_8u",
  1443  			size:   1,
  1444  			_64bit: true,
  1445  			exp: `
  1446  casalb w2?, w3?, x1?
  1447  `,
  1448  		},
  1449  	}
  1450  
  1451  	for _, tc := range tests {
  1452  		tc := tc
  1453  		t.Run(tc.name, func(t *testing.T) {
  1454  			_, _, m := newSetupWithMockContext()
  1455  			var typ ssa.Type
  1456  			if tc._64bit {
  1457  				typ = ssa.TypeI64
  1458  			} else {
  1459  				typ = ssa.TypeI32
  1460  			}
  1461  			rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
  1462  			rs := operandNR(m.compiler.AllocateVReg(typ))
  1463  			rt := operandNR(m.compiler.AllocateVReg(typ))
  1464  
  1465  			require.Equal(t, 1, int(rn.reg().ID()))
  1466  			require.Equal(t, 2, int(rs.reg().ID()))
  1467  			require.Equal(t, 3, int(rt.reg().ID()))
  1468  
  1469  			m.lowerAtomicCasImpl(rn, rs, rt, tc.size)
  1470  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
  1471  		})
  1472  	}
  1473  }
  1474  
  1475  func TestMachine_lowerAtomicLoad(t *testing.T) {
  1476  	tests := []struct {
  1477  		name   string
  1478  		_64bit bool
  1479  		size   uint64
  1480  		exp    string
  1481  	}{
  1482  		{
  1483  			name: "load 32",
  1484  			size: 4,
  1485  			exp: `
  1486  ldar w2?, x1?
  1487  `,
  1488  		},
  1489  		{
  1490  			name: "load 32_16u",
  1491  			size: 2,
  1492  			exp: `
  1493  ldarh w2?, x1?
  1494  `,
  1495  		},
  1496  		{
  1497  			name: "load 32_8u",
  1498  			size: 1,
  1499  			exp: `
  1500  ldarb w2?, x1?
  1501  `,
  1502  		},
  1503  		{
  1504  			name:   "load 64",
  1505  			size:   8,
  1506  			_64bit: true,
  1507  			exp: `
  1508  ldar x2?, x1?
  1509  `,
  1510  		},
  1511  		{
  1512  			name:   "load 64_32u",
  1513  			size:   4,
  1514  			_64bit: true,
  1515  			exp: `
  1516  ldar w2?, x1?
  1517  `,
  1518  		},
  1519  		{
  1520  			name:   "load 64_16u",
  1521  			size:   2,
  1522  			_64bit: true,
  1523  			exp: `
  1524  ldarh w2?, x1?
  1525  `,
  1526  		},
  1527  		{
  1528  			name:   "load 64_8u",
  1529  			size:   1,
  1530  			_64bit: true,
  1531  			exp: `
  1532  ldarb w2?, x1?
  1533  `,
  1534  		},
  1535  	}
  1536  
  1537  	for _, tc := range tests {
  1538  		tc := tc
  1539  		t.Run(tc.name, func(t *testing.T) {
  1540  			_, _, m := newSetupWithMockContext()
  1541  			var typ ssa.Type
  1542  			if tc._64bit {
  1543  				typ = ssa.TypeI64
  1544  			} else {
  1545  				typ = ssa.TypeI32
  1546  			}
  1547  			rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
  1548  			rt := operandNR(m.compiler.AllocateVReg(typ))
  1549  
  1550  			require.Equal(t, 1, int(rn.reg().ID()))
  1551  			require.Equal(t, 2, int(rt.reg().ID()))
  1552  
  1553  			m.lowerAtomicLoadImpl(rn, rt, tc.size)
  1554  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
  1555  		})
  1556  	}
  1557  }
  1558  
  1559  func TestMachine_lowerAtomicStore(t *testing.T) {
  1560  	tests := []struct {
  1561  		name   string
  1562  		_64bit bool
  1563  		size   uint64
  1564  		exp    string
  1565  	}{
  1566  		{
  1567  			name: "store 32",
  1568  			size: 4,
  1569  			exp: `
  1570  stlr w2?, x1?
  1571  `,
  1572  		},
  1573  		{
  1574  			name: "store 32_16u",
  1575  			size: 2,
  1576  			exp: `
  1577  stlrh w2?, x1?
  1578  `,
  1579  		},
  1580  		{
  1581  			name: "store 32_8u",
  1582  			size: 1,
  1583  			exp: `
  1584  stlrb w2?, x1?
  1585  `,
  1586  		},
  1587  		{
  1588  			name:   "store 64",
  1589  			size:   8,
  1590  			_64bit: true,
  1591  			exp: `
  1592  stlr x2?, x1?
  1593  `,
  1594  		},
  1595  		{
  1596  			name:   "store 64_32u",
  1597  			size:   4,
  1598  			_64bit: true,
  1599  			exp: `
  1600  stlr w2?, x1?
  1601  `,
  1602  		},
  1603  		{
  1604  			name:   "store 64_16u",
  1605  			size:   2,
  1606  			_64bit: true,
  1607  			exp: `
  1608  stlrh w2?, x1?
  1609  `,
  1610  		},
  1611  		{
  1612  			name:   "store 64_8u",
  1613  			size:   1,
  1614  			_64bit: true,
  1615  			exp: `
  1616  stlrb w2?, x1?
  1617  `,
  1618  		},
  1619  	}
  1620  
  1621  	for _, tc := range tests {
  1622  		tc := tc
  1623  		t.Run(tc.name, func(t *testing.T) {
  1624  			_, _, m := newSetupWithMockContext()
  1625  			var typ ssa.Type
  1626  			if tc._64bit {
  1627  				typ = ssa.TypeI64
  1628  			} else {
  1629  				typ = ssa.TypeI32
  1630  			}
  1631  			rn := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
  1632  			rt := operandNR(m.compiler.AllocateVReg(typ))
  1633  
  1634  			require.Equal(t, 1, int(rn.reg().ID()))
  1635  			require.Equal(t, 2, int(rt.reg().ID()))
  1636  
  1637  			m.lowerAtomicStoreImpl(rn, rt, tc.size)
  1638  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
  1639  		})
  1640  	}
  1641  }