github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/lower_instr_test.go (about)

     1  package arm64
     2  
     3  import (
     4  	"encoding/hex"
     5  	"fmt"
     6  	"strings"
     7  	"testing"
     8  
     9  	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend"
    10  	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
    11  	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
    12  	"github.com/bananabytelabs/wazero/internal/engine/wazevo/wazevoapi"
    13  	"github.com/bananabytelabs/wazero/internal/testing/require"
    14  )
    15  
    16  func TestMachine_LowerConditionalBranch(t *testing.T) {
    17  	cmpInSameGroupFromParams := func(
    18  		brz bool, intCond ssa.IntegerCmpCond, floatCond ssa.FloatCmpCond,
    19  		ctx *mockCompiler, builder ssa.Builder, m *machine,
    20  	) (instr *ssa.Instruction, verify func(t *testing.T)) {
    21  		m.executableContext.StartLoweringFunction(10)
    22  		entry := builder.CurrentBlock()
    23  		isInt := intCond != ssa.IntegerCmpCondInvalid
    24  
    25  		var val1, val2 ssa.Value
    26  		if isInt {
    27  			val1 = entry.AddParam(builder, ssa.TypeI64)
    28  			val2 = entry.AddParam(builder, ssa.TypeI64)
    29  			ctx.vRegMap[val1], ctx.vRegMap[val2] = regToVReg(x1).SetRegType(regalloc.RegTypeInt), regToVReg(x2).SetRegType(regalloc.RegTypeInt)
    30  		} else {
    31  			val1 = entry.AddParam(builder, ssa.TypeF64)
    32  			val2 = entry.AddParam(builder, ssa.TypeF64)
    33  			ctx.vRegMap[val1], ctx.vRegMap[val2] = regToVReg(v1).SetRegType(regalloc.RegTypeFloat), regToVReg(v2).SetRegType(regalloc.RegTypeFloat)
    34  		}
    35  
    36  		var cmpInstr *ssa.Instruction
    37  		if isInt {
    38  			cmpInstr = builder.AllocateInstruction()
    39  			cmpInstr.AsIcmp(val1, val2, intCond)
    40  			builder.InsertInstruction(cmpInstr)
    41  		} else {
    42  			cmpInstr = builder.AllocateInstruction()
    43  			cmpInstr.AsFcmp(val1, val2, floatCond)
    44  			builder.InsertInstruction(cmpInstr)
    45  		}
    46  
    47  		cmpVal := cmpInstr.Return()
    48  		ctx.vRegMap[cmpVal] = 3
    49  
    50  		ctx.definitions[val1] = &backend.SSAValueDefinition{BlkParamVReg: ctx.vRegMap[val1], BlockParamValue: val1}
    51  		ctx.definitions[val2] = &backend.SSAValueDefinition{BlkParamVReg: ctx.vRegMap[val2], BlockParamValue: val2}
    52  		ctx.definitions[cmpVal] = &backend.SSAValueDefinition{Instr: cmpInstr}
    53  		b := builder.AllocateInstruction()
    54  		if brz {
    55  			b.AsBrz(cmpVal, nil, builder.AllocateBasicBlock())
    56  		} else {
    57  			b.AsBrnz(cmpVal, nil, builder.AllocateBasicBlock())
    58  		}
    59  		builder.InsertInstruction(b)
    60  		return b, func(t *testing.T) {
    61  			require.True(t, cmpInstr.Lowered())
    62  		}
    63  	}
    64  
    65  	icmpInSameGroupFromParamAndImm12 := func(brz bool, ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
    66  		m.executableContext.StartLoweringFunction(10)
    67  		entry := builder.CurrentBlock()
    68  		v1 := entry.AddParam(builder, ssa.TypeI32)
    69  
    70  		iconst := builder.AllocateInstruction()
    71  		iconst.AsIconst32(0x4d2)
    72  		builder.InsertInstruction(iconst)
    73  		v2 := iconst.Return()
    74  
    75  		// Constant can be referenced from different groups because we inline it.
    76  		builder.SetCurrentBlock(builder.AllocateBasicBlock())
    77  
    78  		icmp := builder.AllocateInstruction()
    79  		icmp.AsIcmp(v1, v2, ssa.IntegerCmpCondEqual)
    80  		builder.InsertInstruction(icmp)
    81  		icmpVal := icmp.Return()
    82  		ctx.definitions[v1] = &backend.SSAValueDefinition{BlkParamVReg: intToVReg(1), BlockParamValue: v1}
    83  		ctx.definitions[v2] = &backend.SSAValueDefinition{Instr: iconst}
    84  		ctx.definitions[icmpVal] = &backend.SSAValueDefinition{Instr: icmp}
    85  		ctx.vRegMap[v1], ctx.vRegMap[v2], ctx.vRegMap[icmpVal] = intToVReg(1), intToVReg(2), intToVReg(3)
    86  		b := builder.AllocateInstruction()
    87  		if brz {
    88  			b.AsBrz(icmpVal, nil, builder.AllocateBasicBlock())
    89  		} else {
    90  			b.AsBrnz(icmpVal, nil, builder.AllocateBasicBlock())
    91  		}
    92  		builder.InsertInstruction(b)
    93  		return b, func(t *testing.T) {
    94  			require.True(t, icmp.Lowered())
    95  		}
    96  	}
    97  
    98  	for _, tc := range []struct {
    99  		name         string
   100  		setup        func(*mockCompiler, ssa.Builder, *machine) (instr *ssa.Instruction, verify func(t *testing.T))
   101  		instructions []string
   102  	}{
   103  		{
   104  			name: "icmp in different group",
   105  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   106  				m.executableContext.StartLoweringFunction(10)
   107  				entry := builder.CurrentBlock()
   108  				v1, v2 := entry.AddParam(builder, ssa.TypeI64), entry.AddParam(builder, ssa.TypeI64)
   109  
   110  				icmp := builder.AllocateInstruction()
   111  				icmp.AsIcmp(v1, v2, ssa.IntegerCmpCondEqual)
   112  				builder.InsertInstruction(icmp)
   113  				icmpVal := icmp.Return()
   114  				ctx.definitions[icmpVal] = &backend.SSAValueDefinition{Instr: icmp}
   115  				ctx.vRegMap[v1], ctx.vRegMap[v2], ctx.vRegMap[icmpVal] = intToVReg(1), intToVReg(2), intToVReg(3)
   116  
   117  				brz := builder.AllocateInstruction()
   118  				brz.AsBrz(icmpVal, nil, builder.AllocateBasicBlock())
   119  				builder.InsertInstruction(brz)
   120  
   121  				// Indicate that currently compiling in the different group.
   122  				ctx.currentGID = 1000
   123  				return brz, func(t *testing.T) {
   124  					require.False(t, icmp.Lowered())
   125  				}
   126  			},
   127  			instructions: []string{"cbz w3?, (L1)"},
   128  		},
   129  		{
   130  			name: "brz / icmp in the same group / params",
   131  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   132  				return cmpInSameGroupFromParams(true, ssa.IntegerCmpCondUnsignedGreaterThan, ssa.FloatCmpCondInvalid, ctx, builder, m)
   133  			},
   134  			instructions: []string{
   135  				"subs xzr, x1, x2",
   136  				"b.ls L1",
   137  			},
   138  		},
   139  		{
   140  			name: "brnz / icmp in the same group / params",
   141  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   142  				return cmpInSameGroupFromParams(false, ssa.IntegerCmpCondEqual, ssa.FloatCmpCondInvalid, ctx, builder, m)
   143  			},
   144  			instructions: []string{
   145  				"subs xzr, x1, x2",
   146  				"b.eq L1",
   147  			},
   148  		},
   149  		{
   150  			name: "brz / fcmp in the same group / params",
   151  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   152  				return cmpInSameGroupFromParams(true, ssa.IntegerCmpCondInvalid, ssa.FloatCmpCondEqual, ctx, builder, m)
   153  			},
   154  			instructions: []string{
   155  				"fcmp d1, d2",
   156  				"b.ne L1",
   157  			},
   158  		},
   159  		{
   160  			name: "brnz / fcmp in the same group / params",
   161  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   162  				return cmpInSameGroupFromParams(false, ssa.IntegerCmpCondInvalid, ssa.FloatCmpCondGreaterThan, ctx, builder, m)
   163  			},
   164  			instructions: []string{
   165  				"fcmp d1, d2",
   166  				"b.gt L1",
   167  			},
   168  		},
   169  		{
   170  			name: "brz / icmp in the same group / params",
   171  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   172  				return icmpInSameGroupFromParamAndImm12(true, ctx, builder, m)
   173  			},
   174  			instructions: []string{
   175  				"subs wzr, w1?, #0x4d2",
   176  				"b.ne L1",
   177  			},
   178  		},
   179  		{
   180  			name: "brz / icmp in the same group / params",
   181  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction, verify func(t *testing.T)) {
   182  				return icmpInSameGroupFromParamAndImm12(false, ctx, builder, m)
   183  			},
   184  			instructions: []string{
   185  				"subs wzr, w1?, #0x4d2",
   186  				"b.eq L1",
   187  			},
   188  		},
   189  	} {
   190  		t.Run(tc.name, func(t *testing.T) {
   191  			ctx, b, m := newSetupWithMockContext()
   192  			instr, verify := tc.setup(ctx, b, m)
   193  			m.LowerConditionalBranch(instr)
   194  			verify(t)
   195  			require.Equal(t, strings.Join(tc.instructions, "\n"),
   196  				formatEmittedInstructionsInCurrentBlock(m))
   197  		})
   198  	}
   199  }
   200  
   201  func TestMachine_LowerSingleBranch(t *testing.T) {
   202  	for _, tc := range []struct {
   203  		name         string
   204  		setup        func(*mockCompiler, ssa.Builder, *machine) (instr *ssa.Instruction)
   205  		instructions []string
   206  	}{
   207  		{
   208  			name: "jump-fallthrough",
   209  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   210  				jump := builder.AllocateInstruction()
   211  				jump.AsJump(nil, builder.AllocateBasicBlock())
   212  				builder.InsertInstruction(jump)
   213  				jump.AsFallthroughJump()
   214  				return jump
   215  			},
   216  			instructions: []string{}, // Fallthrough jump should be optimized out.
   217  		},
   218  		{
   219  			name: "b",
   220  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   221  				m.executableContext.StartLoweringFunction(10)
   222  				jump := builder.AllocateInstruction()
   223  				jump.AsJump(nil, builder.AllocateBasicBlock())
   224  				builder.InsertInstruction(jump)
   225  				return jump
   226  			},
   227  			instructions: []string{"b L1"},
   228  		},
   229  		{
   230  			name: "ret",
   231  			setup: func(ctx *mockCompiler, builder ssa.Builder, m *machine) (instr *ssa.Instruction) {
   232  				m.executableContext.StartLoweringFunction(10)
   233  				jump := builder.AllocateInstruction()
   234  				jump.AsJump(nil, builder.ReturnBlock())
   235  				builder.InsertInstruction(jump)
   236  				return jump
   237  			},
   238  			// Jump which targets the return block should be translated as "ret".
   239  			instructions: []string{"ret"},
   240  		},
   241  	} {
   242  		tc := tc
   243  		t.Run(tc.name, func(t *testing.T) {
   244  			ctx, b, m := newSetupWithMockContext()
   245  			instr := tc.setup(ctx, b, m)
   246  			m.LowerSingleBranch(instr)
   247  			require.Equal(t, strings.Join(tc.instructions, "\n"), formatEmittedInstructionsInCurrentBlock(m))
   248  		})
   249  	}
   250  }
   251  
   252  func TestMachine_InsertMove(t *testing.T) {
   253  	for _, tc := range []struct {
   254  		name        string
   255  		src, dst    regalloc.VReg
   256  		typ         ssa.Type
   257  		instruction string
   258  	}{
   259  		{
   260  			name:        "int",
   261  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeInt),
   262  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeInt),
   263  			instruction: "mov x1?, x2?",
   264  			typ:         ssa.TypeI64,
   265  		},
   266  		{
   267  			name:        "float",
   268  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeFloat),
   269  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeFloat),
   270  			instruction: "mov v1?.8b, v2?.8b",
   271  			typ:         ssa.TypeF64,
   272  		},
   273  		{
   274  			name:        "vector",
   275  			src:         regalloc.VReg(1).SetRegType(regalloc.RegTypeFloat),
   276  			dst:         regalloc.VReg(2).SetRegType(regalloc.RegTypeFloat),
   277  			instruction: "mov v1?.16b, v2?.16b",
   278  			typ:         ssa.TypeV128,
   279  		},
   280  	} {
   281  		t.Run(tc.name, func(t *testing.T) {
   282  			_, _, m := newSetupWithMockContext()
   283  			m.InsertMove(tc.src, tc.dst, tc.typ)
   284  			require.Equal(t, tc.instruction, formatEmittedInstructionsInCurrentBlock(m))
   285  		})
   286  	}
   287  }
   288  
   289  func TestMachine_lowerIDiv(t *testing.T) {
   290  	for _, tc := range []struct {
   291  		name   string
   292  		_64bit bool
   293  		signed bool
   294  		exp    string
   295  	}{
   296  		{
   297  			name: "32bit unsigned", _64bit: false, signed: false,
   298  			exp: `
   299  udiv w1?, w2?, w3?
   300  mov x1?, x65535?
   301  cbnz w3?, L1
   302  movz x2?, #0xa, lsl 0
   303  str w2?, [x1?]
   304  mov x3?, sp
   305  str x3?, [x1?, #0x38]
   306  adr x4?, #0x0
   307  str x4?, [x1?, #0x30]
   308  exit_sequence x1?
   309  L1:
   310  `,
   311  		},
   312  		{name: "32bit signed", _64bit: false, signed: true, exp: `
   313  sdiv w1?, w2?, w3?
   314  mov x1?, x65535?
   315  cbnz w3?, L1
   316  movz x2?, #0xa, lsl 0
   317  str w2?, [x1?]
   318  mov x3?, sp
   319  str x3?, [x1?, #0x38]
   320  adr x4?, #0x0
   321  str x4?, [x1?, #0x30]
   322  exit_sequence x1?
   323  L1:
   324  adds wzr, w3?, #0x1
   325  ccmp w2?, #0x1, #0x0, eq
   326  mov x5?, x65535?
   327  b.vc L2
   328  movz x6?, #0xb, lsl 0
   329  str w6?, [x5?]
   330  mov x7?, sp
   331  str x7?, [x5?, #0x38]
   332  adr x8?, #0x0
   333  str x8?, [x5?, #0x30]
   334  exit_sequence x5?
   335  L2:
   336  `},
   337  		{name: "64bit unsigned", _64bit: true, signed: false, exp: `
   338  udiv x1?, x2?, x3?
   339  mov x1?, x65535?
   340  cbnz x3?, L1
   341  movz x2?, #0xa, lsl 0
   342  str w2?, [x1?]
   343  mov x3?, sp
   344  str x3?, [x1?, #0x38]
   345  adr x4?, #0x0
   346  str x4?, [x1?, #0x30]
   347  exit_sequence x1?
   348  L1:
   349  `},
   350  		{name: "64bit signed", _64bit: true, signed: true, exp: `
   351  sdiv x1?, x2?, x3?
   352  mov x1?, x65535?
   353  cbnz x3?, L1
   354  movz x2?, #0xa, lsl 0
   355  str w2?, [x1?]
   356  mov x3?, sp
   357  str x3?, [x1?, #0x38]
   358  adr x4?, #0x0
   359  str x4?, [x1?, #0x30]
   360  exit_sequence x1?
   361  L1:
   362  adds xzr, x3?, #0x1
   363  ccmp x2?, #0x1, #0x0, eq
   364  mov x5?, x65535?
   365  b.vc L2
   366  movz x6?, #0xb, lsl 0
   367  str w6?, [x5?]
   368  mov x7?, sp
   369  str x7?, [x5?, #0x38]
   370  adr x8?, #0x0
   371  str x8?, [x5?, #0x30]
   372  exit_sequence x5?
   373  L2:
   374  `},
   375  	} {
   376  		t.Run(tc.name, func(t *testing.T) {
   377  			execCtx := regalloc.VReg(0xffff).SetRegType(regalloc.RegTypeInt)
   378  			rd, rn, rm := regalloc.VReg(1).SetRegType(regalloc.RegTypeInt),
   379  				regalloc.VReg(2).SetRegType(regalloc.RegTypeInt),
   380  				regalloc.VReg(3).SetRegType(regalloc.RegTypeInt)
   381  			mc, _, m := newSetupWithMockContext()
   382  			mc.typeOf = map[regalloc.VRegID]ssa.Type{execCtx.ID(): ssa.TypeI64, 2: ssa.TypeI64, 3: ssa.TypeI64}
   383  			m.lowerIDiv(execCtx, operandNR(rd), operandNR(rn), operandNR(rm), tc._64bit, tc.signed)
   384  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   385  		})
   386  	}
   387  }
   388  
   389  func TestMachine_exitWithCode(t *testing.T) {
   390  	_, _, m := newSetupWithMockContext()
   391  	m.lowerExitWithCode(x1VReg, wazevoapi.ExitCodeGrowStack)
   392  	m.executableContext.FlushPendingInstructions()
   393  	m.encode(m.executableContext.PerBlockHead)
   394  	require.Equal(t, `
   395  movz x1?, #0x1, lsl 0
   396  str w1?, [x1]
   397  mov x2?, sp
   398  str x2?, [x1, #0x38]
   399  adr x3?, #0x0
   400  str x3?, [x1, #0x30]
   401  exit_sequence x1
   402  `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   403  }
   404  
   405  func TestMachine_lowerFpuToInt(t *testing.T) {
   406  	for _, tc := range []struct {
   407  		name        string
   408  		nontrapping bool
   409  		expectedAsm string
   410  	}{
   411  		{
   412  			name:        "trapping",
   413  			nontrapping: false,
   414  			expectedAsm: `
   415  msr fpsr, xzr
   416  fcvtzu w1, s2
   417  mrs x1? fpsr
   418  mov x2?, x15
   419  mov x3?, d2
   420  subs xzr, x1?, #0x1
   421  b.ne L2
   422  fcmp w3?, w3?
   423  mov x4?, x2?
   424  b.vc L1
   425  movz x5?, #0xc, lsl 0
   426  str w5?, [x4?]
   427  mov x6?, sp
   428  str x6?, [x4?, #0x38]
   429  adr x7?, #0x0
   430  str x7?, [x4?, #0x30]
   431  exit_sequence x4?
   432  L1:
   433  movz x8?, #0xb, lsl 0
   434  str w8?, [x2?]
   435  mov x9?, sp
   436  str x9?, [x2?, #0x38]
   437  adr x10?, #0x0
   438  str x10?, [x2?, #0x30]
   439  exit_sequence x2?
   440  L2:
   441  `,
   442  		},
   443  		{
   444  			name:        "nontrapping",
   445  			nontrapping: true,
   446  			expectedAsm: `
   447  fcvtzu w1, s2
   448  `,
   449  		},
   450  	} {
   451  		t.Run(tc.name, func(t *testing.T) {
   452  			mc, _, m := newSetupWithMockContext()
   453  			mc.typeOf = map[regalloc.VRegID]ssa.Type{v2VReg.ID(): ssa.TypeI64, x15VReg.ID(): ssa.TypeI64}
   454  			m.lowerFpuToInt(operandNR(x1VReg), operandNR(v2VReg), x15VReg, false, false, false, tc.nontrapping)
   455  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   456  
   457  			m.executableContext.FlushPendingInstructions()
   458  			m.encode(m.executableContext.PerBlockHead)
   459  		})
   460  	}
   461  }
   462  
   463  func TestMachine_lowerVIMul(t *testing.T) {
   464  	for _, tc := range []struct {
   465  		name          string
   466  		expectedAsm   string
   467  		arrangement   vecArrangement
   468  		expectedBytes string
   469  	}{
   470  		{
   471  			name:        "2D",
   472  			arrangement: vecArrangement2D,
   473  			expectedAsm: `
   474  rev64 v2?.4s, x15.4s
   475  mul v2?.4s, v2?.4s, x2.4s
   476  xtn v1?.2s, x2.2s
   477  addp v2?.4s, v2?.4s, v2?.4s
   478  xtn v3?.2s, x15.2s
   479  shll v4?.2s, v2?.2s
   480  umlal v4?.2s, v3?.2s, v1?.2s
   481  mov x1.16b, v4?.16b
   482  `,
   483  			expectedBytes: "e009a04e009ca24e4028a10e00bca04ee029a10e0038a12e0080a02e011ca04e",
   484  		},
   485  		{
   486  			name:        "8B",
   487  			arrangement: vecArrangement8B,
   488  			expectedAsm: `
   489  mul x1.8b, x2.8b, x15.8b
   490  `,
   491  			expectedBytes: "419c2f0e",
   492  		},
   493  		{
   494  			name:        "16B",
   495  			arrangement: vecArrangement16B,
   496  			expectedAsm: `
   497  mul x1.16b, x2.16b, x15.16b
   498  `,
   499  			expectedBytes: "419c2f4e",
   500  		},
   501  		{
   502  			name:        "4H",
   503  			arrangement: vecArrangement4H,
   504  			expectedAsm: `
   505  mul x1.4h, x2.4h, x15.4h
   506  `,
   507  			expectedBytes: "419c6f0e",
   508  		},
   509  		{
   510  			name:        "8H",
   511  			arrangement: vecArrangement8H,
   512  			expectedAsm: `
   513  mul x1.8h, x2.8h, x15.8h
   514  `,
   515  			expectedBytes: "419c6f4e",
   516  		},
   517  		{
   518  			name:        "2S",
   519  			arrangement: vecArrangement2S,
   520  			expectedAsm: `
   521  mul x1.2s, x2.2s, x15.2s
   522  `,
   523  			expectedBytes: "419caf0e",
   524  		},
   525  		{
   526  			name:        "4S",
   527  			arrangement: vecArrangement4S,
   528  			expectedAsm: `
   529  mul x1.4s, x2.4s, x15.4s
   530  `,
   531  			expectedBytes: "419caf4e",
   532  		},
   533  	} {
   534  		t.Run(tc.name, func(t *testing.T) {
   535  			_, _, m := newSetupWithMockContext()
   536  			m.lowerVIMul(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement)
   537  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   538  
   539  			m.executableContext.FlushPendingInstructions()
   540  			m.encode(m.executableContext.PerBlockHead)
   541  			buf := m.compiler.Buf()
   542  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   543  		})
   544  	}
   545  }
   546  
   547  func TestMachine_lowerVcheckTrue(t *testing.T) {
   548  	for _, tc := range []struct {
   549  		name          string
   550  		op            ssa.Opcode
   551  		expectedAsm   string
   552  		arrangement   vecArrangement
   553  		expectedBytes string
   554  	}{
   555  		{
   556  			name: "anyTrue",
   557  			op:   ssa.OpcodeVanyTrue,
   558  			expectedAsm: `
   559  umaxp v1?.16b, x1.16b, x1.16b
   560  mov x15, v1?.d[0]
   561  ccmp x15, #0x0, #0x0, al
   562  cset x15, ne
   563  `,
   564  			expectedBytes: "20a4216e0f3c084ee0e940faef079f9a",
   565  		},
   566  		{
   567  			name:        "allTrue 2D",
   568  			op:          ssa.OpcodeVallTrue,
   569  			arrangement: vecArrangement2D,
   570  			expectedAsm: `
   571  cmeq v1?.2d, x1.2d, #0
   572  addp v1?.2d, v1?.2d, v1?.2d
   573  fcmp d1?, d1?
   574  cset x15, eq
   575  `,
   576  			expectedBytes: "2098e04e00bce04e0020601eef179f9a",
   577  		},
   578  		{
   579  			name:        "allTrue 8B",
   580  			arrangement: vecArrangement8B,
   581  			op:          ssa.OpcodeVallTrue,
   582  			expectedAsm: `
   583  uminv h1?, x1.8b
   584  mov x15, v1?.d[0]
   585  ccmp x15, #0x0, #0x0, al
   586  cset x15, ne
   587  `,
   588  			expectedBytes: "20a8312e0f3c084ee0e940faef079f9a",
   589  		},
   590  		{
   591  			name:        "allTrue 16B",
   592  			arrangement: vecArrangement16B,
   593  			op:          ssa.OpcodeVallTrue,
   594  			expectedAsm: `
   595  uminv h1?, x1.16b
   596  mov x15, v1?.d[0]
   597  ccmp x15, #0x0, #0x0, al
   598  cset x15, ne
   599  `,
   600  			expectedBytes: "20a8316e0f3c084ee0e940faef079f9a",
   601  		},
   602  		{
   603  			name:        "allTrue 4H",
   604  			arrangement: vecArrangement4H,
   605  			op:          ssa.OpcodeVallTrue,
   606  			expectedAsm: `
   607  uminv s1?, x1.4h
   608  mov x15, v1?.d[0]
   609  ccmp x15, #0x0, #0x0, al
   610  cset x15, ne
   611  `,
   612  			expectedBytes: "20a8712e0f3c084ee0e940faef079f9a",
   613  		},
   614  		{
   615  			name:        "allTrue 8H",
   616  			arrangement: vecArrangement8H,
   617  			op:          ssa.OpcodeVallTrue,
   618  			expectedAsm: `
   619  uminv s1?, x1.8h
   620  mov x15, v1?.d[0]
   621  ccmp x15, #0x0, #0x0, al
   622  cset x15, ne
   623  `,
   624  			expectedBytes: "20a8716e0f3c084ee0e940faef079f9a",
   625  		},
   626  		{
   627  			name:        "allTrue 4S",
   628  			arrangement: vecArrangement4S,
   629  			op:          ssa.OpcodeVallTrue,
   630  			expectedAsm: `
   631  uminv d1?, x1.4s
   632  mov x15, v1?.d[0]
   633  ccmp x15, #0x0, #0x0, al
   634  cset x15, ne
   635  `,
   636  			expectedBytes: "20a8b16e0f3c084ee0e940faef079f9a",
   637  		},
   638  	} {
   639  		t.Run(tc.name, func(t *testing.T) {
   640  			_, _, m := newSetupWithMockContext()
   641  			m.lowerVcheckTrue(tc.op, operandNR(x1VReg), operandNR(x15VReg), tc.arrangement)
   642  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   643  
   644  			m.executableContext.FlushPendingInstructions()
   645  			m.encode(m.executableContext.PerBlockHead)
   646  			buf := m.compiler.Buf()
   647  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   648  		})
   649  	}
   650  }
   651  
   652  func TestMachine_lowerVhighBits(t *testing.T) {
   653  	for _, tc := range []struct {
   654  		name          string
   655  		expectedAsm   string
   656  		arrangement   vecArrangement
   657  		expectedBytes string
   658  	}{
   659  		{
   660  			name:        "16B",
   661  			arrangement: vecArrangement16B,
   662  			expectedAsm: `
   663  sshr v3?.16b, x1.16b, #7
   664  movz x1?, #0x201, lsl 0
   665  movk x1?, #0x804, lsl 16
   666  movk x1?, #0x2010, lsl 32
   667  movk x1?, #0x8040, lsl 48
   668  dup v2?.2d, x1?
   669  and v3?.16b, v3?.16b, v2?.16b
   670  ext v2?.16b, v3?.16b, v3?.16b, #8
   671  zip1 v2?.16b, v3?.16b, v2?.16b
   672  addv s2?, v2?.8h
   673  umov w15, v2?.h[0]
   674  `,
   675  			expectedBytes: "2004094f204080d28000a1f20002c4f20008f0f2000c084e001c204e0040006e0038004e00b8714e0f3c020e",
   676  		},
   677  		{
   678  			name:        "8H",
   679  			arrangement: vecArrangement8H,
   680  			expectedAsm: `
   681  sshr v3?.8h, x1.8h, #15
   682  movz x1?, #0x1, lsl 0
   683  movk x1?, #0x2, lsl 16
   684  movk x1?, #0x4, lsl 32
   685  movk x1?, #0x8, lsl 48
   686  dup v2?.2d, x1?
   687  lsl x1?, x1?, 0x4
   688  ins v2?.d[1], x1?
   689  and v2?.16b, v3?.16b, v2?.16b
   690  addv s2?, v2?.8h
   691  umov w15, v2?.h[0]
   692  `,
   693  			expectedBytes: "2004114f200080d24000a0f28000c0f20001e0f2000c084e00ec7cd3001c184e001c204e00b8714e0f3c020e",
   694  		},
   695  		{
   696  			name:        "4S",
   697  			arrangement: vecArrangement4S,
   698  			expectedAsm: `
   699  sshr v3?.4s, x1.4s, #31
   700  movz x1?, #0x1, lsl 0
   701  movk x1?, #0x2, lsl 32
   702  dup v2?.2d, x1?
   703  lsl x1?, x1?, 0x2
   704  ins v2?.d[1], x1?
   705  and v2?.16b, v3?.16b, v2?.16b
   706  addv d2?, v2?.4s
   707  umov w15, v2?.s[0]
   708  `,
   709  			expectedBytes: "2004214f200080d24000c0f2000c084e00f47ed3001c184e001c204e00b8b14e0f3c040e",
   710  		},
   711  		{
   712  			name:        "2D",
   713  			arrangement: vecArrangement2D,
   714  			expectedAsm: `
   715  mov x15, x1.d[0]
   716  mov x1?, x1.d[1]
   717  lsr x1?, x1?, 0x3f
   718  lsr x15, x15, 0x3f
   719  add w15, w15, w1?, lsl #1
   720  `,
   721  			expectedBytes: "2f3c084e203c184e00fc7fd3effd7fd3ef05000b",
   722  		},
   723  	} {
   724  		t.Run(tc.name, func(t *testing.T) {
   725  			_, _, m := newSetupWithMockContext()
   726  			m.lowerVhighBits(operandNR(x1VReg), operandNR(x15VReg), tc.arrangement)
   727  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   728  
   729  			m.executableContext.FlushPendingInstructions()
   730  			m.encode(m.executableContext.PerBlockHead)
   731  			buf := m.compiler.Buf()
   732  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   733  		})
   734  	}
   735  }
   736  
   737  func TestMachine_lowerShuffle(t *testing.T) {
   738  	for _, tc := range []struct {
   739  		name          string
   740  		lanes         []uint64
   741  		expectedAsm   string
   742  		expectedBytes string
   743  	}{
   744  		{
   745  			name:  "lanes 0..15",
   746  			lanes: []uint64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
   747  			expectedAsm: `
   748  mov v29.16b, x2.16b
   749  mov v30.16b, x15.16b
   750  ldr q1?, #8; b 32; data.v128  0706050403020100 0f0e0d0c0b0a0908
   751  tbl x1.16b, { v29.16b, v30.16b }, v1?.16b
   752  `,
   753  			expectedBytes: "5d1ca24efe1daf4e4000009c05000014000102030405060708090a0b0c0d0e0fa123004e",
   754  		},
   755  		{
   756  			name:  "lanes 0101...",
   757  			lanes: []uint64{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
   758  			expectedAsm: `
   759  mov v29.16b, x2.16b
   760  mov v30.16b, x15.16b
   761  ldr q1?, #8; b 32; data.v128  0100010001000100 0100010001000100
   762  tbl x1.16b, { v29.16b, v30.16b }, v1?.16b
   763  `,
   764  			expectedBytes: "5d1ca24efe1daf4e4000009c0500001400010001000100010001000100010001a123004e",
   765  		},
   766  	} {
   767  		t.Run(tc.name, func(t *testing.T) {
   768  			_, _, m := newSetupWithMockContext()
   769  			lanes := tc.lanes
   770  
   771  			// Encode the 16 bytes as 8 bytes in u1, and 8 bytes in u2.
   772  			lane1 := lanes[7]<<56 | lanes[6]<<48 | lanes[5]<<40 | lanes[4]<<32 | lanes[3]<<24 | lanes[2]<<16 | lanes[1]<<8 | lanes[0]
   773  			lane2 := lanes[15]<<56 | lanes[14]<<48 | lanes[13]<<40 | lanes[12]<<32 | lanes[11]<<24 | lanes[10]<<16 | lanes[9]<<8 | lanes[8]
   774  
   775  			m.lowerShuffle(operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), lane1, lane2)
   776  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   777  
   778  			m.executableContext.FlushPendingInstructions()
   779  			m.encode(m.executableContext.PerBlockHead)
   780  			buf := m.compiler.Buf()
   781  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   782  		})
   783  	}
   784  }
   785  
   786  func TestMachine_lowerVShift(t *testing.T) {
   787  	for _, tc := range []struct {
   788  		name          string
   789  		expectedAsm   string
   790  		op            ssa.Opcode
   791  		arrangement   vecArrangement
   792  		expectedBytes string
   793  	}{
   794  		{
   795  			name:        "VIshl",
   796  			op:          ssa.OpcodeVIshl,
   797  			arrangement: vecArrangement16B,
   798  			expectedAsm: `
   799  and x1?, x15, #0x7
   800  dup v2?.16b, x1?
   801  sshl x1.16b, x2.16b, v2?.16b
   802  `,
   803  			expectedBytes: "e0094092000c014e4144204e",
   804  		},
   805  		{
   806  			name:        "VSshr",
   807  			op:          ssa.OpcodeVSshr,
   808  			arrangement: vecArrangement16B,
   809  			expectedAsm: `
   810  and x1?, x15, #0x7
   811  sub x1?, xzr, x1?
   812  dup v2?.16b, x1?
   813  sshl x1.16b, x2.16b, v2?.16b
   814  `,
   815  			expectedBytes: "e0094092e00300cb000c014e4144204e",
   816  		},
   817  		{
   818  			name:        "VUshr",
   819  			op:          ssa.OpcodeVUshr,
   820  			arrangement: vecArrangement16B,
   821  			expectedAsm: `
   822  and x1?, x15, #0x7
   823  sub x1?, xzr, x1?
   824  dup v2?.16b, x1?
   825  ushl x1.16b, x2.16b, v2?.16b
   826  `,
   827  			expectedBytes: "e0094092e00300cb000c014e4144206e",
   828  		},
   829  	} {
   830  		t.Run(tc.name, func(t *testing.T) {
   831  			_, _, m := newSetupWithMockContext()
   832  			m.lowerVShift(tc.op, operandNR(x1VReg), operandNR(x2VReg), operandNR(x15VReg), tc.arrangement)
   833  			require.Equal(t, tc.expectedAsm, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   834  
   835  			m.executableContext.FlushPendingInstructions()
   836  			m.encode(m.executableContext.PerBlockHead)
   837  			buf := m.compiler.Buf()
   838  			require.Equal(t, tc.expectedBytes, hex.EncodeToString(buf))
   839  		})
   840  	}
   841  }
   842  
   843  func TestMachine_lowerSelectVec(t *testing.T) {
   844  	_, _, m := newSetupWithMockContext()
   845  	c := operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
   846  	rn := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   847  	rm := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   848  	rd := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
   849  
   850  	require.Equal(t, 1, int(c.reg().ID()))
   851  	require.Equal(t, 2, int(rn.reg().ID()))
   852  	require.Equal(t, 3, int(rm.reg().ID()))
   853  	require.Equal(t, 4, int(rd.reg().ID()))
   854  
   855  	m.lowerSelectVec(c, rn, rm, rd)
   856  	require.Equal(t, `
   857  subs wzr, w1?, wzr
   858  csetm x5?, ne
   859  dup v6?.2d, x5?
   860  bsl v6?.16b, v2?.16b, v3?.16b
   861  mov v4?.16b, v6?.16b
   862  `, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   863  }
   864  
   865  func TestMachine_lowerFcopysign(t *testing.T) {
   866  	for _, tc := range []struct {
   867  		_64bit bool
   868  		exp    string
   869  	}{
   870  		{
   871  			_64bit: false,
   872  			exp: `
   873  movz w1?, #0x8000, lsl 16
   874  ins v2?.s[0], w1?
   875  mov v6?.8b, v3?.8b
   876  bit v6?.8b, v4?.8b, v2?.8b
   877  mov v5?.8b, v6?.8b
   878  `,
   879  		},
   880  		{
   881  			_64bit: true,
   882  			exp: `
   883  movz x1?, #0x8000, lsl 48
   884  ins v2?.d[0], x1?
   885  mov v6?.8b, v3?.8b
   886  bit v6?.8b, v4?.8b, v2?.8b
   887  mov v5?.8b, v6?.8b
   888  `,
   889  		},
   890  	} {
   891  		t.Run(fmt.Sprintf("64bit=%v", tc._64bit), func(t *testing.T) {
   892  			_, _, m := newSetupWithMockContext()
   893  			var typ, ftyp ssa.Type
   894  			if tc._64bit {
   895  				typ = ssa.TypeI64
   896  				ftyp = ssa.TypeF64
   897  			} else {
   898  				typ = ssa.TypeI32
   899  				ftyp = ssa.TypeF32
   900  			}
   901  			tmpI := operandNR(m.compiler.AllocateVReg(typ))
   902  			tmpF := operandNR(m.compiler.AllocateVReg(ftyp))
   903  			rn := operandNR(m.compiler.AllocateVReg(ftyp))
   904  			rm := operandNR(m.compiler.AllocateVReg(ftyp))
   905  			rd := operandNR(m.compiler.AllocateVReg(ftyp))
   906  
   907  			require.Equal(t, 1, int(tmpI.reg().ID()))
   908  			require.Equal(t, 2, int(tmpF.reg().ID()))
   909  			require.Equal(t, 3, int(rn.reg().ID()))
   910  			require.Equal(t, 4, int(rm.reg().ID()))
   911  			require.Equal(t, 5, int(rd.reg().ID()))
   912  
   913  			m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, tc._64bit)
   914  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   915  		})
   916  	}
   917  }
   918  
   919  func TestMachine_lowerRotl(t *testing.T) {
   920  	for _, tc := range []struct {
   921  		_64bit bool
   922  		exp    string
   923  	}{
   924  		{
   925  			_64bit: false,
   926  			exp: `
   927  sub w1?, wzr, w3?
   928  ror w4?, w2?, w1?
   929  `,
   930  		},
   931  		{
   932  			_64bit: true,
   933  			exp: `
   934  sub x1?, xzr, x3?
   935  ror x4?, x2?, x1?
   936  `,
   937  		},
   938  	} {
   939  		t.Run(fmt.Sprintf("64bit=%v", tc._64bit), func(t *testing.T) {
   940  			_, _, m := newSetupWithMockContext()
   941  			var typ ssa.Type
   942  			if tc._64bit {
   943  				typ = ssa.TypeI64
   944  			} else {
   945  				typ = ssa.TypeI32
   946  			}
   947  			tmpI := operandNR(m.compiler.AllocateVReg(typ))
   948  			rn := operandNR(m.compiler.AllocateVReg(typ))
   949  			rm := operandNR(m.compiler.AllocateVReg(typ))
   950  			rd := operandNR(m.compiler.AllocateVReg(typ))
   951  
   952  			require.Equal(t, 1, int(tmpI.reg().ID()))
   953  			require.Equal(t, 2, int(rn.reg().ID()))
   954  			require.Equal(t, 3, int(rm.reg().ID()))
   955  			require.Equal(t, 4, int(rd.reg().ID()))
   956  
   957  			m.lowerRotlImpl(rd, rn, rm, tmpI, tc._64bit)
   958  			require.Equal(t, tc.exp, "\n"+formatEmittedInstructionsInCurrentBlock(m)+"\n")
   959  		})
   960  	}
   961  }