github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/simd/bitwise_test.go (about)

     1  // Copyright 2018 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package simd_test
     6  
     7  import (
     8  	"bytes"
     9  	"math/rand"
    10  	"testing"
    11  
    12  	"github.com/Schaudge/grailbase/simd"
    13  )
    14  
    15  func andInplaceSlow(main, arg []byte) {
    16  	// Slow, but straightforward-to-verify implementation.
    17  	for idx := range main {
    18  		main[idx] = main[idx] & arg[idx]
    19  	}
    20  }
    21  
    22  func TestAnd(t *testing.T) {
    23  	// Generate some random strings and verify that bitwise-and results are as
    24  	// expected.
    25  	maxSize := 500
    26  	nIter := 200
    27  	argArr := simd.MakeUnsafe(maxSize)
    28  	for ii := range argArr {
    29  		argArr[ii] = byte(rand.Intn(256))
    30  	}
    31  	main1Arr := simd.MakeUnsafe(maxSize)
    32  	main2Arr := simd.MakeUnsafe(maxSize)
    33  	main3Arr := simd.MakeUnsafe(maxSize)
    34  	main4Arr := simd.MakeUnsafe(maxSize)
    35  	main5Arr := simd.MakeUnsafe(maxSize)
    36  	for iter := 0; iter < nIter; iter++ {
    37  		sliceStart := rand.Intn(maxSize)
    38  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
    39  		argSlice := argArr[sliceStart:sliceEnd]
    40  		main1Slice := main1Arr[sliceStart:sliceEnd]
    41  		for ii := range main1Slice {
    42  			main1Slice[ii] = byte(rand.Intn(256))
    43  		}
    44  		main2Slice := main2Arr[sliceStart:sliceEnd]
    45  		main3Slice := main3Arr[sliceStart:sliceEnd]
    46  		main4Slice := main4Arr[sliceStart:sliceEnd]
    47  		main5Slice := main5Arr[sliceStart:sliceEnd]
    48  		copy(main3Slice, main1Slice)
    49  		copy(main5Slice, main1Slice)
    50  		andInplaceSlow(main1Slice, argSlice)
    51  		simd.AndUnsafe(main2Slice, argSlice, main3Slice)
    52  		if !bytes.Equal(main1Slice, main2Slice) {
    53  			t.Fatal("Mismatched AndUnsafe result.")
    54  		}
    55  		sentinel := byte(rand.Intn(256))
    56  		main4Arr[sliceEnd] = sentinel
    57  		simd.And(main4Slice, argSlice, main3Slice)
    58  		if !bytes.Equal(main1Slice, main4Slice) {
    59  			t.Fatal("Mismatched And result.")
    60  		}
    61  		if main4Arr[sliceEnd] != sentinel {
    62  			t.Fatal("And clobbered an extra byte.")
    63  		}
    64  		simd.AndUnsafeInplace(main3Slice, argSlice)
    65  		if !bytes.Equal(main1Slice, main3Slice) {
    66  			t.Fatal("Mismatched AndUnsafeInplace result.")
    67  		}
    68  		main5Arr[sliceEnd] = sentinel
    69  		simd.AndInplace(main5Slice, argSlice)
    70  		if !bytes.Equal(main1Slice, main5Slice) {
    71  			t.Fatal("Mismatched AndInplace result.")
    72  		}
    73  		if main5Arr[sliceEnd] != sentinel {
    74  			t.Fatal("AndInplace clobbered an extra byte.")
    75  		}
    76  	}
    77  }
    78  
    79  /*
    80  Benchmark results:
    81    MacBook Pro (15-inch, 2016)
    82    2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3
    83  
    84  Benchmark_AndInplace/SIMDShort1Cpu-8                  20          91832421 ns/op
    85  Benchmark_AndInplace/SIMDShortHalfCpu-8               50          25323744 ns/op
    86  Benchmark_AndInplace/SIMDShortAllCpu-8               100          23869031 ns/op
    87  Benchmark_AndInplace/SIMDLong1Cpu-8                    1        1715379622 ns/op
    88  Benchmark_AndInplace/SIMDLongHalfCpu-8                 1        1372591170 ns/op
    89  Benchmark_AndInplace/SIMDLongAllCpu-8                  1        1427476449 ns/op
    90  Benchmark_AndInplace/SlowShort1Cpu-8                   2         550667201 ns/op
    91  Benchmark_AndInplace/SlowShortHalfCpu-8               10         145756965 ns/op
    92  Benchmark_AndInplace/SlowShortAllCpu-8                10         135311356 ns/op
    93  Benchmark_AndInplace/SlowLong1Cpu-8                    1        7711233274 ns/op
    94  Benchmark_AndInplace/SlowLongHalfCpu-8                 1        2144409827 ns/op
    95  Benchmark_AndInplace/SlowLongAllCpu-8                  1        2158206158 ns/op
    96  */
    97  
    98  func andSimdSubtask(dst, src []byte, nIter int) int {
    99  	for iter := 0; iter < nIter; iter++ {
   100  		simd.AndInplace(dst, src)
   101  	}
   102  	return int(dst[0])
   103  }
   104  
   105  func andSlowSubtask(dst, src []byte, nIter int) int {
   106  	for iter := 0; iter < nIter; iter++ {
   107  		andInplaceSlow(dst, src)
   108  	}
   109  	return int(dst[0])
   110  }
   111  
   112  func Benchmark_AndInplace(b *testing.B) {
   113  	funcs := []taggedMultiBenchFunc{
   114  		{
   115  			f:   andSimdSubtask,
   116  			tag: "SIMD",
   117  		},
   118  		{
   119  			f:   andSlowSubtask,
   120  			tag: "Slow",
   121  		},
   122  	}
   123  	for _, f := range funcs {
   124  		// This is relevant to .bam reads in packed form, so 150/2=75 is a good
   125  		// size for the short-array benchmark.
   126  		multiBenchmark(f.f, f.tag+"Short", 75, 75, 9999999, b)
   127  		multiBenchmark(f.f, f.tag+"Long", 249250621, 249250621, 50, b)
   128  	}
   129  }
   130  
   131  // Don't bother with separate benchmarks for Or/Xor/Invmask.
   132  
   133  func orInplaceSlow(main, arg []byte) {
   134  	for idx := range main {
   135  		main[idx] = main[idx] | arg[idx]
   136  	}
   137  }
   138  
   139  func TestOr(t *testing.T) {
   140  	maxSize := 500
   141  	nIter := 200
   142  	argArr := simd.MakeUnsafe(maxSize)
   143  	for ii := range argArr {
   144  		argArr[ii] = byte(rand.Intn(256))
   145  	}
   146  	main1Arr := simd.MakeUnsafe(maxSize)
   147  	main2Arr := simd.MakeUnsafe(maxSize)
   148  	main3Arr := simd.MakeUnsafe(maxSize)
   149  	main4Arr := simd.MakeUnsafe(maxSize)
   150  	main5Arr := simd.MakeUnsafe(maxSize)
   151  	for iter := 0; iter < nIter; iter++ {
   152  		sliceStart := rand.Intn(maxSize)
   153  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   154  		argSlice := argArr[sliceStart:sliceEnd]
   155  		main1Slice := main1Arr[sliceStart:sliceEnd]
   156  		for ii := range main1Slice {
   157  			main1Slice[ii] = byte(rand.Intn(256))
   158  		}
   159  		main2Slice := main2Arr[sliceStart:sliceEnd]
   160  		main3Slice := main3Arr[sliceStart:sliceEnd]
   161  		main4Slice := main4Arr[sliceStart:sliceEnd]
   162  		main5Slice := main5Arr[sliceStart:sliceEnd]
   163  		copy(main3Slice, main1Slice)
   164  		copy(main5Slice, main1Slice)
   165  		orInplaceSlow(main1Slice, argSlice)
   166  		simd.OrUnsafe(main2Slice, argSlice, main3Slice)
   167  		if !bytes.Equal(main1Slice, main2Slice) {
   168  			t.Fatal("Mismatched OrUnsafe result.")
   169  		}
   170  		sentinel := byte(rand.Intn(256))
   171  		main4Arr[sliceEnd] = sentinel
   172  		simd.Or(main4Slice, argSlice, main3Slice)
   173  		if !bytes.Equal(main1Slice, main4Slice) {
   174  			t.Fatal("Mismatched Or result.")
   175  		}
   176  		if main4Arr[sliceEnd] != sentinel {
   177  			t.Fatal("Or clobbered an extra byte.")
   178  		}
   179  		simd.OrUnsafeInplace(main3Slice, argSlice)
   180  		if !bytes.Equal(main1Slice, main3Slice) {
   181  			t.Fatal("Mismatched OrUnsafeInplace result.")
   182  		}
   183  		main5Arr[sliceEnd] = sentinel
   184  		simd.OrInplace(main5Slice, argSlice)
   185  		if !bytes.Equal(main1Slice, main5Slice) {
   186  			t.Fatal("Mismatched OrInplace result.")
   187  		}
   188  		if main5Arr[sliceEnd] != sentinel {
   189  			t.Fatal("OrInplace clobbered an extra byte.")
   190  		}
   191  	}
   192  }
   193  
   194  func xorInplaceSlow(main, arg []byte) {
   195  	for idx := range main {
   196  		main[idx] = main[idx] ^ arg[idx]
   197  	}
   198  }
   199  
   200  func TestXor(t *testing.T) {
   201  	maxSize := 500
   202  	nIter := 200
   203  	argArr := simd.MakeUnsafe(maxSize)
   204  	for ii := range argArr {
   205  		argArr[ii] = byte(rand.Intn(256))
   206  	}
   207  	main1Arr := simd.MakeUnsafe(maxSize)
   208  	main2Arr := simd.MakeUnsafe(maxSize)
   209  	main3Arr := simd.MakeUnsafe(maxSize)
   210  	main4Arr := simd.MakeUnsafe(maxSize)
   211  	main5Arr := simd.MakeUnsafe(maxSize)
   212  	for iter := 0; iter < nIter; iter++ {
   213  		sliceStart := rand.Intn(maxSize)
   214  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   215  		argSlice := argArr[sliceStart:sliceEnd]
   216  		main1Slice := main1Arr[sliceStart:sliceEnd]
   217  		for ii := range main1Slice {
   218  			main1Slice[ii] = byte(rand.Intn(256))
   219  		}
   220  		main2Slice := main2Arr[sliceStart:sliceEnd]
   221  		main3Slice := main3Arr[sliceStart:sliceEnd]
   222  		main4Slice := main4Arr[sliceStart:sliceEnd]
   223  		main5Slice := main5Arr[sliceStart:sliceEnd]
   224  		copy(main3Slice, main1Slice)
   225  		copy(main5Slice, main1Slice)
   226  		xorInplaceSlow(main1Slice, argSlice)
   227  		simd.XorUnsafe(main2Slice, argSlice, main3Slice)
   228  		if !bytes.Equal(main1Slice, main2Slice) {
   229  			t.Fatal("Mismatched XorUnsafe result.")
   230  		}
   231  		sentinel := byte(rand.Intn(256))
   232  		main4Arr[sliceEnd] = sentinel
   233  		simd.Xor(main4Slice, argSlice, main3Slice)
   234  		if !bytes.Equal(main1Slice, main4Slice) {
   235  			t.Fatal("Mismatched Xor result.")
   236  		}
   237  		if main4Arr[sliceEnd] != sentinel {
   238  			t.Fatal("Xor clobbered an extra byte.")
   239  		}
   240  		simd.XorUnsafeInplace(main3Slice, argSlice)
   241  		if !bytes.Equal(main1Slice, main3Slice) {
   242  			t.Fatal("Mismatched XorUnsafeInplace result.")
   243  		}
   244  		main5Arr[sliceEnd] = sentinel
   245  		simd.XorInplace(main5Slice, argSlice)
   246  		if !bytes.Equal(main1Slice, main5Slice) {
   247  			t.Fatal("Mismatched XorInplace result.")
   248  		}
   249  		if main5Arr[sliceEnd] != sentinel {
   250  			t.Fatal("XorInplace clobbered an extra byte.")
   251  		}
   252  	}
   253  }
   254  
   255  func invmaskInplaceSlow(main, invmask []byte) {
   256  	// Slow, but straightforward-to-verify implementation.
   257  	for idx := range main {
   258  		main[idx] = main[idx] &^ invmask[idx]
   259  	}
   260  }
   261  
   262  func TestInvmask(t *testing.T) {
   263  	maxSize := 500
   264  	nIter := 200
   265  	invmaskArr := simd.MakeUnsafe(maxSize)
   266  	for ii := range invmaskArr {
   267  		invmaskArr[ii] = byte(rand.Intn(256))
   268  	}
   269  	main1Arr := simd.MakeUnsafe(maxSize)
   270  	main2Arr := simd.MakeUnsafe(maxSize)
   271  	main3Arr := simd.MakeUnsafe(maxSize)
   272  	main4Arr := simd.MakeUnsafe(maxSize)
   273  	main5Arr := simd.MakeUnsafe(maxSize)
   274  	for iter := 0; iter < nIter; iter++ {
   275  		sliceStart := rand.Intn(maxSize)
   276  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   277  		invmaskSlice := invmaskArr[sliceStart:sliceEnd]
   278  		main1Slice := main1Arr[sliceStart:sliceEnd]
   279  		for ii := range main1Slice {
   280  			main1Slice[ii] = byte(rand.Intn(256))
   281  		}
   282  		main2Slice := main2Arr[sliceStart:sliceEnd]
   283  		main3Slice := main3Arr[sliceStart:sliceEnd]
   284  		main4Slice := main4Arr[sliceStart:sliceEnd]
   285  		main5Slice := main5Arr[sliceStart:sliceEnd]
   286  		copy(main3Slice, main1Slice)
   287  		copy(main5Slice, main1Slice)
   288  		invmaskInplaceSlow(main1Slice, invmaskSlice)
   289  		simd.InvmaskUnsafe(main2Slice, main3Slice, invmaskSlice)
   290  		if !bytes.Equal(main1Slice, main2Slice) {
   291  			t.Fatal("Mismatched InvmaskUnsafe result.")
   292  		}
   293  		sentinel := byte(rand.Intn(256))
   294  		main4Arr[sliceEnd] = sentinel
   295  		simd.Invmask(main4Slice, main3Slice, invmaskSlice)
   296  		if !bytes.Equal(main1Slice, main4Slice) {
   297  			t.Fatal("Mismatched Invmask result.")
   298  		}
   299  		if main4Arr[sliceEnd] != sentinel {
   300  			t.Fatal("Invmask clobbered an extra byte.")
   301  		}
   302  		simd.InvmaskUnsafeInplace(main3Slice, invmaskSlice)
   303  		if !bytes.Equal(main1Slice, main3Slice) {
   304  			t.Fatal("Mismatched InvmaskUnsafeInplace result.")
   305  		}
   306  		main5Arr[sliceEnd] = sentinel
   307  		simd.InvmaskInplace(main5Slice, invmaskSlice)
   308  		if !bytes.Equal(main1Slice, main5Slice) {
   309  			t.Fatal("Mismatched InvmaskInplace result.")
   310  		}
   311  		if main5Arr[sliceEnd] != sentinel {
   312  			t.Fatal("InvmaskInplace clobbered an extra byte.")
   313  		}
   314  	}
   315  }
   316  
   317  func andConst8InplaceSlow(main []byte, val byte) {
   318  	for idx, mainByte := range main {
   319  		main[idx] = mainByte & val
   320  	}
   321  }
   322  
   323  func TestAndConst8(t *testing.T) {
   324  	maxSize := 500
   325  	nIter := 200
   326  	main1Arr := simd.MakeUnsafe(maxSize)
   327  	main2Arr := simd.MakeUnsafe(maxSize)
   328  	main3Arr := simd.MakeUnsafe(maxSize)
   329  	main4Arr := simd.MakeUnsafe(maxSize)
   330  	main5Arr := simd.MakeUnsafe(maxSize)
   331  	for iter := 0; iter < nIter; iter++ {
   332  		sliceStart := rand.Intn(maxSize)
   333  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   334  		main1Slice := main1Arr[sliceStart:sliceEnd]
   335  		for ii := range main1Slice {
   336  			main1Slice[ii] = byte(rand.Intn(256))
   337  		}
   338  		main2Slice := main2Arr[sliceStart:sliceEnd]
   339  		main3Slice := main3Arr[sliceStart:sliceEnd]
   340  		main4Slice := main4Arr[sliceStart:sliceEnd]
   341  		main5Slice := main5Arr[sliceStart:sliceEnd]
   342  		andVal := byte(rand.Intn(256))
   343  		sentinel := byte(rand.Intn(256))
   344  		copy(main2Slice, main1Slice)
   345  		copy(main3Slice, main1Slice)
   346  		main2Arr[sliceEnd] = sentinel
   347  		main5Arr[sliceEnd] = sentinel
   348  		simd.AndConst8Unsafe(main4Slice, main1Slice, andVal)
   349  		simd.AndConst8(main5Slice, main1Slice, andVal)
   350  		andConst8InplaceSlow(main1Slice, andVal)
   351  		if !bytes.Equal(main1Slice, main4Slice) {
   352  			t.Fatal("Mismatched AndConst8Unsafe result.")
   353  		}
   354  		if !bytes.Equal(main1Slice, main5Slice) {
   355  			t.Fatal("Mismatched AndConst8 result.")
   356  		}
   357  		if main5Arr[sliceEnd] != sentinel {
   358  			t.Fatal("AndConst8 clobbered an extra byte.")
   359  		}
   360  		simd.AndConst8Inplace(main2Slice, andVal)
   361  		if !bytes.Equal(main1Slice, main2Slice) {
   362  			t.Fatal("Mismatched AndConst8Inplace result.")
   363  		}
   364  		if main2Arr[sliceEnd] != sentinel {
   365  			t.Fatal("AndConst8Inplace clobbered an extra byte.")
   366  		}
   367  		simd.AndConst8UnsafeInplace(main3Slice, andVal)
   368  		if !bytes.Equal(main1Slice, main3Slice) {
   369  			t.Fatal("Mismatched AndConst8UnsafeInplace result.")
   370  		}
   371  	}
   372  }
   373  
   374  func orConst8InplaceSlow(main []byte, val byte) {
   375  	for idx, mainByte := range main {
   376  		main[idx] = mainByte | val
   377  	}
   378  }
   379  
   380  func TestOrConst8(t *testing.T) {
   381  	maxSize := 500
   382  	nIter := 200
   383  	main1Arr := simd.MakeUnsafe(maxSize)
   384  	main2Arr := simd.MakeUnsafe(maxSize)
   385  	main3Arr := simd.MakeUnsafe(maxSize)
   386  	main4Arr := simd.MakeUnsafe(maxSize)
   387  	main5Arr := simd.MakeUnsafe(maxSize)
   388  	for iter := 0; iter < nIter; iter++ {
   389  		sliceStart := rand.Intn(maxSize)
   390  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   391  		main1Slice := main1Arr[sliceStart:sliceEnd]
   392  		for ii := range main1Slice {
   393  			main1Slice[ii] = byte(rand.Intn(256))
   394  		}
   395  		main2Slice := main2Arr[sliceStart:sliceEnd]
   396  		main3Slice := main3Arr[sliceStart:sliceEnd]
   397  		main4Slice := main4Arr[sliceStart:sliceEnd]
   398  		main5Slice := main5Arr[sliceStart:sliceEnd]
   399  		orVal := byte(rand.Intn(256))
   400  		sentinel := byte(rand.Intn(256))
   401  		copy(main2Slice, main1Slice)
   402  		copy(main3Slice, main1Slice)
   403  		main2Arr[sliceEnd] = sentinel
   404  		main5Arr[sliceEnd] = sentinel
   405  		simd.OrConst8Unsafe(main4Slice, main1Slice, orVal)
   406  		simd.OrConst8(main5Slice, main1Slice, orVal)
   407  		orConst8InplaceSlow(main1Slice, orVal)
   408  		if !bytes.Equal(main4Slice, main1Slice) {
   409  			t.Fatal("Mismatched OrConst8Unsafe result.")
   410  		}
   411  		if !bytes.Equal(main5Slice, main1Slice) {
   412  			t.Fatal("Mismatched OrConst8 result.")
   413  		}
   414  		if main5Arr[sliceEnd] != sentinel {
   415  			t.Fatal("OrConst8 clobbered an extra byte.")
   416  		}
   417  		simd.OrConst8Inplace(main2Slice, orVal)
   418  		if !bytes.Equal(main1Slice, main2Slice) {
   419  			t.Fatal("Mismatched OrConst8Inplace result.")
   420  		}
   421  		if main2Arr[sliceEnd] != sentinel {
   422  			t.Fatal("OrConst8Inplace clobbered an extra byte.")
   423  		}
   424  		simd.OrConst8UnsafeInplace(main3Slice, orVal)
   425  		if !bytes.Equal(main1Slice, main3Slice) {
   426  			t.Fatal("Mismatched OrConst8UnsafeInplace result.")
   427  		}
   428  	}
   429  }
   430  
   431  func xorConst8InplaceSlow(main []byte, val byte) {
   432  	for idx, mainByte := range main {
   433  		main[idx] = mainByte ^ val
   434  	}
   435  }
   436  
   437  func TestXorConst8(t *testing.T) {
   438  	maxSize := 500
   439  	nIter := 200
   440  	main1Arr := simd.MakeUnsafe(maxSize)
   441  	main2Arr := simd.MakeUnsafe(maxSize)
   442  	main3Arr := simd.MakeUnsafe(maxSize)
   443  	main4Arr := simd.MakeUnsafe(maxSize)
   444  	main5Arr := simd.MakeUnsafe(maxSize)
   445  	src2Arr := simd.MakeUnsafe(maxSize)
   446  	for iter := 0; iter < nIter; iter++ {
   447  		sliceStart := rand.Intn(maxSize)
   448  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   449  		main1Slice := main1Arr[sliceStart:sliceEnd]
   450  		for ii := range main1Slice {
   451  			main1Slice[ii] = byte(rand.Intn(256))
   452  		}
   453  		main2Slice := main2Arr[sliceStart:sliceEnd]
   454  		main3Slice := main3Arr[sliceStart:sliceEnd]
   455  		main4Slice := main4Arr[sliceStart:sliceEnd]
   456  		main5Slice := main5Arr[sliceStart:sliceEnd]
   457  		src2Slice := src2Arr[sliceStart:sliceEnd]
   458  		xorVal := byte(rand.Intn(256))
   459  		sentinel := byte(rand.Intn(256))
   460  		copy(main2Slice, main1Slice)
   461  		copy(main3Slice, main1Slice)
   462  		copy(src2Slice, main1Slice)
   463  		main2Arr[sliceEnd] = sentinel
   464  		main5Arr[sliceEnd] = sentinel
   465  		simd.XorConst8Unsafe(main4Slice, main1Slice, xorVal)
   466  		simd.XorConst8(main5Slice, main1Slice, xorVal)
   467  		xorConst8InplaceSlow(main1Slice, xorVal)
   468  		if !bytes.Equal(main1Slice, main4Slice) {
   469  			t.Fatal("Mismatched XorConst8Unsafe result.")
   470  		}
   471  		if !bytes.Equal(main1Slice, main5Slice) {
   472  			t.Fatal("Mismatched XorConst8 result.")
   473  		}
   474  		if main5Arr[sliceEnd] != sentinel {
   475  			t.Fatal("XorConst8 clobbered an extra byte.")
   476  		}
   477  		simd.XorConst8Inplace(main2Slice, xorVal)
   478  		if !bytes.Equal(main1Slice, main2Slice) {
   479  			t.Fatal("Mismatched XorConst8Inplace result.")
   480  		}
   481  		if main2Arr[sliceEnd] != sentinel {
   482  			t.Fatal("XorConst8Inplace clobbered an extra byte.")
   483  		}
   484  		simd.XorConst8Inplace(main2Slice, xorVal)
   485  		if !bytes.Equal(main2Slice, src2Slice) {
   486  			t.Fatal("XorConst8Inplace did not invert itself.")
   487  		}
   488  		simd.XorConst8UnsafeInplace(main3Slice, xorVal)
   489  		if !bytes.Equal(main1Slice, main3Slice) {
   490  			t.Fatal("Mismatched XorConst8UnsafeInplace result.")
   491  		}
   492  	}
   493  }
   494  
   495  /*
   496  Benchmark results:
   497    MacBook Pro (15-inch, 2016)
   498    2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3
   499  
   500  Benchmark_XorConst8Inplace/SIMDShort1Cpu-8                    20          79730366 ns/op
   501  Benchmark_XorConst8Inplace/SIMDShortHalfCpu-8                100          21216542 ns/op
   502  Benchmark_XorConst8Inplace/SIMDShortAllCpu-8                 100          18902385 ns/op
   503  Benchmark_XorConst8Inplace/SIMDLong1Cpu-8                      1        1291770636 ns/op
   504  Benchmark_XorConst8Inplace/SIMDLongHalfCpu-8                   2         958003320 ns/op
   505  Benchmark_XorConst8Inplace/SIMDLongAllCpu-8                    2         967333286 ns/op
   506  Benchmark_XorConst8Inplace/SlowShort1Cpu-8                     3         417781174 ns/op
   507  Benchmark_XorConst8Inplace/SlowShortHalfCpu-8                 10         112255124 ns/op
   508  Benchmark_XorConst8Inplace/SlowShortAllCpu-8                  10         100138643 ns/op
   509  Benchmark_XorConst8Inplace/SlowLong1Cpu-8                      1        5476605564 ns/op
   510  Benchmark_XorConst8Inplace/SlowLongHalfCpu-8                   1        1480923705 ns/op
   511  Benchmark_XorConst8Inplace/SlowLongAllCpu-8                    1        1588216831 ns/op
   512  */
   513  
   514  func xorConst8InplaceSimdSubtask(dst, src []byte, nIter int) int {
   515  	for iter := 0; iter < nIter; iter++ {
   516  		simd.XorConst8Inplace(dst, 3)
   517  	}
   518  	return int(dst[0])
   519  }
   520  
   521  func xorConst8InplaceSlowSubtask(dst, src []byte, nIter int) int {
   522  	for iter := 0; iter < nIter; iter++ {
   523  		xorConst8InplaceSlow(dst, 3)
   524  	}
   525  	return int(dst[0])
   526  }
   527  
   528  func Benchmark_XorConst8Inplace(b *testing.B) {
   529  	funcs := []taggedMultiBenchFunc{
   530  		{
   531  			f:   xorConst8InplaceSimdSubtask,
   532  			tag: "SIMD",
   533  		},
   534  		{
   535  			f:   xorConst8InplaceSlowSubtask,
   536  			tag: "Slow",
   537  		},
   538  	}
   539  	for _, f := range funcs {
   540  		multiBenchmark(f.f, f.tag+"Short", 75, 0, 9999999, b)
   541  		multiBenchmark(f.f, f.tag+"Long", 249250621, 0, 50, b)
   542  	}
   543  }