github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/simd/add_test.go (about)

     1  // Copyright 2018 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package simd_test
     6  
     7  import (
     8  	"bytes"
     9  	"math/rand"
    10  	"testing"
    11  
    12  	"github.com/Schaudge/grailbase/simd"
    13  )
    14  
    15  func addConst8Slow(dst []byte, val byte) {
    16  	// strangely, this takes ~35% less time than the single-parameter for loop on
    17  	// the AddConstLong4 benchmark, though performance is usually
    18  	// indistinguishable
    19  	for idx, dstByte := range dst {
    20  		dst[idx] = dstByte + val
    21  	}
    22  }
    23  
    24  func TestAddConst(t *testing.T) {
    25  	maxSize := 500
    26  	nIter := 200
    27  	main1Arr := simd.MakeUnsafe(maxSize)
    28  	main2Arr := simd.MakeUnsafe(maxSize)
    29  	main3Arr := simd.MakeUnsafe(maxSize)
    30  	main4Arr := simd.MakeUnsafe(maxSize)
    31  	main5Arr := simd.MakeUnsafe(maxSize)
    32  	src2Arr := simd.MakeUnsafe(maxSize)
    33  	for iter := 0; iter < nIter; iter++ {
    34  		sliceStart := rand.Intn(maxSize)
    35  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
    36  		main1Slice := main1Arr[sliceStart:sliceEnd]
    37  		for ii := range main1Slice {
    38  			main1Slice[ii] = byte(rand.Intn(256))
    39  		}
    40  		main2Slice := main2Arr[sliceStart:sliceEnd]
    41  		main3Slice := main3Arr[sliceStart:sliceEnd]
    42  		main4Slice := main4Arr[sliceStart:sliceEnd]
    43  		main5Slice := main5Arr[sliceStart:sliceEnd]
    44  		src2Slice := src2Arr[sliceStart:sliceEnd]
    45  		copy(main2Slice, main1Slice)
    46  		copy(main3Slice, main1Slice)
    47  		copy(src2Slice, main1Slice)
    48  		byteVal := byte(rand.Intn(256))
    49  		simd.AddConst8Unsafe(main4Slice, main1Slice, byteVal)
    50  		sentinel := byte(rand.Intn(256))
    51  		main3Arr[sliceEnd] = sentinel
    52  		main5Arr[sliceEnd] = sentinel
    53  		simd.AddConst8(main5Slice, main1Slice, byteVal)
    54  		addConst8Slow(main1Slice, byteVal)
    55  		if !bytes.Equal(main1Slice, main4Slice) {
    56  			t.Fatal("Mismatched AddConst8Unsafe result.")
    57  		}
    58  		if !bytes.Equal(main1Slice, main5Slice) {
    59  			t.Fatal("Mismatched AddConst8 result.")
    60  		}
    61  		if main5Arr[sliceEnd] != sentinel {
    62  			t.Fatal("AddConst8 clobbered an extra byte.")
    63  		}
    64  		simd.AddConst8UnsafeInplace(main2Slice, byteVal)
    65  		if !bytes.Equal(main1Slice, main2Slice) {
    66  			t.Fatal("Mismatched AddConst8UnsafeInplace result.")
    67  		}
    68  		simd.AddConst8Inplace(main3Slice, byteVal)
    69  		if !bytes.Equal(main1Slice, main3Slice) {
    70  			t.Fatal("Mismatched AddConst8Inplace result.")
    71  		}
    72  		if main3Arr[sliceEnd] != sentinel {
    73  			t.Fatal("AddConst8Inplace clobbered an extra byte.")
    74  		}
    75  		// Verify inverse property.
    76  		simd.AddConst8Inplace(main3Slice, -byteVal)
    77  		if !bytes.Equal(src2Slice, main3Slice) {
    78  			t.Fatal("AddConst8Inplace(., -byteVal) didn't invert AddConst8Inplace(., byteVal).")
    79  		}
    80  	}
    81  }
    82  
    83  /*
    84  Benchmark results:
    85    MacBook Pro (15-inch, 2016)
    86    2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3
    87  
    88  Benchmark_AddConst8Inplace/SIMDShort1Cpu-8                    20          94449590 ns/op
    89  Benchmark_AddConst8Inplace/SIMDShortHalfCpu-8                 50          28197917 ns/op
    90  Benchmark_AddConst8Inplace/SIMDShortAllCpu-8                  50          27452313 ns/op
    91  Benchmark_AddConst8Inplace/SIMDLong1Cpu-8                      1        1145256373 ns/op
    92  Benchmark_AddConst8Inplace/SIMDLongHalfCpu-8                   2         959236835 ns/op
    93  Benchmark_AddConst8Inplace/SIMDLongAllCpu-8                    2         982555560 ns/op
    94  Benchmark_AddConst8Inplace/SlowShort1Cpu-8                     2         707287108 ns/op
    95  Benchmark_AddConst8Inplace/SlowShortHalfCpu-8                 10         199415710 ns/op
    96  Benchmark_AddConst8Inplace/SlowShortAllCpu-8                   5         245220685 ns/op
    97  Benchmark_AddConst8Inplace/SlowLong1Cpu-8                      1        5480013373 ns/op
    98  Benchmark_AddConst8Inplace/SlowLongHalfCpu-8                   1        1467424090 ns/op
    99  Benchmark_AddConst8Inplace/SlowLongAllCpu-8                    1        1554565031 ns/op
   100  */
   101  
   102  func addConst8InplaceSimdSubtask(dst, src []byte, nIter int) int {
   103  	for iter := 0; iter < nIter; iter++ {
   104  		simd.AddConst8Inplace(dst, 33)
   105  	}
   106  	return int(dst[0])
   107  }
   108  
   109  func addConst8InplaceSlowSubtask(dst, src []byte, nIter int) int {
   110  	for iter := 0; iter < nIter; iter++ {
   111  		addConst8Slow(dst, 33)
   112  	}
   113  	return int(dst[0])
   114  }
   115  
   116  func Benchmark_AddConst8Inplace(b *testing.B) {
   117  	funcs := []taggedMultiBenchFunc{
   118  		{
   119  			f:   addConst8InplaceSimdSubtask,
   120  			tag: "SIMD",
   121  		},
   122  		{
   123  			f:   addConst8InplaceSlowSubtask,
   124  			tag: "Slow",
   125  		},
   126  	}
   127  	for _, f := range funcs {
   128  		multiBenchmark(f.f, f.tag+"Short", 150, 0, 9999999, b)
   129  		// GRCh37 chromosome 1 length is 249250621, so that's a plausible
   130  		// long-array use case.
   131  		multiBenchmark(f.f, f.tag+"Long", 249250621, 0, 50, b)
   132  	}
   133  }
   134  
   135  func subtractFromConst8Slow(dst []byte, val byte) {
   136  	for idx, dstByte := range dst {
   137  		dst[idx] = val - dstByte
   138  	}
   139  }
   140  
   141  func TestSubtractFrom(t *testing.T) {
   142  	maxSize := 500
   143  	nIter := 200
   144  	main1Arr := simd.MakeUnsafe(maxSize)
   145  	main2Arr := simd.MakeUnsafe(maxSize)
   146  	main3Arr := simd.MakeUnsafe(maxSize)
   147  	main4Arr := simd.MakeUnsafe(maxSize)
   148  	main5Arr := simd.MakeUnsafe(maxSize)
   149  	src2Arr := simd.MakeUnsafe(maxSize)
   150  	for iter := 0; iter < nIter; iter++ {
   151  		sliceStart := rand.Intn(maxSize)
   152  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   153  		main1Slice := main1Arr[sliceStart:sliceEnd]
   154  		for ii := range main1Slice {
   155  			main1Slice[ii] = byte(rand.Intn(256))
   156  		}
   157  		main2Slice := main2Arr[sliceStart:sliceEnd]
   158  		main3Slice := main3Arr[sliceStart:sliceEnd]
   159  		main4Slice := main4Arr[sliceStart:sliceEnd]
   160  		main5Slice := main5Arr[sliceStart:sliceEnd]
   161  		src2Slice := src2Arr[sliceStart:sliceEnd]
   162  		copy(main2Slice, main1Slice)
   163  		copy(main3Slice, main1Slice)
   164  		copy(src2Slice, main1Slice)
   165  		byteVal := byte(rand.Intn(256))
   166  		simd.SubtractFromConst8Unsafe(main4Slice, main1Slice, byteVal)
   167  		sentinel := byte(rand.Intn(256))
   168  		main3Arr[sliceEnd] = sentinel
   169  		main5Arr[sliceEnd] = sentinel
   170  		simd.SubtractFromConst8(main5Slice, main1Slice, byteVal)
   171  		subtractFromConst8Slow(main1Slice, byteVal)
   172  		if !bytes.Equal(main1Slice, main4Slice) {
   173  			t.Fatal("Mismatched SubtractFromConst8Unsafe result.")
   174  		}
   175  		if !bytes.Equal(main1Slice, main5Slice) {
   176  			t.Fatal("Mismatched SubtractFromConst8 result.")
   177  		}
   178  		if main5Arr[sliceEnd] != sentinel {
   179  			t.Fatal("SubtractFromConst8 clobbered an extra byte.")
   180  		}
   181  		simd.SubtractFromConst8UnsafeInplace(main2Slice, byteVal)
   182  		if !bytes.Equal(main1Slice, main2Slice) {
   183  			t.Fatal("Mismatched SubtractFromConst8UnsafeInplace result.")
   184  		}
   185  		simd.SubtractFromConst8Inplace(main3Slice, byteVal)
   186  		if !bytes.Equal(main1Slice, main3Slice) {
   187  			t.Fatal("Mismatched SubtractFromConst8Inplace result.")
   188  		}
   189  		if main3Arr[sliceEnd] != sentinel {
   190  			t.Fatal("SubtractFromConst8Inplace clobbered an extra byte.")
   191  		}
   192  		// Verify inverse property.
   193  		simd.SubtractFromConst8Inplace(main3Slice, byteVal)
   194  		if !bytes.Equal(src2Slice, main3Slice) {
   195  			t.Fatal("SubtractFromConst8Inplace(., byteVal) didn't invert itself.")
   196  		}
   197  	}
   198  }