github.com/grailbio/base@v0.0.11/simd/cmp_test.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package simd_test 6 7 import ( 8 "math/rand" 9 "testing" 10 11 "github.com/grailbio/base/simd" 12 ) 13 14 func firstUnequal8Slow(arg1, arg2 []byte, startPos int) int { 15 // Slow, but straightforward-to-verify implementation. 16 endPos := len(arg1) 17 for pos := startPos; pos < endPos; pos++ { 18 if arg1[pos] != arg2[pos] { 19 return pos 20 } 21 } 22 return endPos 23 } 24 25 func TestFirstUnequal(t *testing.T) { 26 // Generate some random pairs of strings with varying frequencies of equal 27 // bytes, and verify that iterating through the strings with 28 // firstUnequal8Slow generates the same sequences of indexes as 29 // simd.FirstUnequal8{Unsafe}. 30 maxSize := 500 31 nIter := 200 32 main1Arr := simd.MakeUnsafe(maxSize) 33 main2Arr := simd.MakeUnsafe(maxSize) 34 for iter := 0; iter < nIter; iter++ { 35 sliceStart := rand.Intn(maxSize) 36 sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart) 37 main1Slice := main1Arr[sliceStart:sliceEnd] 38 for ii := range main1Slice { 39 main1Slice[ii] = byte(rand.Intn(256)) 40 } 41 main2Slice := main2Arr[sliceStart:sliceEnd] 42 copy(main2Slice, main1Slice) 43 sliceSize := sliceEnd - sliceStart 44 nDiff := rand.Intn(sliceSize + 1) 45 for ii := 0; ii < nDiff; ii++ { 46 // This may choose the same position multiple times; that's ok. Also ok 47 // if the new byte randomly matches what it previously was. 48 pos := rand.Intn(sliceSize) 49 main2Slice[pos] = byte(rand.Intn(256)) 50 } 51 curPos := sliceStart 52 for { 53 unsafePos := simd.FirstUnequal8Unsafe(main1Slice, main2Slice, curPos) 54 safePos := simd.FirstUnequal8(main1Slice, main2Slice, curPos) 55 curPos = firstUnequal8Slow(main1Slice, main2Slice, curPos) 56 if curPos != safePos { 57 t.Fatal("Mismatched FirstUnequal8 result.") 58 } 59 if curPos != unsafePos { 60 t.Fatal("Mismatched FirstUnequal8Unsafe result.") 61 } 62 curPos++ 63 if curPos >= sliceSize { 64 break 65 } 66 } 67 } 68 } 69 70 /* 71 Benchmark results: 72 MacBook Pro (15-inch, 2016) 73 2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3 74 75 Benchmark_FirstUnequal8/UnsafeShort1Cpu-8 10 104339029 ns/op 76 Benchmark_FirstUnequal8/UnsafeShortHalfCpu-8 50 28360826 ns/op 77 Benchmark_FirstUnequal8/UnsafeShortAllCpu-8 100 24272646 ns/op 78 Benchmark_FirstUnequal8/UnsafeLong1Cpu-8 2 654616638 ns/op 79 Benchmark_FirstUnequal8/UnsafeLongHalfCpu-8 3 499705618 ns/op 80 Benchmark_FirstUnequal8/UnsafeLongAllCpu-8 3 477807746 ns/op 81 Benchmark_FirstUnequal8/SIMDShort1Cpu-8 10 114335599 ns/op 82 Benchmark_FirstUnequal8/SIMDShortHalfCpu-8 50 30189426 ns/op 83 Benchmark_FirstUnequal8/SIMDShortAllCpu-8 50 26847829 ns/op 84 Benchmark_FirstUnequal8/SIMDLong1Cpu-8 2 735662635 ns/op 85 Benchmark_FirstUnequal8/SIMDLongHalfCpu-8 3 488191229 ns/op 86 Benchmark_FirstUnequal8/SIMDLongAllCpu-8 3 480315740 ns/op 87 Benchmark_FirstUnequal8/SlowShort1Cpu-8 2 608618106 ns/op 88 Benchmark_FirstUnequal8/SlowShortHalfCpu-8 10 166658947 ns/op 89 Benchmark_FirstUnequal8/SlowShortAllCpu-8 10 154372585 ns/op 90 Benchmark_FirstUnequal8/SlowLong1Cpu-8 1 3883830889 ns/op 91 Benchmark_FirstUnequal8/SlowLongHalfCpu-8 1 1080159614 ns/op 92 Benchmark_FirstUnequal8/SlowLongAllCpu-8 1 1046794857 ns/op 93 94 Notes: There is practically no speed penalty relative to bytes.Compare(). 95 */ 96 97 func firstUnequal8UnsafeSubtask(dst, src []byte, nIter int) int { 98 curPos := 0 99 endPos := len(dst) 100 for iter := 0; iter < nIter; iter++ { 101 if curPos >= endPos { 102 curPos = 0 103 } 104 curPos = simd.FirstUnequal8Unsafe(dst, src, curPos) 105 curPos++ 106 } 107 return curPos 108 } 109 110 func firstUnequal8SimdSubtask(dst, src []byte, nIter int) int { 111 curPos := 0 112 endPos := len(dst) 113 for iter := 0; iter < nIter; iter++ { 114 if curPos >= endPos { 115 curPos = 0 116 } 117 curPos = simd.FirstUnequal8(dst, src, curPos) 118 curPos++ 119 } 120 return curPos 121 } 122 123 func firstUnequal8SlowSubtask(dst, src []byte, nIter int) int { 124 curPos := 0 125 endPos := len(dst) 126 for iter := 0; iter < nIter; iter++ { 127 if curPos >= endPos { 128 curPos = 0 129 } 130 curPos = firstUnequal8Slow(dst, src, curPos) 131 curPos++ 132 } 133 return curPos 134 } 135 136 func Benchmark_FirstUnequal8(b *testing.B) { 137 funcs := []taggedMultiBenchFunc{ 138 { 139 f: firstUnequal8UnsafeSubtask, 140 tag: "Unsafe", 141 }, 142 { 143 f: firstUnequal8SimdSubtask, 144 tag: "SIMD", 145 }, 146 { 147 f: firstUnequal8SlowSubtask, 148 tag: "Slow", 149 }, 150 } 151 // Necessary to customize the initialization functions; the default setting 152 // of src = {0, 3, 6, 9, ...} and dst = {0, 0, 0, 0, ...} results in too many 153 // mismatches for a realistic benchmark. 154 opts := multiBenchmarkOpts{ 155 dstInit: func(src []byte) { 156 src[len(src)/2] = 128 157 }, 158 srcInit: bytesInit0, 159 } 160 for _, f := range funcs { 161 multiBenchmark(f.f, f.tag+"Short", 150, 150, 9999999, b, opts) 162 multiBenchmark(f.f, f.tag+"Long", 249250621, 249250621, 50, b, opts) 163 } 164 } 165 166 func firstGreater8Slow(arg []byte, val byte, startPos int) int { 167 // Slow, but straightforward-to-verify implementation. 168 endPos := len(arg) 169 for pos := startPos; pos < endPos; pos++ { 170 if arg[pos] > val { 171 return pos 172 } 173 } 174 return endPos 175 } 176 177 func TestFirstGreater(t *testing.T) { 178 // Generate random strings and random int8s to compare against, and verify 179 // that iterating through the strings with firstGreater8Slow generates 180 // the same sequences of indexes as simd.FirstGreater8{Unsafe}. 181 maxSize := 500 182 nIter := 200 183 mainArr := simd.MakeUnsafe(maxSize) 184 for iter := 0; iter < nIter; iter++ { 185 sliceStart := rand.Intn(maxSize) 186 sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart) 187 mainSlice := mainArr[sliceStart:sliceEnd] 188 for ii := range mainSlice { 189 mainSlice[ii] = byte(rand.Intn(256)) 190 } 191 cmpVal := byte(rand.Intn(256)) 192 sliceSize := sliceEnd - sliceStart 193 curPos := sliceStart 194 for { 195 unsafePos := simd.FirstGreater8Unsafe(mainSlice, cmpVal, curPos) 196 safePos := simd.FirstGreater8(mainSlice, cmpVal, curPos) 197 curPos = firstGreater8Slow(mainSlice, cmpVal, curPos) 198 if curPos != safePos { 199 t.Fatal("Mismatched FirstGreater8 result.") 200 } 201 if curPos != unsafePos { 202 t.Fatal("Mismatched FirstGreater8Unsafe result.") 203 } 204 curPos++ 205 if curPos >= sliceSize { 206 break 207 } 208 } 209 } 210 } 211 212 func firstLeq8Slow(arg []byte, val byte, startPos int) int { 213 // Slow, but straightforward-to-verify implementation. 214 endPos := len(arg) 215 for pos := startPos; pos < endPos; pos++ { 216 if arg[pos] <= val { 217 return pos 218 } 219 } 220 return endPos 221 } 222 223 func TestFirstLeq8(t *testing.T) { 224 // Generate random strings and random int8s to compare against, and verify 225 // that iterating through the strings with firstLeq8Slow generates the 226 // same sequences of indexes as simd.FirstLeq8{Unsafe}. 227 maxSize := 500 228 nIter := 200 229 mainArr := simd.MakeUnsafe(maxSize) 230 for iter := 0; iter < nIter; iter++ { 231 sliceStart := rand.Intn(maxSize) 232 sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart) 233 mainSlice := mainArr[sliceStart:sliceEnd] 234 for ii := range mainSlice { 235 mainSlice[ii] = byte(rand.Intn(256)) 236 } 237 cmpVal := byte(rand.Intn(256)) 238 sliceSize := sliceEnd - sliceStart 239 curPos := sliceStart 240 for { 241 unsafePos := simd.FirstLeq8Unsafe(mainSlice, cmpVal, curPos) 242 safePos := simd.FirstLeq8(mainSlice, cmpVal, curPos) 243 curPos = firstLeq8Slow(mainSlice, cmpVal, curPos) 244 if curPos != safePos { 245 t.Fatal("Mismatched FirstLeq8 result.") 246 } 247 if curPos != unsafePos { 248 t.Fatal("Mismatched FirstLeq8Unsafe result.") 249 } 250 curPos++ 251 if curPos >= sliceSize { 252 break 253 } 254 } 255 } 256 } 257 258 /* 259 Benchmark results: 260 MacBook Pro (15-inch, 2016) 261 2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3 262 263 Benchmark_FirstLeq8/SIMDShort1Cpu-8 20 87235782 ns/op 264 Benchmark_FirstLeq8/SIMDShortHalfCpu-8 50 23864936 ns/op 265 Benchmark_FirstLeq8/SIMDShortAllCpu-8 100 21211734 ns/op 266 Benchmark_FirstLeq8/SIMDLong1Cpu-8 3 402996726 ns/op 267 Benchmark_FirstLeq8/SIMDLongHalfCpu-8 5 245066128 ns/op 268 Benchmark_FirstLeq8/SIMDLongAllCpu-8 5 231557103 ns/op 269 Benchmark_FirstLeq8/SlowShort1Cpu-8 2 549800977 ns/op 270 Benchmark_FirstLeq8/SlowShortHalfCpu-8 10 152074140 ns/op 271 Benchmark_FirstLeq8/SlowShortAllCpu-8 10 142355855 ns/op 272 Benchmark_FirstLeq8/SlowLong1Cpu-8 1 3687059961 ns/op 273 Benchmark_FirstLeq8/SlowLongHalfCpu-8 1 1030280464 ns/op 274 Benchmark_FirstLeq8/SlowLongAllCpu-8 1 1019364554 ns/op 275 */ 276 277 func firstLeq8SimdSubtask(dst, src []byte, nIter int) int { 278 curPos := 0 279 endPos := len(src) 280 for iter := 0; iter < nIter; iter++ { 281 if curPos >= endPos { 282 curPos = 0 283 } 284 curPos = simd.FirstLeq8(src, 0, curPos) 285 curPos++ 286 } 287 return curPos 288 } 289 290 func firstLeq8SlowSubtask(dst, src []byte, nIter int) int { 291 curPos := 0 292 endPos := len(src) 293 for iter := 0; iter < nIter; iter++ { 294 if curPos >= endPos { 295 curPos = 0 296 } 297 curPos = firstLeq8Slow(src, 0, curPos) 298 curPos++ 299 } 300 return curPos 301 } 302 303 func Benchmark_FirstLeq8(b *testing.B) { 304 funcs := []taggedMultiBenchFunc{ 305 { 306 f: firstLeq8SimdSubtask, 307 tag: "SIMD", 308 }, 309 { 310 f: firstLeq8SlowSubtask, 311 tag: "Slow", 312 }, 313 } 314 opts := multiBenchmarkOpts{ 315 srcInit: func(src []byte) { 316 simd.Memset8(src, 255) 317 // Just change one byte in the middle. 318 src[len(src)/2] = 128 319 }, 320 } 321 for _, f := range funcs { 322 multiBenchmark(f.f, f.tag+"Short", 0, 150, 9999999, b, opts) 323 multiBenchmark(f.f, f.tag+"Long", 0, 249250621, 50, b, opts) 324 } 325 }