
     1  // Copyright 2021 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     5  package simd_test
     7  import (
     8  	"math"
     9  	"math/rand"
    10  	"testing"
    12  	""
    13  	""
    14  )
    16  func findNaNOrInf64Standard(data []float64) int {
    17  	for i, x := range data {
    18  		if math.IsNaN(x) || (x > math.MaxFloat64) || (x < -math.MaxFloat64) {
    19  			return i
    20  		}
    21  	}
    22  	return -1
    23  }
    25  func getPossiblyNaNOrInfFloat64(rate float64) float64 {
    26  	var x float64
    27  	if rand.Float64() < rate {
    28  		r := rand.Intn(3)
    29  		if r == 0 {
    30  			x = math.NaN()
    31  		} else {
    32  			// -inf if r == 1, +inf if r == 2.
    33  			x = math.Inf(r - 2)
    34  		}
    35  	} else {
    36  		// Exponentially-distributed random number in
    37  		// [-math.MaxFloat64, math.MaxFloat64].
    38  		x = rand.ExpFloat64()
    39  		if rand.Intn(2) != 0 {
    40  			x = -x
    41  		}
    42  	}
    43  	return x
    44  }
    46  func TestFindNaNOrInf(t *testing.T) {
    47  	// Exhausively test all first-NaN/inf positions for sizes in 0..32.
    48  	for size := 0; size <= 32; size++ {
    49  		slice := make([]float64, size)
    50  		got := simd.FindNaNOrInf64(slice)
    51  		want := findNaNOrInf64Standard(slice)
    52  		expect.EQ(t, got, want)
    53  		expect.EQ(t, got, -1)
    55  		for target := size - 1; target >= 0; target-- {
    56  			slice[target] = math.Inf(1)
    57  			// Randomize everything after this position, maximizing entropy.
    58  			for i := target + 1; i < size; i++ {
    59  				slice[i] = getPossiblyNaNOrInfFloat64(0.5)
    60  			}
    61  			got = simd.FindNaNOrInf64(slice)
    62  			want = findNaNOrInf64Standard(slice)
    63  			expect.EQ(t, got, want)
    64  			expect.EQ(t, got, target)
    65  		}
    66  		for i := range slice {
    67  			slice[i] = 0.0
    68  		}
    69  		for target := size - 1; target >= 0; target-- {
    70  			slice[target] = math.NaN()
    71  			for i := target + 1; i < size; i++ {
    72  				slice[i] = getPossiblyNaNOrInfFloat64(0.5)
    73  			}
    74  			got = simd.FindNaNOrInf64(slice)
    75  			want = findNaNOrInf64Standard(slice)
    76  			expect.EQ(t, got, want)
    77  			expect.EQ(t, got, target)
    78  		}
    79  	}
    80  	// Random test for larger sizes.
    81  	maxSize := 30000
    82  	nIter := 200
    83  	rand.Seed(1)
    84  	for iter := 0; iter < nIter; iter++ {
    85  		size := 1 + rand.Intn(maxSize)
    86  		rate := rand.Float64()
    87  		slice := make([]float64, size)
    88  		for i := range slice {
    89  			slice[i] = getPossiblyNaNOrInfFloat64(rate)
    90  		}
    92  		for pos := 0; ; {
    93  			got := simd.FindNaNOrInf64(slice[pos:])
    94  			want := findNaNOrInf64Standard(slice[pos:])
    95  			expect.EQ(t, got, want)
    96  			if got == -1 {
    97  				break
    98  			}
    99  			pos += got + 1
   100  		}
   101  	}
   102  }
// float64Args bundles the input handed to the FindNaNOrInf benchmark
// subtasks. The subtasks receive it as an interface{} and type-assert it back
// to this type.
type float64Args struct {
	// main is the slice that the subtasks scan for NaN/inf values.
	main []float64
}
   108  func findNaNOrInfSimdSubtask(args interface{}, nIter int) int {
   109  	a := args.(float64Args)
   110  	slice := a.main
   111  	sum := 0
   112  	pos := 0
   113  	for iter := 0; iter < nIter; iter++ {
   114  		got := simd.FindNaNOrInf64(slice[pos:])
   115  		sum += got
   116  		if got == -1 {
   117  			pos = 0
   118  		} else {
   119  			pos += got + 1
   120  		}
   121  	}
   122  	return sum
   123  }
   125  func findNaNOrInf64Bitwise(data []float64) int {
   126  	for i, x := range data {
   127  		// Extract the exponent bits, and check if they're all set: that (and only
   128  		// that) corresponds to NaN/inf.
   129  		// Interestingly, the performance of this idiom degrades significantly,
   130  		// relative to
   131  		//   "math.IsNaN(x) || x > math.MaxFloat64 || x < -math.MaxFloat64",
   132  		// if x is interpreted as a float64 anywhere in this loop.
   133  		if (math.Float64bits(x) & (0x7ff << 52)) == (0x7ff << 52) {
   134  			return i
   135  		}
   136  	}
   137  	return -1
   138  }
   140  func findNaNOrInfBitwiseSubtask(args interface{}, nIter int) int {
   141  	a := args.(float64Args)
   142  	slice := a.main
   143  	sum := 0
   144  	pos := 0
   145  	for iter := 0; iter < nIter; iter++ {
   146  		got := findNaNOrInf64Bitwise(slice[pos:])
   147  		sum += got
   148  		if got == -1 {
   149  			pos = 0
   150  		} else {
   151  			pos += got + 1
   152  		}
   153  	}
   154  	return sum
   155  }
   157  func findNaNOrInfStandardSubtask(args interface{}, nIter int) int {
   158  	a := args.(float64Args)
   159  	slice := a.main
   160  	sum := 0
   161  	pos := 0
   162  	for iter := 0; iter < nIter; iter++ {
   163  		got := findNaNOrInf64Standard(slice[pos:])
   164  		sum += got
   165  		if got == -1 {
   166  			pos = 0
   167  		} else {
   168  			pos += got + 1
   169  		}
   170  	}
   171  	return sum
   172  }
   174  // On an m5.16xlarge:
   175  //   $ bazel run //go/src/ -- -test.bench=FindNaNOrInf
   176  //   ...
   177  // Benchmark_FindNaNOrInf/SIMDLong1Cpu-64                82          14053127 ns/op
   178  // Benchmark_FindNaNOrInf/SIMDLongHalfCpu-64            960           1143599 ns/op
   179  // Benchmark_FindNaNOrInf/SIMDLongAllCpu-64            1143           1018525 ns/op
   180  // Benchmark_FindNaNOrInf/BitwiseLong1Cpu-64              8         126930287 ns/op
   181  // Benchmark_FindNaNOrInf/BitwiseLongHalfCpu-64         253           6668467 ns/op
   182  // Benchmark_FindNaNOrInf/BitwiseLongAllCpu-64          229           4679633 ns/op
   183  // Benchmark_FindNaNOrInf/StandardLong1Cpu-64             7         158318559 ns/op
   184  // Benchmark_FindNaNOrInf/StandardLongHalfCpu-64        190           6223669 ns/op
   185  // Benchmark_FindNaNOrInf/StandardLongAllCpu-64         171           6746008 ns/op
   186  // PASS
   187  func Benchmark_FindNaNOrInf(b *testing.B) {
   188  	funcs := []taggedMultiBenchVarargsFunc{
   189  		{
   190  			f:   findNaNOrInfSimdSubtask,
   191  			tag: "SIMD",
   192  		},
   193  		{
   194  			f:   findNaNOrInfBitwiseSubtask,
   195  			tag: "Bitwise",
   196  		},
   197  		{
   198  			f:   findNaNOrInfStandardSubtask,
   199  			tag: "Standard",
   200  		},
   201  	}
   202  	rand.Seed(1)
   203  	for _, f := range funcs {
   204  		multiBenchmarkVarargs(f.f, f.tag+"Long", 100000, func() interface{} {
   205  			main := make([]float64, 30000)
   206  			// Results were overly influenced by RNG if the number of NaNs/infs in
   207  			// the slice was not controlled.
   208  			for i := 0; i < 30; i++ {
   209  				for {
   210  					pos := rand.Intn(len(main))
   211  					if main[pos] != math.Inf(0) {
   212  						main[pos] = math.Inf(0)
   213  						break
   214  					}
   215  				}
   216  			}
   217  			return float64Args{
   218  				main: main,
   219  			}
   220  		}, b)
   221  	}
   222  }