github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/bitset/bitset_test.go (about)

     1  // Copyright 2018 GRAIL, Inc.  All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package bitset_test
     6  
     7  import (
     8  	"math/bits"
     9  	"math/rand"
    10  	"runtime"
    11  	"testing"
    12  
    13  	gbitset "github.com/Schaudge/grailbase/bitset"
    14  	"github.com/grailbio/testutil/expect"
    15  	"github.com/willf/bitset"
    16  )
    17  
    18  func TestSetAndClearIntervals(t *testing.T) {
    19  	rand.Seed(1)
    20  	nTrialsPerNWord := 100
    21  	for nWord := 1; nWord <= 9; nWord++ {
    22  		bs := make([]uintptr, nWord)
    23  		nBits := nWord * gbitset.BitsPerWord
    24  		expectedBits := make([]bool, nBits)
    25  		for trialIdx := 0; trialIdx < nTrialsPerNWord; trialIdx++ {
    26  			// We perform a bunch of random SetInterval and ClearInterval operations
    27  			// on a []uintptr bitset, use for-loops to update the simpler
    28  			// expectedBits slice to what we expect, and use gbitset.Test to verify
    29  			// semantic equivalence.
    30  			startIdx := rand.Intn(nBits)
    31  			limitIdx := startIdx + rand.Intn(nBits-startIdx)
    32  			gbitset.SetInterval(bs, startIdx, limitIdx)
    33  			for i := startIdx; i < limitIdx; i++ {
    34  				expectedBits[i] = true
    35  			}
    36  			for i := 0; i < nBits; i++ {
    37  				expect.EQ(t, gbitset.Test(bs, i), expectedBits[i])
    38  			}
    39  			startIdx = rand.Intn(nBits)
    40  			limitIdx = startIdx + rand.Intn(nBits-startIdx)
    41  			gbitset.ClearInterval(bs, startIdx, limitIdx)
    42  			for i := startIdx; i < limitIdx; i++ {
    43  				expectedBits[i] = false
    44  			}
    45  			for i := 0; i < nBits; i++ {
    46  				expect.EQ(t, gbitset.Test(bs, i), expectedBits[i])
    47  			}
    48  		}
    49  	}
    50  }
    51  
    52  /*
    53  Initial benchmark results:
    54    MacBook Pro (15-inch, 2016)
    55    2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3
    56  
    57  Benchmark_NonzeroWordLowDensity1-8               5         318053789 ns/op
    58  Benchmark_NonzeroWordLowDensity4-8              20          92268360 ns/op
    59  Benchmark_NonzeroWordLowDensityMax-8            20          75435109 ns/op
    60  Benchmark_NonzeroWordHighDensity1-8              5         338889681 ns/op
    61  Benchmark_NonzeroWordHighDensity4-8             20          93980434 ns/op
    62  Benchmark_NonzeroWordHighDensityMax-8           20          85158994 ns/op
    63  
    64  For comparison, using github.com/willf/bitset.NextSet():
    65  Benchmark_NonzeroWordLowDensity1-8               5         295363742 ns/op
    66  Benchmark_NonzeroWordLowDensity4-8              20          78013901 ns/op
    67  Benchmark_NonzeroWordLowDensityMax-8            20          73992701 ns/op
    68  Benchmark_NonzeroWordHighDensity1-8              2         600711815 ns/op
    69  Benchmark_NonzeroWordHighDensity4-8             10         156621467 ns/op
    70  Benchmark_NonzeroWordHighDensityMax-8           10         109333530 ns/op
    71  
    72  github.com/willf/bitset.NextSetMany():
    73  Benchmark_NonzeroWordLowDensity1-8               3         362510428 ns/op
    74  Benchmark_NonzeroWordLowDensity4-8              20          98390731 ns/op
    75  Benchmark_NonzeroWordLowDensityMax-8            20          89888478 ns/op
    76  Benchmark_NonzeroWordHighDensity1-8             10         202346572 ns/op
    77  Benchmark_NonzeroWordHighDensity4-8             20          57818033 ns/op
    78  Benchmark_NonzeroWordHighDensityMax-8           30          49601154 ns/op
    79  
    80  Manual inlining:
    81  Benchmark_NonzeroWordLowDensity1-8              20          66941143 ns/op
    82  Benchmark_NonzeroWordLowDensity4-8             100          17791558 ns/op
    83  Benchmark_NonzeroWordLowDensityMax-8           100          17825100 ns/op
    84  Benchmark_NonzeroWordHighDensity1-8             20         101415506 ns/op
    85  Benchmark_NonzeroWordHighDensity4-8             50          27927527 ns/op
    86  Benchmark_NonzeroWordHighDensityMax-8           50          23895500 ns/op
    87  */
    88  
    89  func nonzeroWordSubtask(dst, src []uintptr, nIter int) int {
    90  	tot := 0
    91  	nzwPop := 0
    92  	for _, bitWord := range src {
    93  		if bitWord != 0 {
    94  			nzwPop++
    95  		}
    96  	}
    97  	for iter := 0; iter < nIter; iter++ {
    98  		copy(dst, src)
    99  		for s, i := gbitset.NewNonzeroWordScanner(dst, nzwPop); i != -1; i = s.Next() {
   100  			tot += i
   101  		}
   102  	}
   103  	return tot
   104  }
   105  
   106  func willfNextSetSubtask(dst, src []uintptr, nIter int) int {
   107  	nBits := uint(len(src) * gbitset.BitsPerWord)
   108  	bsetSrc := bitset.New(nBits)
   109  	for i := uint(0); i != nBits; i++ {
   110  		if gbitset.Test(src, int(i)) {
   111  			bsetSrc.Set(i)
   112  		}
   113  	}
   114  	bsetDst := bitset.New(nBits)
   115  
   116  	tot := uint(0)
   117  	for iter := 0; iter < nIter; iter++ {
   118  		bsetSrc.Copy(bsetDst)
   119  		for i, e := bsetDst.NextSet(0); e; i, e = bsetDst.NextSet(i + 1) {
   120  			tot += i
   121  		}
   122  		bsetDst.ClearAll()
   123  	}
   124  	return int(tot)
   125  }
   126  
   127  func willfNextSetManySubtask(dst, src []uintptr, nIter int) int {
   128  	nBits := uint(len(src) * gbitset.BitsPerWord)
   129  	bsetSrc := bitset.New(nBits)
   130  	for i := uint(0); i != nBits; i++ {
   131  		if gbitset.Test(src, int(i)) {
   132  			bsetSrc.Set(i)
   133  		}
   134  	}
   135  	bsetDst := bitset.New(nBits)
   136  
   137  	tot := uint(0)
   138  	// tried other buffer sizes, 256 seems to be a sweet spot
   139  	var buffer [256]uint
   140  	for iter := 0; iter < nIter; iter++ {
   141  		bsetSrc.Copy(bsetDst)
   142  		for i, buf := bsetDst.NextSetMany(0, buffer[:]); len(buf) > 0; i, buf = bsetDst.NextSetMany(i+1, buf) {
   143  			for j := range buf {
   144  				tot += buf[j]
   145  			}
   146  		}
   147  		bsetDst.ClearAll()
   148  	}
   149  	return int(tot)
   150  }
   151  
   152  func bitsetManualInlineSubtask(dst, src []uintptr, nIter int) int {
   153  	tot := 0
   154  	nzwPop := 0
   155  	for _, bitWord := range src {
   156  		if bitWord != 0 {
   157  			nzwPop++
   158  		}
   159  	}
   160  	for iter := 0; iter < nIter; iter++ {
   161  		copy(dst, src)
   162  		nNonzeroWord := nzwPop
   163  		for i, bitWord := range dst {
   164  			if bitWord != 0 {
   165  				bitIdxOffset := i * gbitset.BitsPerWord
   166  				for {
   167  					tot += bits.TrailingZeros64(uint64(bitWord)) + bitIdxOffset
   168  					bitWord &= bitWord - 1
   169  					if bitWord == 0 {
   170  						break
   171  					}
   172  				}
   173  				dst[i] = 0
   174  			}
   175  			nNonzeroWord--
   176  			if nNonzeroWord == 0 {
   177  				break
   178  			}
   179  		}
   180  	}
   181  	return tot
   182  }
   183  
   184  func nonzeroWordSubtaskFuture(dst, src []uintptr, nIter int) chan int {
   185  	future := make(chan int)
   186  	// go func() { future <- nonzeroWordSubtask(dst, src, nIter) }()
   187  	// go func() { future <- willfNextSetSubtask(dst, src, nIter) }()
   188  	// go func() { future <- willfNextSetManySubtask(dst, src, nIter) }()
   189  	go func() { future <- bitsetManualInlineSubtask(dst, src, nIter) }()
   190  	return future
   191  }
   192  
   193  func multiNonzeroWord(dsts, srcs [][]uintptr, cpus int, nJob int) {
   194  	sumFutures := make([]chan int, cpus)
   195  	shardSizeBase := nJob / cpus
   196  	shardRemainder := nJob - shardSizeBase*cpus
   197  	shardSizeP1 := shardSizeBase + 1
   198  	var taskIdx int
   199  	for ; taskIdx < shardRemainder; taskIdx++ {
   200  		sumFutures[taskIdx] = nonzeroWordSubtaskFuture(dsts[taskIdx], srcs[taskIdx], shardSizeP1)
   201  	}
   202  	for ; taskIdx < cpus; taskIdx++ {
   203  		sumFutures[taskIdx] = nonzeroWordSubtaskFuture(dsts[taskIdx], srcs[taskIdx], shardSizeBase)
   204  	}
   205  	var sum int
   206  	for taskIdx = 0; taskIdx < cpus; taskIdx++ {
   207  		sum += <-sumFutures[taskIdx]
   208  	}
   209  }
   210  
   211  func benchmarkNonzeroWord(cpus, nWord, spacing, nJob int, b *testing.B) {
   212  	if cpus > runtime.NumCPU() {
   213  		b.Skipf("only have %v cpus", runtime.NumCPU())
   214  	}
   215  
   216  	dstSlices := make([][]uintptr, cpus)
   217  	srcSlices := make([][]uintptr, cpus)
   218  	nBits := nWord * gbitset.BitsPerWord
   219  	for ii := range dstSlices {
   220  		// 7 extra capacity to prevent false sharing.
   221  		newDst := make([]uintptr, nWord, nWord+7)
   222  		newSrc := make([]uintptr, nWord, nWord+7)
   223  		for i := spacing - 1; i < nBits; i += spacing {
   224  			gbitset.Set(newSrc, i)
   225  		}
   226  		dstSlices[ii] = newDst
   227  		srcSlices[ii] = newSrc
   228  	}
   229  	for i := 0; i < b.N; i++ {
   230  		multiNonzeroWord(dstSlices, srcSlices, cpus, nJob)
   231  	}
   232  }
   233  
   234  func Benchmark_NonzeroWordLowDensity1(b *testing.B) {
   235  	benchmarkNonzeroWord(1, 16, 369, 9999999, b)
   236  }
   237  
   238  func Benchmark_NonzeroWordLowDensity4(b *testing.B) {
   239  	benchmarkNonzeroWord(4, 16, 369, 9999999, b)
   240  }
   241  
   242  func Benchmark_NonzeroWordLowDensityMax(b *testing.B) {
   243  	benchmarkNonzeroWord(runtime.NumCPU(), 16, 369, 9999999, b)
   244  }
   245  
   246  func Benchmark_NonzeroWordHighDensity1(b *testing.B) {
   247  	benchmarkNonzeroWord(1, 16, 1, 99999, b)
   248  }
   249  
   250  func Benchmark_NonzeroWordHighDensity4(b *testing.B) {
   251  	benchmarkNonzeroWord(4, 16, 1, 99999, b)
   252  }
   253  
   254  func Benchmark_NonzeroWordHighDensityMax(b *testing.B) {
   255  	benchmarkNonzeroWord(runtime.NumCPU(), 16, 1, 99999, b)
   256  }
   257  
   258  func naiveBitScanAdder(dst []uintptr) int {
   259  	nBits := len(dst) * gbitset.BitsPerWord
   260  	tot := 0
   261  	for i := 0; i != nBits; i++ {
   262  		if gbitset.Test(dst, i) {
   263  			tot += i
   264  		}
   265  	}
   266  	return tot
   267  }
   268  
   269  func TestNonzeroWord(t *testing.T) {
   270  	maxSize := 500
   271  	nIter := 200
   272  	srcArr := make([]uintptr, maxSize)
   273  	dstArr := make([]uintptr, maxSize)
   274  	for iter := 0; iter < nIter; iter++ {
   275  		sliceStart := rand.Intn(maxSize)
   276  		sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart)
   277  		srcSlice := srcArr[sliceStart:sliceEnd]
   278  		dstSlice := dstArr[sliceStart:sliceEnd]
   279  
   280  		for i := range srcSlice {
   281  			srcSlice[i] = uintptr(rand.Uint64())
   282  		}
   283  		copy(dstSlice, srcSlice)
   284  		nzwPop := 0
   285  		for _, bitWord := range dstSlice {
   286  			if bitWord != 0 {
   287  				nzwPop++
   288  			}
   289  		}
   290  		if nzwPop == 0 {
   291  			continue
   292  		}
   293  
   294  		tot1 := 0
   295  		for s, i := gbitset.NewNonzeroWordScanner(dstSlice, nzwPop); i != -1; i = s.Next() {
   296  			tot1 += i
   297  		}
   298  		tot2 := naiveBitScanAdder(srcSlice)
   299  		if tot1 != tot2 {
   300  			t.Fatal("Mismatched bit-index sums.")
   301  		}
   302  		for _, bitWord := range dstSlice {
   303  			if bitWord != 0 {
   304  				t.Fatal("NonzeroWordScanner failed to clear all words.")
   305  			}
   306  		}
   307  	}
   308  }