github.com/grailbio/base@v0.0.11/bitset/bitset_test.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache-2.0 3 // license that can be found in the LICENSE file. 4 5 package bitset_test 6 7 import ( 8 "math/bits" 9 "math/rand" 10 "runtime" 11 "testing" 12 13 gbitset "github.com/grailbio/base/bitset" 14 "github.com/grailbio/testutil/expect" 15 "github.com/willf/bitset" 16 ) 17 18 func TestSetAndClearIntervals(t *testing.T) { 19 rand.Seed(1) 20 nTrialsPerNWord := 100 21 for nWord := 1; nWord <= 9; nWord++ { 22 bs := make([]uintptr, nWord) 23 nBits := nWord * gbitset.BitsPerWord 24 expectedBits := make([]bool, nBits) 25 for trialIdx := 0; trialIdx < nTrialsPerNWord; trialIdx++ { 26 // We perform a bunch of random SetInterval and ClearInterval operations 27 // on a []uintptr bitset, use for-loops to update the simpler 28 // expectedBits slice to what we expect, and use gbitset.Test to verify 29 // semantic equivalence. 30 startIdx := rand.Intn(nBits) 31 limitIdx := startIdx + rand.Intn(nBits-startIdx) 32 gbitset.SetInterval(bs, startIdx, limitIdx) 33 for i := startIdx; i < limitIdx; i++ { 34 expectedBits[i] = true 35 } 36 for i := 0; i < nBits; i++ { 37 expect.EQ(t, gbitset.Test(bs, i), expectedBits[i]) 38 } 39 startIdx = rand.Intn(nBits) 40 limitIdx = startIdx + rand.Intn(nBits-startIdx) 41 gbitset.ClearInterval(bs, startIdx, limitIdx) 42 for i := startIdx; i < limitIdx; i++ { 43 expectedBits[i] = false 44 } 45 for i := 0; i < nBits; i++ { 46 expect.EQ(t, gbitset.Test(bs, i), expectedBits[i]) 47 } 48 } 49 } 50 } 51 52 /* 53 Initial benchmark results: 54 MacBook Pro (15-inch, 2016) 55 2.7 GHz Intel Core i7, 16 GB 2133 MHz LPDDR3 56 57 Benchmark_NonzeroWordLowDensity1-8 5 318053789 ns/op 58 Benchmark_NonzeroWordLowDensity4-8 20 92268360 ns/op 59 Benchmark_NonzeroWordLowDensityMax-8 20 75435109 ns/op 60 Benchmark_NonzeroWordHighDensity1-8 5 338889681 ns/op 61 Benchmark_NonzeroWordHighDensity4-8 20 93980434 ns/op 62 Benchmark_NonzeroWordHighDensityMax-8 20 85158994 ns/op 63 64 For comparison, using github.com/willf/bitset.NextSet(): 65 Benchmark_NonzeroWordLowDensity1-8 5 295363742 ns/op 66 Benchmark_NonzeroWordLowDensity4-8 20 78013901 ns/op 67 Benchmark_NonzeroWordLowDensityMax-8 20 73992701 ns/op 68 Benchmark_NonzeroWordHighDensity1-8 2 600711815 ns/op 69 Benchmark_NonzeroWordHighDensity4-8 10 156621467 ns/op 70 Benchmark_NonzeroWordHighDensityMax-8 10 109333530 ns/op 71 72 github.com/willf/bitset.NextSetMany(): 73 Benchmark_NonzeroWordLowDensity1-8 3 362510428 ns/op 74 Benchmark_NonzeroWordLowDensity4-8 20 98390731 ns/op 75 Benchmark_NonzeroWordLowDensityMax-8 20 89888478 ns/op 76 Benchmark_NonzeroWordHighDensity1-8 10 202346572 ns/op 77 Benchmark_NonzeroWordHighDensity4-8 20 57818033 ns/op 78 Benchmark_NonzeroWordHighDensityMax-8 30 49601154 ns/op 79 80 Manual inlining: 81 Benchmark_NonzeroWordLowDensity1-8 20 66941143 ns/op 82 Benchmark_NonzeroWordLowDensity4-8 100 17791558 ns/op 83 Benchmark_NonzeroWordLowDensityMax-8 100 17825100 ns/op 84 Benchmark_NonzeroWordHighDensity1-8 20 101415506 ns/op 85 Benchmark_NonzeroWordHighDensity4-8 50 27927527 ns/op 86 Benchmark_NonzeroWordHighDensityMax-8 50 23895500 ns/op 87 */ 88 89 func nonzeroWordSubtask(dst, src []uintptr, nIter int) int { 90 tot := 0 91 nzwPop := 0 92 for _, bitWord := range src { 93 if bitWord != 0 { 94 nzwPop++ 95 } 96 } 97 for iter := 0; iter < nIter; iter++ { 98 copy(dst, src) 99 for s, i := gbitset.NewNonzeroWordScanner(dst, nzwPop); i != -1; i = s.Next() { 100 tot += i 101 } 102 } 103 return tot 104 } 105 106 func willfNextSetSubtask(dst, src []uintptr, nIter int) int { 107 nBits := uint(len(src) * gbitset.BitsPerWord) 108 bsetSrc := bitset.New(nBits) 109 for i := uint(0); i != nBits; i++ { 110 if gbitset.Test(src, int(i)) { 111 bsetSrc.Set(i) 112 } 113 } 114 bsetDst := bitset.New(nBits) 115 116 tot := uint(0) 117 for iter := 0; iter < nIter; iter++ { 118 bsetSrc.Copy(bsetDst) 119 for i, e := bsetDst.NextSet(0); e; i, e = bsetDst.NextSet(i + 1) { 120 tot += i 121 } 122 bsetDst.ClearAll() 123 } 124 return int(tot) 125 } 126 127 func willfNextSetManySubtask(dst, src []uintptr, nIter int) int { 128 nBits := uint(len(src) * gbitset.BitsPerWord) 129 bsetSrc := bitset.New(nBits) 130 for i := uint(0); i != nBits; i++ { 131 if gbitset.Test(src, int(i)) { 132 bsetSrc.Set(i) 133 } 134 } 135 bsetDst := bitset.New(nBits) 136 137 tot := uint(0) 138 // tried other buffer sizes, 256 seems to be a sweet spot 139 var buffer [256]uint 140 for iter := 0; iter < nIter; iter++ { 141 bsetSrc.Copy(bsetDst) 142 for i, buf := bsetDst.NextSetMany(0, buffer[:]); len(buf) > 0; i, buf = bsetDst.NextSetMany(i+1, buf) { 143 for j := range buf { 144 tot += buf[j] 145 } 146 } 147 bsetDst.ClearAll() 148 } 149 return int(tot) 150 } 151 152 func bitsetManualInlineSubtask(dst, src []uintptr, nIter int) int { 153 tot := 0 154 nzwPop := 0 155 for _, bitWord := range src { 156 if bitWord != 0 { 157 nzwPop++ 158 } 159 } 160 for iter := 0; iter < nIter; iter++ { 161 copy(dst, src) 162 nNonzeroWord := nzwPop 163 for i, bitWord := range dst { 164 if bitWord != 0 { 165 bitIdxOffset := i * gbitset.BitsPerWord 166 for { 167 tot += bits.TrailingZeros64(uint64(bitWord)) + bitIdxOffset 168 bitWord &= bitWord - 1 169 if bitWord == 0 { 170 break 171 } 172 } 173 dst[i] = 0 174 } 175 nNonzeroWord-- 176 if nNonzeroWord == 0 { 177 break 178 } 179 } 180 } 181 return tot 182 } 183 184 func nonzeroWordSubtaskFuture(dst, src []uintptr, nIter int) chan int { 185 future := make(chan int) 186 // go func() { future <- nonzeroWordSubtask(dst, src, nIter) }() 187 // go func() { future <- willfNextSetSubtask(dst, src, nIter) }() 188 // go func() { future <- willfNextSetManySubtask(dst, src, nIter) }() 189 go func() { future <- bitsetManualInlineSubtask(dst, src, nIter) }() 190 return future 191 } 192 193 func multiNonzeroWord(dsts, srcs [][]uintptr, cpus int, nJob int) { 194 sumFutures := make([]chan int, cpus) 195 shardSizeBase := nJob / cpus 196 shardRemainder := nJob - shardSizeBase*cpus 197 shardSizeP1 := shardSizeBase + 1 198 var taskIdx int 199 for ; taskIdx < shardRemainder; taskIdx++ { 200 sumFutures[taskIdx] = nonzeroWordSubtaskFuture(dsts[taskIdx], srcs[taskIdx], shardSizeP1) 201 } 202 for ; taskIdx < cpus; taskIdx++ { 203 sumFutures[taskIdx] = nonzeroWordSubtaskFuture(dsts[taskIdx], srcs[taskIdx], shardSizeBase) 204 } 205 var sum int 206 for taskIdx = 0; taskIdx < cpus; taskIdx++ { 207 sum += <-sumFutures[taskIdx] 208 } 209 } 210 211 func benchmarkNonzeroWord(cpus, nWord, spacing, nJob int, b *testing.B) { 212 if cpus > runtime.NumCPU() { 213 b.Skipf("only have %v cpus", runtime.NumCPU()) 214 } 215 216 dstSlices := make([][]uintptr, cpus) 217 srcSlices := make([][]uintptr, cpus) 218 nBits := nWord * gbitset.BitsPerWord 219 for ii := range dstSlices { 220 // 7 extra capacity to prevent false sharing. 221 newDst := make([]uintptr, nWord, nWord+7) 222 newSrc := make([]uintptr, nWord, nWord+7) 223 for i := spacing - 1; i < nBits; i += spacing { 224 gbitset.Set(newSrc, i) 225 } 226 dstSlices[ii] = newDst 227 srcSlices[ii] = newSrc 228 } 229 for i := 0; i < b.N; i++ { 230 multiNonzeroWord(dstSlices, srcSlices, cpus, nJob) 231 } 232 } 233 234 func Benchmark_NonzeroWordLowDensity1(b *testing.B) { 235 benchmarkNonzeroWord(1, 16, 369, 9999999, b) 236 } 237 238 func Benchmark_NonzeroWordLowDensity4(b *testing.B) { 239 benchmarkNonzeroWord(4, 16, 369, 9999999, b) 240 } 241 242 func Benchmark_NonzeroWordLowDensityMax(b *testing.B) { 243 benchmarkNonzeroWord(runtime.NumCPU(), 16, 369, 9999999, b) 244 } 245 246 func Benchmark_NonzeroWordHighDensity1(b *testing.B) { 247 benchmarkNonzeroWord(1, 16, 1, 99999, b) 248 } 249 250 func Benchmark_NonzeroWordHighDensity4(b *testing.B) { 251 benchmarkNonzeroWord(4, 16, 1, 99999, b) 252 } 253 254 func Benchmark_NonzeroWordHighDensityMax(b *testing.B) { 255 benchmarkNonzeroWord(runtime.NumCPU(), 16, 1, 99999, b) 256 } 257 258 func naiveBitScanAdder(dst []uintptr) int { 259 nBits := len(dst) * gbitset.BitsPerWord 260 tot := 0 261 for i := 0; i != nBits; i++ { 262 if gbitset.Test(dst, i) { 263 tot += i 264 } 265 } 266 return tot 267 } 268 269 func TestNonzeroWord(t *testing.T) { 270 maxSize := 500 271 nIter := 200 272 srcArr := make([]uintptr, maxSize) 273 dstArr := make([]uintptr, maxSize) 274 for iter := 0; iter < nIter; iter++ { 275 sliceStart := rand.Intn(maxSize) 276 sliceEnd := sliceStart + rand.Intn(maxSize-sliceStart) 277 srcSlice := srcArr[sliceStart:sliceEnd] 278 dstSlice := dstArr[sliceStart:sliceEnd] 279 280 for i := range srcSlice { 281 srcSlice[i] = uintptr(rand.Uint64()) 282 } 283 copy(dstSlice, srcSlice) 284 nzwPop := 0 285 for _, bitWord := range dstSlice { 286 if bitWord != 0 { 287 nzwPop++ 288 } 289 } 290 if nzwPop == 0 { 291 continue 292 } 293 294 tot1 := 0 295 for s, i := gbitset.NewNonzeroWordScanner(dstSlice, nzwPop); i != -1; i = s.Next() { 296 tot1 += i 297 } 298 tot2 := naiveBitScanAdder(srcSlice) 299 if tot1 != tot2 { 300 t.Fatal("Mismatched bit-index sums.") 301 } 302 for _, bitWord := range dstSlice { 303 if bitWord != 0 { 304 t.Fatal("NonzeroWordScanner failed to clear all words.") 305 } 306 } 307 } 308 }