github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/bench/onescount/bench_test.go (about) 1 package onescount_test 2 3 import ( 4 "math/bits" 5 "testing" 6 ) 7 8 const m0 = 0x5555555555555555 // 01010101 ... 9 const m1 = 0x3333333333333333 // 00110011 ... 10 const m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ... 11 const m3 = 0x00ff00ff00ff00ff // etc. 12 const m4 = 0x0000ffff0000ffff 13 const mz = 0xffffffffffffffff 14 15 // GoOnesCount64 returns the number of one bits ("population count") in x. 16 func GoOnesCount64(x uint64) int { 17 // Implementation: Parallel summing of adjacent bits. 18 // See "Hacker's Delight", Chap. 5: Counting Bits. 19 // The following pattern shows the general approach: 20 // 21 // x = x>>1&(m0&m) + x&(m0&m) 22 // x = x>>2&(m1&m) + x&(m1&m) 23 // x = x>>4&(m2&m) + x&(m2&m) 24 // x = x>>8&(m3&m) + x&(m3&m) 25 // x = x>>16&(m4&m) + x&(m4&m) 26 // x = x>>32&(m5&m) + x&(m5&m) 27 // return int(x) 28 // 29 // Masking (& operations) can be left away when there's no 30 // danger that a field's sum will carry over into the next 31 // field: Since the result cannot be > 64, 8 bits is enough 32 // and we can ignore the masks for the shifts by 8 and up. 33 // Per "Hacker's Delight", the first line can be simplified 34 // more, but it saves at best one instruction, so we leave 35 // it alone for clarity. 36 const m = 1<<64 - 1 37 x = x>>1&(m0&m) + x&(m0&m) 38 x = x>>2&(m1&m) + x&(m1&m) 39 x = (x>>4 + x) & (m2 & m) 40 x += x >> 8 41 x += x >> 16 42 x += x >> 32 43 return int(x) & (1<<7 - 1) 44 } 45 46 func GoOnesCount256(x, y, z, w uint64) int { 47 return GoOnesCount64(x) + 48 GoOnesCount64(y) + 49 GoOnesCount64(z) + 50 GoOnesCount64(w) 51 } 52 53 func BitsOnesCount256(x, y, z, w uint64) int { 54 return bits.OnesCount64(x) + 55 bits.OnesCount64(y) + 56 bits.OnesCount64(z) + 57 bits.OnesCount64(w) 58 } 59 60 func OnesCount256(x, y, z, w uint64) int { 61 const m = 1<<64 - 1 62 63 x = x>>1&(m0&m) + x&(m0&m) 64 y = y>>1&(m0&m) + y&(m0&m) 65 z = z>>1&(m0&m) + z&(m0&m) 66 w = w>>1&(m0&m) + w&(m0&m) 67 68 x = x>>2&(m1&m) + x&(m1&m) 69 y = y>>2&(m1&m) + y&(m1&m) 70 z = z>>2&(m1&m) + z&(m1&m) 71 w = w>>2&(m1&m) + w&(m1&m) 72 73 x = (x>>4 + x) & (m2 & m) 74 y = (y>>4 + y) & (m2 & m) 75 z = (z>>4 + z) & (m2 & m) 76 w = (w>>4 + w) & (m2 & m) 77 78 q := x + y + z + w 79 80 q += q >> 8 81 q += q >> 16 82 q += q >> 32 83 84 return int(q) & (1<<9 - 1) 85 } 86 87 func OnesCount256Alt(x, y, z, w uint64) int { 88 const m = 1<<64 - 1 89 90 var q uint64 91 92 x = x>>1&(m0&m) + x&(m0&m) 93 x = x>>2&(m1&m) + x&(m1&m) 94 x = (x>>4 + x) & (m2 & m) 95 q += x 96 97 y = y>>1&(m0&m) + y&(m0&m) 98 y = y>>2&(m1&m) + y&(m1&m) 99 y = (y>>4 + y) & (m2 & m) 100 q += y 101 102 z = z>>1&(m0&m) + z&(m0&m) 103 z = z>>2&(m1&m) + z&(m1&m) 104 z = (z>>4 + z) & (m2 & m) 105 q += z 106 107 w = w>>1&(m0&m) + w&(m0&m) 108 w = w>>2&(m1&m) + w&(m1&m) 109 w = (w>>4 + w) & (m2 & m) 110 q += w 111 112 q += q >> 8 113 q += q >> 16 114 q += q >> 32 115 116 return int(q) & (1<<9 - 1) 117 } 118 119 func BenchmarkGoOnesCount256(b *testing.B) { 120 var z int 121 for i := 0; i < b.N; i++ { 122 k := uint64(i) 123 z += GoOnesCount256(k, k, k, k) 124 } 125 sink(z) 126 } 127 128 func BenchmarkOnesCount256(b *testing.B) { 129 var z int 130 for i := 0; i < b.N; i++ { 131 k := uint64(i) 132 z += OnesCount256(k, k, k, k) 133 } 134 sink(z) 135 } 136 137 func BenchmarkOnesCount256Alt(b *testing.B) { 138 var z int 139 for i := 0; i < b.N; i++ { 140 k := uint64(i) 141 z += OnesCount256Alt(k, k, k, k) 142 } 143 sink(z) 144 } 145 146 func BenchmarkBitsOnesCount256(b *testing.B) { 147 var z int 148 for i := 0; i < b.N; i++ { 149 k := uint64(i) 150 z += BitsOnesCount256(k, k, k, k) 151 } 152 sink(z) 153 } 154 155 func TestOnesCount256(t *testing.T) { 156 check := func(a, b, c, d uint64) { 157 exp := BitsOnesCount256(a, b, c, d) 158 got := OnesCount256(a, b, c, d) 159 if exp != got { 160 t.Error(a, b, c, d, "exp", exp, "got", got) 161 } 162 } 163 164 check(mz, mz, mz, mz) 165 check(mz, mz-1, mz-2, mz-3) 166 check(mz-3, mz-2, mz-1, mz) 167 } 168 169 //go:noinline 170 func sink(v int) {}