github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/hashprobe/hashprobe_test.go (about) 1 package hashprobe 2 3 import ( 4 "encoding/binary" 5 "fmt" 6 "math/rand" 7 "testing" 8 "time" 9 "unsafe" 10 ) 11 12 func TestTable32GroupSize(t *testing.T) { 13 if n := unsafe.Sizeof(table32Group{}); n != 64 { 14 t.Errorf("size of 32 bit table group is not 64 bytes: %d", n) 15 } 16 } 17 18 func TestUint32TableProbeOneByOne(t *testing.T) { 19 const N = 500 20 table := NewUint32Table(0, 0.9) 21 22 for n := 0; n < 2; n++ { 23 // Do two passes, both should behave the same. 24 for i := 1; i <= N; i++ { 25 k := [1]uint32{} 26 v := [1]int32{} 27 28 k[0] = uint32(i) 29 table.Probe(k[:], v[:]) 30 31 if v[0] != int32(i-1) { 32 t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0]) 33 } 34 } 35 } 36 } 37 38 func TestUint32TableProbeBulk(t *testing.T) { 39 const N = 999 40 table := NewUint32Table(0, 0.9) 41 42 k := make([]uint32, N) 43 v := make([]int32, N) 44 45 for i := range k { 46 k[i] = uint32(i) 47 } 48 49 for n := 0; n < 2; n++ { 50 table.Probe(k, v) 51 52 for i := range v { 53 if v[i] != int32(i) { 54 t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i]) 55 } 56 } 57 58 if t.Failed() { 59 break 60 } 61 62 for i := range v { 63 v[i] = 0 64 } 65 } 66 } 67 68 func TestTable64GroupSize(t *testing.T) { 69 if n := unsafe.Sizeof(table64Group{}); n != 64 { 70 t.Errorf("size of 64 bit table group is not 64 bytes: %d", n) 71 } 72 } 73 74 func TestUint64TableProbeOneByOne(t *testing.T) { 75 const N = 500 76 table := NewUint64Table(0, 0.9) 77 78 for n := 0; n < 2; n++ { 79 // Do two passes, both should behave the same. 80 for i := 1; i <= N; i++ { 81 k := [1]uint64{} 82 v := [1]int32{} 83 84 k[0] = uint64(i) 85 table.Probe(k[:], v[:]) 86 87 if v[0] != int32(i-1) { 88 t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0]) 89 } 90 } 91 } 92 } 93 94 func TestUint64TableProbeBulk(t *testing.T) { 95 const N = 999 96 table := NewUint64Table(0, 0.9) 97 98 k := make([]uint64, N) 99 v := make([]int32, N) 100 101 for i := range k { 102 k[i] = uint64(i) 103 } 104 105 for n := 0; n < 2; n++ { 106 table.Probe(k, v) 107 108 for i := range v { 109 if v[i] != int32(i) { 110 t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i]) 111 } 112 } 113 114 if t.Failed() { 115 break 116 } 117 118 for i := range v { 119 v[i] = 0 120 } 121 } 122 } 123 124 func TestUint128TableProbeOneByOne(t *testing.T) { 125 const N = 500 126 table := NewUint128Table(0, 0.9) 127 128 for n := 0; n < 2; n++ { 129 // Do two passes, both should behave the same. 130 for i := 1; i <= N; i++ { 131 k := [1][16]byte{} 132 v := [1]int32{} 133 134 binary.LittleEndian.PutUint64(k[0][:8], uint64(i)) 135 table.Probe(k[:], v[:]) 136 137 if v[0] != int32(i-1) { 138 t.Errorf("wrong value probed for key=%x: want=%d got=%d", i, i-1, v[0]) 139 } 140 } 141 } 142 } 143 144 func TestUint128TableProbeBulk(t *testing.T) { 145 const N = 999 146 table := NewUint128Table(0, 0.9) 147 148 k := make([][16]byte, N) 149 v := make([]int32, N) 150 151 for i := range k { 152 binary.LittleEndian.PutUint64(k[i][:8], uint64(i)) 153 } 154 155 for n := 0; n < 2; n++ { 156 table.Probe(k, v) 157 158 for i := range v { 159 if v[i] != int32(i) { 160 t.Errorf("wrong value probed for key=%x: want=%d got=%d", k[i], i, v[i]) 161 } 162 } 163 164 if t.Failed() { 165 break 166 } 167 168 for i := range v { 169 v[i] = 0 170 } 171 } 172 } 173 174 const ( 175 benchmarkProbesPerLoop = 500 176 benchmarkMaxLoad = 0.9 177 ) 178 179 type uint32Table interface { 180 Reset() 181 Len() int 182 Probe([]uint32, []int32) int 183 } 184 185 type uint32Map map[uint32]int32 186 187 func (m uint32Map) Reset() { 188 for k := range m { 189 delete(m, k) 190 } 191 } 192 193 func (m uint32Map) Len() int { 194 return len(m) 195 } 196 197 func (m uint32Map) Probe(keys []uint32, values []int32) (n int) { 198 _ = values[:len(keys)] 199 200 for i, k := range keys { 201 v, ok := m[k] 202 if !ok { 203 v = int32(len(m)) 204 m[k] = v 205 n++ 206 } 207 values[i] = v 208 } 209 210 return n 211 } 212 213 func BenchmarkUint32Table(b *testing.B) { 214 benchmarkUint32Table(b, func(size int) uint32Table { return NewUint32Table(size, benchmarkMaxLoad) }) 215 } 216 217 func BenchmarkGoUint32Map(b *testing.B) { 218 benchmarkUint32Table(b, func(size int) uint32Table { return make(uint32Map, size) }) 219 } 220 221 func benchmarkUint32Table(b *testing.B, newTable func(size int) uint32Table) { 222 for n := 100; n <= 1e6; n *= 10 { 223 table := newTable(0) 224 keys, values := generateUint32Table(n) 225 226 b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { 227 benchmarkUint32Loop(b, table.Probe, keys, values) 228 }) 229 } 230 } 231 232 func benchmarkUint32Loop(b *testing.B, f func([]uint32, []int32) int, keys []uint32, values []int32) { 233 i := 0 234 j := benchmarkProbesPerLoop 235 b.SetBytes(4 * int64(benchmarkProbesPerLoop)) 236 237 _ = keys[:len(values)] 238 _ = values[:len(keys)] 239 start := time.Now() 240 241 for k := 0; k < b.N; k++ { 242 if j > len(keys) { 243 j = len(keys) 244 } 245 f(keys[i:j:j], values[i:j:j]) 246 if j == len(keys) { 247 i, j = 0, benchmarkProbesPerLoop 248 } else { 249 i, j = j, j+benchmarkProbesPerLoop 250 } 251 } 252 253 seconds := time.Since(start).Seconds() 254 b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") 255 } 256 257 func generateUint32Table(n int) ([]uint32, []int32) { 258 prng := rand.New(rand.NewSource(int64(n))) 259 keys := make([]uint32, n) 260 values := make([]int32, n) 261 262 for i := range keys { 263 keys[i] = prng.Uint32() 264 } 265 266 return keys, values 267 } 268 269 type uint64Table interface { 270 Reset() 271 Len() int 272 Probe([]uint64, []int32) int 273 } 274 275 type uint64Map map[uint64]int32 276 277 func (m uint64Map) Reset() { 278 for k := range m { 279 delete(m, k) 280 } 281 } 282 283 func (m uint64Map) Len() int { 284 return len(m) 285 } 286 287 func (m uint64Map) Probe(keys []uint64, values []int32) (n int) { 288 _ = values[:len(keys)] 289 290 for i, k := range keys { 291 v, ok := m[k] 292 if !ok { 293 v = int32(len(m)) 294 m[k] = v 295 n++ 296 } 297 values[i] = v 298 } 299 300 return n 301 } 302 303 func BenchmarkUint64Table(b *testing.B) { 304 benchmarkUint64Table(b, func(size int) uint64Table { return NewUint64Table(size, benchmarkMaxLoad) }) 305 } 306 307 func BenchmarkGoUint64Map(b *testing.B) { 308 benchmarkUint64Table(b, func(size int) uint64Table { return make(uint64Map, size) }) 309 } 310 311 func benchmarkUint64Table(b *testing.B, newTable func(size int) uint64Table) { 312 for n := 100; n <= 1e6; n *= 10 { 313 table := newTable(0) 314 keys, values := generateUint64Table(n) 315 316 b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { 317 benchmarkUint64Loop(b, table.Probe, keys, values) 318 }) 319 } 320 } 321 322 func benchmarkUint64Loop(b *testing.B, f func([]uint64, []int32) int, keys []uint64, values []int32) { 323 i := 0 324 j := benchmarkProbesPerLoop 325 b.SetBytes(8 * int64(benchmarkProbesPerLoop)) 326 327 _ = keys[:len(values)] 328 _ = values[:len(keys)] 329 start := time.Now() 330 331 for k := 0; k < b.N; k++ { 332 if j > len(keys) { 333 j = len(keys) 334 } 335 f(keys[i:j:j], values[i:j:j]) 336 if j == len(keys) { 337 i, j = 0, benchmarkProbesPerLoop 338 } else { 339 i, j = j, j+benchmarkProbesPerLoop 340 } 341 } 342 343 seconds := time.Since(start).Seconds() 344 b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") 345 } 346 347 func generateUint64Table(n int) ([]uint64, []int32) { 348 prng := rand.New(rand.NewSource(int64(n))) 349 keys := make([]uint64, n) 350 values := make([]int32, n) 351 352 for i := range keys { 353 keys[i] = prng.Uint64() 354 } 355 356 return keys, values 357 } 358 359 type uint128Table interface { 360 Reset() 361 Len() int 362 Probe([][16]byte, []int32) int 363 } 364 365 type uint128Map map[[16]byte]int32 366 367 func (m uint128Map) Reset() { 368 for k := range m { 369 delete(m, k) 370 } 371 } 372 373 func (m uint128Map) Len() int { 374 return len(m) 375 } 376 377 func (m uint128Map) Probe(keys [][16]byte, values []int32) (n int) { 378 _ = values[:len(keys)] 379 380 for i, k := range keys { 381 v, ok := m[k] 382 if !ok { 383 v = int32(len(m)) 384 m[k] = v 385 n++ 386 } 387 values[i] = v 388 } 389 390 return n 391 } 392 393 func BenchmarkUint128Table(b *testing.B) { 394 benchmarkUint128Table(b, func(size int) uint128Table { return NewUint128Table(size, benchmarkMaxLoad) }) 395 } 396 397 func BenchmarkGoUint128Map(b *testing.B) { 398 benchmarkUint128Table(b, func(size int) uint128Table { return make(uint128Map, size) }) 399 } 400 401 func benchmarkUint128Table(b *testing.B, newTable func(size int) uint128Table) { 402 for n := 100; n <= 1e6; n *= 10 { 403 table := newTable(0) 404 keys, values := generateUint128Table(n) 405 406 b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { 407 benchmarkUint128Loop(b, table.Probe, keys, values) 408 }) 409 } 410 } 411 412 func benchmarkUint128Loop(b *testing.B, f func([][16]byte, []int32) int, keys [][16]byte, values []int32) { 413 i := 0 414 j := benchmarkProbesPerLoop 415 b.SetBytes(16 * int64(benchmarkProbesPerLoop)) 416 417 _ = keys[:len(values)] 418 _ = values[:len(keys)] 419 start := time.Now() 420 421 for k := 0; k < b.N; k++ { 422 if j > len(keys) { 423 j = len(keys) 424 } 425 f(keys[i:j:j], values[i:j:j]) 426 if j == len(keys) { 427 i, j = 0, benchmarkProbesPerLoop 428 } else { 429 i, j = j, j+benchmarkProbesPerLoop 430 } 431 } 432 433 seconds := time.Since(start).Seconds() 434 b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") 435 } 436 437 func generateUint128Table(n int) ([][16]byte, []int32) { 438 prng := rand.New(rand.NewSource(int64(n))) 439 keys := make([][16]byte, n) 440 values := make([]int32, n) 441 442 for i := range keys { 443 prng.Read(keys[i][:]) 444 } 445 446 return keys, values 447 }