github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/hashprobe/hashprobe_test.go (about)

     1  package hashprobe
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  	"math/rand"
     7  	"testing"
     8  	"time"
     9  	"unsafe"
    10  )
    11  
    12  func TestTable32GroupSize(t *testing.T) {
    13  	if n := unsafe.Sizeof(table32Group{}); n != 64 {
    14  		t.Errorf("size of 32 bit table group is not 64 bytes: %d", n)
    15  	}
    16  }
    17  
    18  func TestUint32TableProbeOneByOne(t *testing.T) {
    19  	const N = 500
    20  	table := NewUint32Table(0, 0.9)
    21  
    22  	for n := 0; n < 2; n++ {
    23  		// Do two passes, both should behave the same.
    24  		for i := 1; i <= N; i++ {
    25  			k := [1]uint32{}
    26  			v := [1]int32{}
    27  
    28  			k[0] = uint32(i)
    29  			table.Probe(k[:], v[:])
    30  
    31  			if v[0] != int32(i-1) {
    32  				t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0])
    33  			}
    34  		}
    35  	}
    36  }
    37  
    38  func TestUint32TableProbeBulk(t *testing.T) {
    39  	const N = 999
    40  	table := NewUint32Table(0, 0.9)
    41  
    42  	k := make([]uint32, N)
    43  	v := make([]int32, N)
    44  
    45  	for i := range k {
    46  		k[i] = uint32(i)
    47  	}
    48  
    49  	for n := 0; n < 2; n++ {
    50  		table.Probe(k, v)
    51  
    52  		for i := range v {
    53  			if v[i] != int32(i) {
    54  				t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i])
    55  			}
    56  		}
    57  
    58  		if t.Failed() {
    59  			break
    60  		}
    61  
    62  		for i := range v {
    63  			v[i] = 0
    64  		}
    65  	}
    66  }
    67  
    68  func TestTable64GroupSize(t *testing.T) {
    69  	if n := unsafe.Sizeof(table64Group{}); n != 64 {
    70  		t.Errorf("size of 64 bit table group is not 64 bytes: %d", n)
    71  	}
    72  }
    73  
    74  func TestUint64TableProbeOneByOne(t *testing.T) {
    75  	const N = 500
    76  	table := NewUint64Table(0, 0.9)
    77  
    78  	for n := 0; n < 2; n++ {
    79  		// Do two passes, both should behave the same.
    80  		for i := 1; i <= N; i++ {
    81  			k := [1]uint64{}
    82  			v := [1]int32{}
    83  
    84  			k[0] = uint64(i)
    85  			table.Probe(k[:], v[:])
    86  
    87  			if v[0] != int32(i-1) {
    88  				t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0])
    89  			}
    90  		}
    91  	}
    92  }
    93  
    94  func TestUint64TableProbeBulk(t *testing.T) {
    95  	const N = 999
    96  	table := NewUint64Table(0, 0.9)
    97  
    98  	k := make([]uint64, N)
    99  	v := make([]int32, N)
   100  
   101  	for i := range k {
   102  		k[i] = uint64(i)
   103  	}
   104  
   105  	for n := 0; n < 2; n++ {
   106  		table.Probe(k, v)
   107  
   108  		for i := range v {
   109  			if v[i] != int32(i) {
   110  				t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i])
   111  			}
   112  		}
   113  
   114  		if t.Failed() {
   115  			break
   116  		}
   117  
   118  		for i := range v {
   119  			v[i] = 0
   120  		}
   121  	}
   122  }
   123  
   124  func TestUint128TableProbeOneByOne(t *testing.T) {
   125  	const N = 500
   126  	table := NewUint128Table(0, 0.9)
   127  
   128  	for n := 0; n < 2; n++ {
   129  		// Do two passes, both should behave the same.
   130  		for i := 1; i <= N; i++ {
   131  			k := [1][16]byte{}
   132  			v := [1]int32{}
   133  
   134  			binary.LittleEndian.PutUint64(k[0][:8], uint64(i))
   135  			table.Probe(k[:], v[:])
   136  
   137  			if v[0] != int32(i-1) {
   138  				t.Errorf("wrong value probed for key=%x: want=%d got=%d", i, i-1, v[0])
   139  			}
   140  		}
   141  	}
   142  }
   143  
   144  func TestUint128TableProbeBulk(t *testing.T) {
   145  	const N = 999
   146  	table := NewUint128Table(0, 0.9)
   147  
   148  	k := make([][16]byte, N)
   149  	v := make([]int32, N)
   150  
   151  	for i := range k {
   152  		binary.LittleEndian.PutUint64(k[i][:8], uint64(i))
   153  	}
   154  
   155  	for n := 0; n < 2; n++ {
   156  		table.Probe(k, v)
   157  
   158  		for i := range v {
   159  			if v[i] != int32(i) {
   160  				t.Errorf("wrong value probed for key=%x: want=%d got=%d", k[i], i, v[i])
   161  			}
   162  		}
   163  
   164  		if t.Failed() {
   165  			break
   166  		}
   167  
   168  		for i := range v {
   169  			v[i] = 0
   170  		}
   171  	}
   172  }
   173  
   174  const (
   175  	benchmarkProbesPerLoop = 500
   176  	benchmarkMaxLoad       = 0.9
   177  )
   178  
   179  type uint32Table interface {
   180  	Reset()
   181  	Len() int
   182  	Probe([]uint32, []int32) int
   183  }
   184  
   185  type uint32Map map[uint32]int32
   186  
   187  func (m uint32Map) Reset() {
   188  	for k := range m {
   189  		delete(m, k)
   190  	}
   191  }
   192  
   193  func (m uint32Map) Len() int {
   194  	return len(m)
   195  }
   196  
   197  func (m uint32Map) Probe(keys []uint32, values []int32) (n int) {
   198  	_ = values[:len(keys)]
   199  
   200  	for i, k := range keys {
   201  		v, ok := m[k]
   202  		if !ok {
   203  			v = int32(len(m))
   204  			m[k] = v
   205  			n++
   206  		}
   207  		values[i] = v
   208  	}
   209  
   210  	return n
   211  }
   212  
   213  func BenchmarkUint32Table(b *testing.B) {
   214  	benchmarkUint32Table(b, func(size int) uint32Table { return NewUint32Table(size, benchmarkMaxLoad) })
   215  }
   216  
   217  func BenchmarkGoUint32Map(b *testing.B) {
   218  	benchmarkUint32Table(b, func(size int) uint32Table { return make(uint32Map, size) })
   219  }
   220  
   221  func benchmarkUint32Table(b *testing.B, newTable func(size int) uint32Table) {
   222  	for n := 100; n <= 1e6; n *= 10 {
   223  		table := newTable(0)
   224  		keys, values := generateUint32Table(n)
   225  
   226  		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
   227  			benchmarkUint32Loop(b, table.Probe, keys, values)
   228  		})
   229  	}
   230  }
   231  
   232  func benchmarkUint32Loop(b *testing.B, f func([]uint32, []int32) int, keys []uint32, values []int32) {
   233  	i := 0
   234  	j := benchmarkProbesPerLoop
   235  	b.SetBytes(4 * int64(benchmarkProbesPerLoop))
   236  
   237  	_ = keys[:len(values)]
   238  	_ = values[:len(keys)]
   239  	start := time.Now()
   240  
   241  	for k := 0; k < b.N; k++ {
   242  		if j > len(keys) {
   243  			j = len(keys)
   244  		}
   245  		f(keys[i:j:j], values[i:j:j])
   246  		if j == len(keys) {
   247  			i, j = 0, benchmarkProbesPerLoop
   248  		} else {
   249  			i, j = j, j+benchmarkProbesPerLoop
   250  		}
   251  	}
   252  
   253  	seconds := time.Since(start).Seconds()
   254  	b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s")
   255  }
   256  
   257  func generateUint32Table(n int) ([]uint32, []int32) {
   258  	prng := rand.New(rand.NewSource(int64(n)))
   259  	keys := make([]uint32, n)
   260  	values := make([]int32, n)
   261  
   262  	for i := range keys {
   263  		keys[i] = prng.Uint32()
   264  	}
   265  
   266  	return keys, values
   267  }
   268  
   269  type uint64Table interface {
   270  	Reset()
   271  	Len() int
   272  	Probe([]uint64, []int32) int
   273  }
   274  
   275  type uint64Map map[uint64]int32
   276  
   277  func (m uint64Map) Reset() {
   278  	for k := range m {
   279  		delete(m, k)
   280  	}
   281  }
   282  
   283  func (m uint64Map) Len() int {
   284  	return len(m)
   285  }
   286  
   287  func (m uint64Map) Probe(keys []uint64, values []int32) (n int) {
   288  	_ = values[:len(keys)]
   289  
   290  	for i, k := range keys {
   291  		v, ok := m[k]
   292  		if !ok {
   293  			v = int32(len(m))
   294  			m[k] = v
   295  			n++
   296  		}
   297  		values[i] = v
   298  	}
   299  
   300  	return n
   301  }
   302  
   303  func BenchmarkUint64Table(b *testing.B) {
   304  	benchmarkUint64Table(b, func(size int) uint64Table { return NewUint64Table(size, benchmarkMaxLoad) })
   305  }
   306  
   307  func BenchmarkGoUint64Map(b *testing.B) {
   308  	benchmarkUint64Table(b, func(size int) uint64Table { return make(uint64Map, size) })
   309  }
   310  
   311  func benchmarkUint64Table(b *testing.B, newTable func(size int) uint64Table) {
   312  	for n := 100; n <= 1e6; n *= 10 {
   313  		table := newTable(0)
   314  		keys, values := generateUint64Table(n)
   315  
   316  		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
   317  			benchmarkUint64Loop(b, table.Probe, keys, values)
   318  		})
   319  	}
   320  }
   321  
   322  func benchmarkUint64Loop(b *testing.B, f func([]uint64, []int32) int, keys []uint64, values []int32) {
   323  	i := 0
   324  	j := benchmarkProbesPerLoop
   325  	b.SetBytes(8 * int64(benchmarkProbesPerLoop))
   326  
   327  	_ = keys[:len(values)]
   328  	_ = values[:len(keys)]
   329  	start := time.Now()
   330  
   331  	for k := 0; k < b.N; k++ {
   332  		if j > len(keys) {
   333  			j = len(keys)
   334  		}
   335  		f(keys[i:j:j], values[i:j:j])
   336  		if j == len(keys) {
   337  			i, j = 0, benchmarkProbesPerLoop
   338  		} else {
   339  			i, j = j, j+benchmarkProbesPerLoop
   340  		}
   341  	}
   342  
   343  	seconds := time.Since(start).Seconds()
   344  	b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s")
   345  }
   346  
   347  func generateUint64Table(n int) ([]uint64, []int32) {
   348  	prng := rand.New(rand.NewSource(int64(n)))
   349  	keys := make([]uint64, n)
   350  	values := make([]int32, n)
   351  
   352  	for i := range keys {
   353  		keys[i] = prng.Uint64()
   354  	}
   355  
   356  	return keys, values
   357  }
   358  
   359  type uint128Table interface {
   360  	Reset()
   361  	Len() int
   362  	Probe([][16]byte, []int32) int
   363  }
   364  
   365  type uint128Map map[[16]byte]int32
   366  
   367  func (m uint128Map) Reset() {
   368  	for k := range m {
   369  		delete(m, k)
   370  	}
   371  }
   372  
   373  func (m uint128Map) Len() int {
   374  	return len(m)
   375  }
   376  
   377  func (m uint128Map) Probe(keys [][16]byte, values []int32) (n int) {
   378  	_ = values[:len(keys)]
   379  
   380  	for i, k := range keys {
   381  		v, ok := m[k]
   382  		if !ok {
   383  			v = int32(len(m))
   384  			m[k] = v
   385  			n++
   386  		}
   387  		values[i] = v
   388  	}
   389  
   390  	return n
   391  }
   392  
   393  func BenchmarkUint128Table(b *testing.B) {
   394  	benchmarkUint128Table(b, func(size int) uint128Table { return NewUint128Table(size, benchmarkMaxLoad) })
   395  }
   396  
   397  func BenchmarkGoUint128Map(b *testing.B) {
   398  	benchmarkUint128Table(b, func(size int) uint128Table { return make(uint128Map, size) })
   399  }
   400  
   401  func benchmarkUint128Table(b *testing.B, newTable func(size int) uint128Table) {
   402  	for n := 100; n <= 1e6; n *= 10 {
   403  		table := newTable(0)
   404  		keys, values := generateUint128Table(n)
   405  
   406  		b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) {
   407  			benchmarkUint128Loop(b, table.Probe, keys, values)
   408  		})
   409  	}
   410  }
   411  
   412  func benchmarkUint128Loop(b *testing.B, f func([][16]byte, []int32) int, keys [][16]byte, values []int32) {
   413  	i := 0
   414  	j := benchmarkProbesPerLoop
   415  	b.SetBytes(16 * int64(benchmarkProbesPerLoop))
   416  
   417  	_ = keys[:len(values)]
   418  	_ = values[:len(keys)]
   419  	start := time.Now()
   420  
   421  	for k := 0; k < b.N; k++ {
   422  		if j > len(keys) {
   423  			j = len(keys)
   424  		}
   425  		f(keys[i:j:j], values[i:j:j])
   426  		if j == len(keys) {
   427  			i, j = 0, benchmarkProbesPerLoop
   428  		} else {
   429  			i, j = j, j+benchmarkProbesPerLoop
   430  		}
   431  	}
   432  
   433  	seconds := time.Since(start).Seconds()
   434  	b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s")
   435  }
   436  
   437  func generateUint128Table(n int) ([][16]byte, []int32) {
   438  	prng := rand.New(rand.NewSource(int64(n)))
   439  	keys := make([][16]byte, n)
   440  	values := make([]int32, n)
   441  
   442  	for i := range keys {
   443  		prng.Read(keys[i][:])
   444  	}
   445  
   446  	return keys, values
   447  }