github.com/fluhus/gostuff@v0.4.1-0.20240331134726-be71864f2b5d/hll/hll2.go (about)

     1  package hll
     2  
import (
	"fmt"
	"math"
	"math/bits"
)
     7  
// An HLL2 is a HyperLogLog counter for arbitrary values.
//
// Values of type T are reduced to 64-bit hashes by a caller-supplied
// function; the counter itself only stores one byte per register.
// Use New2 to create a counter.
//
// Deprecated: use the hll/v2 package.
type HLL2[T any] struct {
	// One byte-sized register per bucket. Add keeps, for each bucket, the
	// largest (trailing zeros + 1) value observed so far; zero means the
	// bucket was never hit.
	counters []byte
	// Hash function applied to every added value.
	h        func(T) uint64
	// Number of low hash bits used to select a register (logSize in New2).
	nbits    int
	// Number of registers, set to 1 << nbits by New2.
	m        int
	// Bit mask (m - 1) that extracts the register index from a hash.
	mask     uint64
}
    18  
    19  // New2 creates a new HyperLogLog counter.
    20  // The counter will use 2^logSize bytes.
    21  // h is the hash function to use for added values.
    22  func New2[T any](logSize int, h func(T) uint64) *HLL2[T] {
    23  	if logSize < 4 {
    24  		panic(fmt.Sprintf("logSize=%v, should be at least 4", logSize))
    25  	}
    26  	m := 1 << logSize
    27  	return &HLL2[T]{
    28  		counters: make([]byte, m),
    29  		h:        h,
    30  		nbits:    logSize,
    31  		m:        m,
    32  		mask:     uint64(m - 1),
    33  	}
    34  }
    35  
    36  // Add adds v to the counter. Calls hash once.
    37  func (h *HLL2[T]) Add(t T) {
    38  	hash := h.h(t)
    39  	idx := hash & h.mask
    40  	fp := hash >> h.nbits
    41  	z := byte(h.nzeros(fp)) + 1
    42  	if z > h.counters[idx] {
    43  		h.counters[idx] = z
    44  	}
    45  }
    46  
    47  // ApproxCount returns the current approximate count.
    48  // Does not alter the state of the counter.
    49  func (h *HLL2[T]) ApproxCount() int {
    50  	z := 0.0
    51  	for _, v := range h.counters {
    52  		z += math.Pow(2, -float64(v))
    53  	}
    54  	z = 1.0 / z
    55  	fm := float64(h.m)
    56  	result := int(h.alpha() * fm * fm * z)
    57  
    58  	if result < h.m*5/2 {
    59  		zeros := 0
    60  		for _, v := range h.counters {
    61  			if v == 0 {
    62  				zeros++
    63  			}
    64  		}
    65  		// If some registers are zero, use linear counting.
    66  		if zeros > 0 {
    67  			result = int(fm * math.Log(fm/float64(zeros)))
    68  		}
    69  	}
    70  
    71  	return result
    72  }
    73  
    74  // Returns the alpha value to use depending on m.
    75  func (h *HLL2[T]) alpha() float64 {
    76  	switch h.m {
    77  	case 16:
    78  		return 0.673
    79  	case 32:
    80  		return 0.697
    81  	case 64:
    82  		return 0.709
    83  	}
    84  	return 0.7213 / (1 + 1.079/float64(h.m))
    85  }
    86  
    87  // nzeros counts the number of zeros on the right side of a binary number.
    88  func (h *HLL2[T]) nzeros(a uint64) int {
    89  	if a == 0 {
    90  		return 64 - h.nbits // Number of bits after using the first nbits.
    91  	}
    92  	n := 0
    93  	for a&1 == 0 {
    94  		n++
    95  		a /= 2
    96  	}
    97  	return n
    98  }
    99  
   100  // AddHLL adds the state of another counter to h,
   101  // assuming they use the same hash function.
   102  // The result is equivalent to adding all the values of other to h.
   103  func (h *HLL2[T]) AddHLL(other *HLL2[T]) {
   104  	if len(h.counters) != len(other.counters) {
   105  		panic("merging HLLs with different sizes")
   106  	}
   107  	for i, b := range other.counters {
   108  		if h.counters[i] < b {
   109  			h.counters[i] = b
   110  		}
   111  	}
   112  }