github.com/fluhus/gostuff@v0.4.1-0.20240331134726-be71864f2b5d/hll/hll2.go (about) 1 package hll 2 3 import ( 4 "fmt" 5 "math" 6 ) 7 8 // An HLL2 is a HyperLogLog counter for arbitrary values. 9 // 10 // Deprecated: use the hll/v2 package. 11 type HLL2[T any] struct { 12 counters []byte 13 h func(T) uint64 14 nbits int 15 m int 16 mask uint64 17 } 18 19 // New2 creates a new HyperLogLog counter. 20 // The counter will use 2^logSize bytes. 21 // h is the hash function to use for added values. 22 func New2[T any](logSize int, h func(T) uint64) *HLL2[T] { 23 if logSize < 4 { 24 panic(fmt.Sprintf("logSize=%v, should be at least 4", logSize)) 25 } 26 m := 1 << logSize 27 return &HLL2[T]{ 28 counters: make([]byte, m), 29 h: h, 30 nbits: logSize, 31 m: m, 32 mask: uint64(m - 1), 33 } 34 } 35 36 // Add adds v to the counter. Calls hash once. 37 func (h *HLL2[T]) Add(t T) { 38 hash := h.h(t) 39 idx := hash & h.mask 40 fp := hash >> h.nbits 41 z := byte(h.nzeros(fp)) + 1 42 if z > h.counters[idx] { 43 h.counters[idx] = z 44 } 45 } 46 47 // ApproxCount returns the current approximate count. 48 // Does not alter the state of the counter. 49 func (h *HLL2[T]) ApproxCount() int { 50 z := 0.0 51 for _, v := range h.counters { 52 z += math.Pow(2, -float64(v)) 53 } 54 z = 1.0 / z 55 fm := float64(h.m) 56 result := int(h.alpha() * fm * fm * z) 57 58 if result < h.m*5/2 { 59 zeros := 0 60 for _, v := range h.counters { 61 if v == 0 { 62 zeros++ 63 } 64 } 65 // If some registers are zero, use linear counting. 66 if zeros > 0 { 67 result = int(fm * math.Log(fm/float64(zeros))) 68 } 69 } 70 71 return result 72 } 73 74 // Returns the alpha value to use depending on m. 75 func (h *HLL2[T]) alpha() float64 { 76 switch h.m { 77 case 16: 78 return 0.673 79 case 32: 80 return 0.697 81 case 64: 82 return 0.709 83 } 84 return 0.7213 / (1 + 1.079/float64(h.m)) 85 } 86 87 // nzeros counts the number of zeros on the right side of a binary number. 88 func (h *HLL2[T]) nzeros(a uint64) int { 89 if a == 0 { 90 return 64 - h.nbits // Number of bits after using the first nbits. 91 } 92 n := 0 93 for a&1 == 0 { 94 n++ 95 a /= 2 96 } 97 return n 98 } 99 100 // AddHLL adds the state of another counter to h, 101 // assuming they use the same hash function. 102 // The result is equivalent to adding all the values of other to h. 103 func (h *HLL2[T]) AddHLL(other *HLL2[T]) { 104 if len(h.counters) != len(other.counters) { 105 panic("merging HLLs with different sizes") 106 } 107 for i, b := range other.counters { 108 if h.counters[i] < b { 109 h.counters[i] = b 110 } 111 } 112 }