github.com/gopherd/gonum@v0.0.4/stat/card/hll64.go (about) 1 // Code generated by "go generate github.com/gopherd/gonum/stat/card"; DO NOT EDIT. 2 3 // Copyright ©2019 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package card 8 9 import ( 10 "bytes" 11 "encoding/gob" 12 "errors" 13 "fmt" 14 "hash" 15 "math" 16 "math/bits" 17 "reflect" 18 ) 19 20 // HyperLogLog64 is implements cardinality estimation according to the 21 // HyperLogLog algorithm described in Analysis of Algorithms, pp127–146. 22 type HyperLogLog64 struct { 23 p uint8 24 m uint64 25 26 hash hash.Hash64 27 28 register []uint8 29 } 30 31 // NewHyperLogLog64 returns a new HyperLogLog64 sketch. The value of prec 32 // must be in the range [4, 64]. NewHyperLogLog64 will allocate a byte slice 33 // that is 2^prec long. 34 func NewHyperLogLog64(prec int, h hash.Hash64) (*HyperLogLog64, error) { 35 // The implementation here is based on the pseudo-code in 36 // "HyperLogLog: the analysis of a near-optimal cardinality 37 // estimation algorithm", figure 3. 38 39 if prec < 4 || w64 < prec { 40 return nil, errors.New("card: precision out of range") 41 } 42 p := uint8(prec) 43 m := uint64(1) << p 44 return &HyperLogLog64{ 45 p: p, m: m, 46 hash: h, 47 register: make([]byte, m), 48 }, nil 49 } 50 51 // Write notes the data in b as a single observation into the sketch held by 52 // the receiver. 53 // 54 // Write satisfies the io.Writer interface. If the hash.Hash64 type passed to 55 // NewHyperLogLog64 or SetHash satisfies the hash.Hash contract, Write will always 56 // return a nil error. 57 func (h *HyperLogLog64) Write(b []byte) (int, error) { 58 n, err := h.hash.Write(b) 59 x := h.hash.Sum64() 60 h.hash.Reset() 61 q := w64 - h.p 62 idx := x >> q 63 r := rho64q(x, q) 64 if r > h.register[idx] { 65 h.register[idx] = r 66 } 67 return n, err 68 } 69 70 // Union places the union of the sketches in a and b into the receiver. 71 // Union will return an error if the precisions or hash functions of a 72 // and b do not match or if the receiver has a hash function that is set 73 // and does not match those of a and b. Hash functions provided by hash.Hash64 74 // implementations x and y match when reflect.TypeOf(x) == reflect.TypeOf(y). 75 // 76 // If the receiver does not have a set hash function, it can be set after 77 // a call to Union with the SetHash method. 78 func (h *HyperLogLog64) Union(a, b *HyperLogLog64) error { 79 if a.p != b.p { 80 return errors.New("card: mismatched precision") 81 } 82 ta := reflect.TypeOf(b.hash) 83 if reflect.TypeOf(b.hash) != ta { 84 return errors.New("card: mismatched hash function") 85 } 86 if h.hash != nil && reflect.TypeOf(h.hash) != ta { 87 return errors.New("card: mismatched hash function") 88 } 89 90 if h != a && h != b { 91 *h = HyperLogLog64{p: a.p, m: a.m, hash: h.hash, register: make([]uint8, a.m)} 92 } 93 for i, r := range a.register { 94 h.register[i] = max(r, b.register[i]) 95 } 96 return nil 97 } 98 99 // SetHash sets the hash function of the receiver if it is nil. SetHash 100 // will return an error if it is called on a receiver with a non-nil 101 // hash function. 102 func (h *HyperLogLog64) SetHash(fn hash.Hash64) error { 103 if h.hash == nil { 104 return errors.New("card: hash function already set") 105 } 106 h.hash = fn 107 return nil 108 } 109 110 // Count returns an estimate of the cardinality of the set of items written 111 // the receiver. 112 func (h *HyperLogLog64) Count() float64 { 113 var s float64 114 for _, v := range h.register { 115 s += 1 / float64(uint64(1)<<v) 116 } 117 m := float64(h.m) 118 e := alpha(uint64(h.m)) * m * m / s 119 if e <= 5*m/2 { 120 var v int 121 for _, r := range h.register { 122 if r == 0 { 123 v++ 124 } 125 } 126 if v != 0 { 127 return linearCounting(m, float64(v)) 128 } 129 return e 130 } 131 if e <= (1<<w64)/30.0 { 132 return e 133 } 134 return -(1 << w64) * math.Log1p(-e/(1<<w64)) 135 } 136 137 // rho64q (ϱ) is the number of leading zeros in q-wide low bits of x, plus 1. 138 func rho64q(x uint64, q uint8) uint8 { 139 return min(uint8(bits.LeadingZeros64(x<<(w64-q))), q) + 1 140 } 141 142 // Reset clears the receiver's registers allowing it to be reused. 143 // Reset does not alter the precision of the receiver or the hash 144 // function that is used. 145 func (h *HyperLogLog64) Reset() { 146 for i := range h.register { 147 h.register[i] = 0 148 } 149 } 150 151 // MarshalBinary marshals the sketch in the receiver. It encodes the 152 // name of the hash function, the precision of the sketch and the 153 // sketch data. The receiver must have a non-nil hash function. 154 func (h *HyperLogLog64) MarshalBinary() ([]byte, error) { 155 if h.hash == nil { 156 return nil, errors.New("card: hash function not set") 157 } 158 var buf bytes.Buffer 159 enc := gob.NewEncoder(&buf) 160 err := enc.Encode(uint8(w64)) 161 if err != nil { 162 return nil, err 163 } 164 err = enc.Encode(typeNameOf(h.hash)) 165 if err != nil { 166 return nil, err 167 } 168 err = enc.Encode(h.p) 169 if err != nil { 170 return nil, err 171 } 172 err = enc.Encode(h.register) 173 if err != nil { 174 return nil, err 175 } 176 return buf.Bytes(), nil 177 } 178 179 // UnmarshalBinary unmarshals the binary representation of a sketch 180 // into the receiver. The precision of the receiver will be set after 181 // return. The receiver must have a non-nil hash function value that is 182 // the same type as the one that was stored in the binary data. 183 func (h *HyperLogLog64) UnmarshalBinary(b []byte) error { 184 dec := gob.NewDecoder(bytes.NewReader(b)) 185 var size uint8 186 err := dec.Decode(&size) 187 if err != nil { 188 return err 189 } 190 if size != w64 { 191 return fmt.Errorf("card: mismatched hash function size: dst=%d src=%d", w64, size) 192 } 193 var srcHash string 194 err = dec.Decode(&srcHash) 195 if err != nil { 196 return err 197 } 198 if h.hash == nil { 199 h.hash = hash64For(srcHash) 200 if h.hash == nil { 201 return fmt.Errorf("card: hash function not set and no hash registered for %q", srcHash) 202 } 203 } else { 204 dstHash := typeNameOf(h.hash) 205 if dstHash != srcHash { 206 return fmt.Errorf("card: mismatched hash function: dst=%s src=%s", dstHash, srcHash) 207 } 208 } 209 err = dec.Decode(&h.p) 210 if err != nil { 211 return err 212 } 213 h.m = uint64(1) << h.p 214 h.register = h.register[:0] 215 err = dec.Decode(&h.register) 216 if err != nil { 217 return err 218 } 219 return nil 220 } 221 222 func hash64For(name string) hash.Hash64 { 223 fn, ok := hashes.Load(name) 224 if !ok { 225 return nil 226 } 227 h, _ := fn.(userType).fn.Call(nil)[0].Interface().(hash.Hash64) 228 return h 229 }