gonum.org/v1/gonum@v0.14.0/stat/card/hll64.go (about)

     1  // Code generated by "go generate gonum.org/v1/gonum/stat/card"; DO NOT EDIT.
     2  
     3  // Copyright ©2019 The Gonum Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package card
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/gob"
    12  	"errors"
    13  	"fmt"
    14  	"hash"
    15  	"math"
    16  	"math/bits"
    17  	"reflect"
    18  )
    19  
// HyperLogLog64 implements cardinality estimation according to the
// HyperLogLog algorithm described in Analysis of Algorithms, pp127–146.
type HyperLogLog64 struct {
	// p is the precision of the sketch; NewHyperLogLog64 sets it from
	// its prec argument.
	p uint8
	// m is the number of registers, computed as 1<<p.
	m uint64

	// hash is the 64-bit hash function applied to each observation
	// passed to Write. Several methods check it against nil before use.
	hash hash.Hash64

	// register holds, for each index, the largest rho64q value seen by
	// Write for hashes that map to that index.
	register []uint8
}
    30  
    31  // NewHyperLogLog64 returns a new HyperLogLog64 sketch. The value of prec
    32  // must be in the range [4, 64]. NewHyperLogLog64 will allocate a byte slice
    33  // that is 2^prec long.
    34  func NewHyperLogLog64(prec int, h hash.Hash64) (*HyperLogLog64, error) {
    35  	// The implementation here is based on the pseudo-code in
    36  	// "HyperLogLog: the analysis of a near-optimal cardinality
    37  	// estimation algorithm", figure 3.
    38  
    39  	if prec < 4 || w64 < prec {
    40  		return nil, errors.New("card: precision out of range")
    41  	}
    42  	p := uint8(prec)
    43  	m := uint64(1) << p
    44  	return &HyperLogLog64{
    45  		p: p, m: m,
    46  		hash:     h,
    47  		register: make([]byte, m),
    48  	}, nil
    49  }
    50  
    51  // Write notes the data in b as a single observation into the sketch held by
    52  // the receiver.
    53  //
    54  // Write satisfies the io.Writer interface. If the hash.Hash64 type passed to
    55  // NewHyperLogLog64 or SetHash satisfies the hash.Hash contract, Write will always
    56  // return a nil error.
    57  func (h *HyperLogLog64) Write(b []byte) (int, error) {
    58  	n, err := h.hash.Write(b)
    59  	x := h.hash.Sum64()
    60  	h.hash.Reset()
    61  	q := w64 - h.p
    62  	idx := x >> q
    63  	r := rho64q(x, q)
    64  	if r > h.register[idx] {
    65  		h.register[idx] = r
    66  	}
    67  	return n, err
    68  }
    69  
    70  // Union places the union of the sketches in a and b into the receiver.
    71  // Union will return an error if the precisions or hash functions of a
    72  // and b do not match or if the receiver has a hash function that is set
    73  // and does not match those of a and b. Hash functions provided by hash.Hash64
    74  // implementations x and y match when reflect.TypeOf(x) == reflect.TypeOf(y).
    75  //
    76  // If the receiver does not have a set hash function, it can be set after
    77  // a call to Union with the SetHash method.
    78  func (h *HyperLogLog64) Union(a, b *HyperLogLog64) error {
    79  	if a.p != b.p {
    80  		return errors.New("card: mismatched precision")
    81  	}
    82  	ta := reflect.TypeOf(b.hash)
    83  	if reflect.TypeOf(b.hash) != ta {
    84  		return errors.New("card: mismatched hash function")
    85  	}
    86  	if h.hash != nil && reflect.TypeOf(h.hash) != ta {
    87  		return errors.New("card: mismatched hash function")
    88  	}
    89  
    90  	if h != a && h != b {
    91  		*h = HyperLogLog64{p: a.p, m: a.m, hash: h.hash, register: make([]uint8, a.m)}
    92  	}
    93  	for i, r := range a.register {
    94  		h.register[i] = max(r, b.register[i])
    95  	}
    96  	return nil
    97  }
    98  
    99  // SetHash sets the hash function of the receiver if it is nil. SetHash
   100  // will return an error if it is called on a receiver with a non-nil
   101  // hash function.
   102  func (h *HyperLogLog64) SetHash(fn hash.Hash64) error {
   103  	if h.hash == nil {
   104  		return errors.New("card: hash function already set")
   105  	}
   106  	h.hash = fn
   107  	return nil
   108  }
   109  
   110  // Count returns an estimate of the cardinality of the set of items written
   111  // the receiver.
   112  func (h *HyperLogLog64) Count() float64 {
   113  	var s float64
   114  	for _, v := range h.register {
   115  		s += 1 / float64(uint64(1)<<v)
   116  	}
   117  	m := float64(h.m)
   118  	e := alpha(uint64(h.m)) * m * m / s
   119  	if e <= 5*m/2 {
   120  		var v int
   121  		for _, r := range h.register {
   122  			if r == 0 {
   123  				v++
   124  			}
   125  		}
   126  		if v != 0 {
   127  			return linearCounting(m, float64(v))
   128  		}
   129  		return e
   130  	}
   131  	if e <= (1<<w64)/30.0 {
   132  		return e
   133  	}
   134  	return -(1 << w64) * math.Log1p(-e/(1<<w64))
   135  }
   136  
   137  // rho64q (ϱ) is the number of leading zeros in q-wide low bits of x, plus 1.
   138  func rho64q(x uint64, q uint8) uint8 {
   139  	return min(uint8(bits.LeadingZeros64(x<<(w64-q))), q) + 1
   140  }
   141  
   142  // Reset clears the receiver's registers allowing it to be reused.
   143  // Reset does not alter the precision of the receiver or the hash
   144  // function that is used.
   145  func (h *HyperLogLog64) Reset() {
   146  	for i := range h.register {
   147  		h.register[i] = 0
   148  	}
   149  }
   150  
   151  // MarshalBinary marshals the sketch in the receiver. It encodes the
   152  // name of the hash function, the precision of the sketch and the
   153  // sketch data. The receiver must have a non-nil hash function.
   154  func (h *HyperLogLog64) MarshalBinary() ([]byte, error) {
   155  	if h.hash == nil {
   156  		return nil, errors.New("card: hash function not set")
   157  	}
   158  	var buf bytes.Buffer
   159  	enc := gob.NewEncoder(&buf)
   160  	err := enc.Encode(uint8(w64))
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  	err = enc.Encode(typeNameOf(h.hash))
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	err = enc.Encode(h.p)
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	err = enc.Encode(h.register)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	return buf.Bytes(), nil
   177  }
   178  
   179  // UnmarshalBinary unmarshals the binary representation of a sketch
   180  // into the receiver. The precision of the receiver will be set after
   181  // return. The receiver must have a non-nil hash function value that is
   182  // the same type as the one that was stored in the binary data.
   183  func (h *HyperLogLog64) UnmarshalBinary(b []byte) error {
   184  	dec := gob.NewDecoder(bytes.NewReader(b))
   185  	var size uint8
   186  	err := dec.Decode(&size)
   187  	if err != nil {
   188  		return err
   189  	}
   190  	if size != w64 {
   191  		return fmt.Errorf("card: mismatched hash function size: dst=%d src=%d", w64, size)
   192  	}
   193  	var srcHash string
   194  	err = dec.Decode(&srcHash)
   195  	if err != nil {
   196  		return err
   197  	}
   198  	if h.hash == nil {
   199  		h.hash = hash64For(srcHash)
   200  		if h.hash == nil {
   201  			return fmt.Errorf("card: hash function not set and no hash registered for %q", srcHash)
   202  		}
   203  	} else {
   204  		dstHash := typeNameOf(h.hash)
   205  		if dstHash != srcHash {
   206  			return fmt.Errorf("card: mismatched hash function: dst=%s src=%s", dstHash, srcHash)
   207  		}
   208  	}
   209  	err = dec.Decode(&h.p)
   210  	if err != nil {
   211  		return err
   212  	}
   213  	h.m = uint64(1) << h.p
   214  	h.register = h.register[:0]
   215  	err = dec.Decode(&h.register)
   216  	if err != nil {
   217  		return err
   218  	}
   219  	return nil
   220  }
   221  
   222  func hash64For(name string) hash.Hash64 {
   223  	fn, ok := hashes.Load(name)
   224  	if !ok {
   225  		return nil
   226  	}
   227  	h, _ := fn.(userType).fn.Call(nil)[0].Interface().(hash.Hash64)
   228  	return h
   229  }