github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/milevadb-server/statistics/fmsketch.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package statistics 15 16 import ( 17 "hash" 18 19 "github.com/whtcorpsinc/errors" 20 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 21 "github.com/whtcorpsinc/milevadb/types" 22 "github.com/whtcorpsinc/milevadb/soliton/codec" 23 "github.com/whtcorpsinc/fidelpb/go-fidelpb" 24 "github.com/twmb/murmur3" 25 ) 26 27 // FMSketch is used to count the number of distinct elements in a set. 28 type FMSketch struct { 29 hashset map[uint64]bool 30 mask uint64 31 maxSize int 32 hashFunc hash.Hash64 33 } 34 35 // NewFMSketch returns a new FM sketch. 36 func NewFMSketch(maxSize int) *FMSketch { 37 return &FMSketch{ 38 hashset: make(map[uint64]bool), 39 maxSize: maxSize, 40 hashFunc: murmur3.New64(), 41 } 42 } 43 44 // NDV returns the ndv of the sketch. 45 func (s *FMSketch) NDV() int64 { 46 return int64(s.mask+1) * int64(len(s.hashset)) 47 } 48 49 func (s *FMSketch) insertHashValue(hashVal uint64) { 50 if (hashVal & s.mask) != 0 { 51 return 52 } 53 s.hashset[hashVal] = true 54 if len(s.hashset) > s.maxSize { 55 s.mask = s.mask*2 + 1 56 for key := range s.hashset { 57 if (key & s.mask) != 0 { 58 delete(s.hashset, key) 59 } 60 } 61 } 62 } 63 64 // InsertValue inserts a value into the FM sketch. 65 func (s *FMSketch) InsertValue(sc *stmtctx.StatementContext, value types.Causet) error { 66 bytes, err := codec.EncodeValue(sc, nil, value) 67 if err != nil { 68 return errors.Trace(err) 69 } 70 s.hashFunc.Reset() 71 _, err = s.hashFunc.Write(bytes) 72 if err != nil { 73 return errors.Trace(err) 74 } 75 s.insertHashValue(s.hashFunc.Sum64()) 76 return nil 77 } 78 79 func buildFMSketch(sc *stmtctx.StatementContext, values []types.Causet, maxSize int) (*FMSketch, int64, error) { 80 s := NewFMSketch(maxSize) 81 for _, value := range values { 82 err := s.InsertValue(sc, value) 83 if err != nil { 84 return nil, 0, errors.Trace(err) 85 } 86 } 87 return s, s.NDV(), nil 88 } 89 90 func (s *FMSketch) mergeFMSketch(rs *FMSketch) { 91 if s.mask < rs.mask { 92 s.mask = rs.mask 93 for key := range s.hashset { 94 if (key & s.mask) != 0 { 95 delete(s.hashset, key) 96 } 97 } 98 } 99 for key := range rs.hashset { 100 s.insertHashValue(key) 101 } 102 } 103 104 // FMSketchToProto converts FMSketch to its protobuf representation. 105 func FMSketchToProto(s *FMSketch) *fidelpb.FMSketch { 106 protoSketch := new(fidelpb.FMSketch) 107 protoSketch.Mask = s.mask 108 for val := range s.hashset { 109 protoSketch.Hashset = append(protoSketch.Hashset, val) 110 } 111 return protoSketch 112 } 113 114 // FMSketchFromProto converts FMSketch from its protobuf representation. 115 func FMSketchFromProto(protoSketch *fidelpb.FMSketch) *FMSketch { 116 sketch := &FMSketch{ 117 hashset: make(map[uint64]bool, len(protoSketch.Hashset)), 118 mask: protoSketch.Mask, 119 } 120 for _, val := range protoSketch.Hashset { 121 sketch.hashset[val] = true 122 } 123 return sketch 124 }