github.com/matrixorigin/matrixone@v1.2.0/pkg/common/bloomfilter/util.go (about) 1 // Copyright 2021 - 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bloomfilter 16 17 import ( 18 "math" 19 "unsafe" 20 21 "github.com/matrixorigin/matrixone/pkg/container/hashtable" 22 "github.com/matrixorigin/matrixone/pkg/container/types" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 ) 25 26 func fillStringGroupStr(keys [][]byte, vec *vector.Vector, n int, start int) { 27 area := vec.GetArea() 28 vs := vector.MustFixedCol[types.Varlena](vec) 29 if !vec.GetNulls().Any() { 30 for i := 0; i < n; i++ { 31 keys[i] = append(keys[i], byte(0)) 32 keys[i] = append(keys[i], vs[i+start].GetByteSlice(area)...) 33 } 34 } else { 35 nsp := vec.GetNulls() 36 for i := 0; i < n; i++ { 37 hasNull := nsp.Contains(uint64(i + start)) 38 if hasNull { 39 keys[i] = append(keys[i], byte(1)) 40 } else { 41 keys[i] = append(keys[i], byte(0)) 42 keys[i] = append(keys[i], vs[i+start].GetByteSlice(area)...) 43 } 44 } 45 } 46 } 47 48 func fillGroupStr(keys [][]byte, vec *vector.Vector, n int, sz int, start int) { 49 data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), (n+start)*sz) 50 if !vec.GetNulls().Any() { 51 for i := 0; i < n; i++ { 52 keys[i] = append(keys[i], byte(0)) 53 keys[i] = append(keys[i], data[(i+start)*sz:(i+start+1)*sz]...) 54 } 55 } else { 56 nsp := vec.GetNulls() 57 for i := 0; i < n; i++ { 58 isNull := nsp.Contains(uint64(i + start)) 59 if isNull { 60 keys[i] = append(keys[i], byte(1)) 61 } else { 62 keys[i] = append(keys[i], byte(0)) 63 keys[i] = append(keys[i], data[(i+start)*sz:(i+start+1)*sz]...) 64 } 65 } 66 } 67 } 68 69 func encodeHashKeys(keys [][]byte, vec *vector.Vector, start, count int) { 70 if vec.GetType().IsFixedLen() { 71 fillGroupStr(keys, vec, count, vec.GetType().TypeSize(), start) 72 } else { 73 fillStringGroupStr(keys, vec, count, start) 74 } 75 76 for i := 0; i < count; i++ { 77 if l := len(keys[i]); l < 16 { 78 keys[i] = append(keys[i], hashtable.StrKeyPadding[l:]...) 79 } 80 } 81 } 82 83 func computeMemAndHashCount(rowCount int64, probability float64) (int64, int) { 84 k := 1 85 if rowCount < 10001 { 86 k = 1 87 } else if rowCount < 100001 { 88 k = 1 89 } else if rowCount < 1000001 { 90 k = 1 91 } else if rowCount < 10000001 { 92 k = 2 93 } else if rowCount < 100000001 { 94 k = 3 95 } else if rowCount < 1000000001 { 96 k = 3 97 } else if rowCount < 10000000001 { 98 k = 3 99 } else { 100 panic("unsupport rowCount") 101 } 102 hashCount := k * 3 103 m := -float64(hashCount) * float64(rowCount) / math.Log(1-math.Pow(probability, 1.0/float64(hashCount))) 104 return int64(m), k 105 }