github.com/matrixorigin/matrixone@v1.2.0/pkg/common/bloomfilter/util.go (about)

     1  // Copyright 2021 - 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bloomfilter
    16  
    17  import (
    18  	"math"
    19  	"unsafe"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  )
    25  
    26  func fillStringGroupStr(keys [][]byte, vec *vector.Vector, n int, start int) {
    27  	area := vec.GetArea()
    28  	vs := vector.MustFixedCol[types.Varlena](vec)
    29  	if !vec.GetNulls().Any() {
    30  		for i := 0; i < n; i++ {
    31  			keys[i] = append(keys[i], byte(0))
    32  			keys[i] = append(keys[i], vs[i+start].GetByteSlice(area)...)
    33  		}
    34  	} else {
    35  		nsp := vec.GetNulls()
    36  		for i := 0; i < n; i++ {
    37  			hasNull := nsp.Contains(uint64(i + start))
    38  			if hasNull {
    39  				keys[i] = append(keys[i], byte(1))
    40  			} else {
    41  				keys[i] = append(keys[i], byte(0))
    42  				keys[i] = append(keys[i], vs[i+start].GetByteSlice(area)...)
    43  			}
    44  		}
    45  	}
    46  }
    47  
    48  func fillGroupStr(keys [][]byte, vec *vector.Vector, n int, sz int, start int) {
    49  	data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), (n+start)*sz)
    50  	if !vec.GetNulls().Any() {
    51  		for i := 0; i < n; i++ {
    52  			keys[i] = append(keys[i], byte(0))
    53  			keys[i] = append(keys[i], data[(i+start)*sz:(i+start+1)*sz]...)
    54  		}
    55  	} else {
    56  		nsp := vec.GetNulls()
    57  		for i := 0; i < n; i++ {
    58  			isNull := nsp.Contains(uint64(i + start))
    59  			if isNull {
    60  				keys[i] = append(keys[i], byte(1))
    61  			} else {
    62  				keys[i] = append(keys[i], byte(0))
    63  				keys[i] = append(keys[i], data[(i+start)*sz:(i+start+1)*sz]...)
    64  			}
    65  		}
    66  	}
    67  }
    68  
    69  func encodeHashKeys(keys [][]byte, vec *vector.Vector, start, count int) {
    70  	if vec.GetType().IsFixedLen() {
    71  		fillGroupStr(keys, vec, count, vec.GetType().TypeSize(), start)
    72  	} else {
    73  		fillStringGroupStr(keys, vec, count, start)
    74  	}
    75  
    76  	for i := 0; i < count; i++ {
    77  		if l := len(keys[i]); l < 16 {
    78  			keys[i] = append(keys[i], hashtable.StrKeyPadding[l:]...)
    79  		}
    80  	}
    81  }
    82  
    83  func computeMemAndHashCount(rowCount int64, probability float64) (int64, int) {
    84  	k := 1
    85  	if rowCount < 10001 {
    86  		k = 1
    87  	} else if rowCount < 100001 {
    88  		k = 1
    89  	} else if rowCount < 1000001 {
    90  		k = 1
    91  	} else if rowCount < 10000001 {
    92  		k = 2
    93  	} else if rowCount < 100000001 {
    94  		k = 3
    95  	} else if rowCount < 1000000001 {
    96  		k = 3
    97  	} else if rowCount < 10000000001 {
    98  		k = 3
    99  	} else {
   100  		panic("unsupport rowCount")
   101  	}
   102  	hashCount := k * 3
   103  	m := -float64(hashCount) * float64(rowCount) / math.Log(1-math.Pow(probability, 1.0/float64(hashCount)))
   104  	return int64(m), k
   105  }