github.com/matrixorigin/matrixone@v1.2.0/pkg/common/hashmap/types.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashmap
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    19  	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
    20  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    21  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    22  )
    23  
    24  const (
    25  	UnitLimit             = 256             // base unit; both sizes below are multiples of it. NOTE(review): presumably the number of rows handled per bulk call — confirm
    26  	HashMapSizeThreshHold = UnitLimit * 128 // evaluates to 32768
    27  	HashMapSizeEstimate   = UnitLimit * 32  // evaluates to 8192
    28  )
    29  
    30  var (
    31  	OneUInt8s  []uint8  // no initializer in this file. NOTE(review): presumably filled with 1s elsewhere in the package — confirm
    32  	OneInt64s  []int64  // no initializer in this file. NOTE(review): presumably filled with 1s elsewhere in the package — confirm
    33  	zeroUint64 []uint64 // no initializer in this file. NOTE(review): presumably an all-zero buffer set up elsewhere — confirm
    34  	zeroUint32 []uint32 // no initializer in this file. NOTE(review): presumably an all-zero buffer set up elsewhere — confirm
    35  )
    36  
    37  // HashMap is the hash table interface that this package exposes to its callers.
    38  type HashMap interface {
    39  	// HasNull reports whether the hash map takes null values into account.
    40  	HasNull() bool
    41  	// Free releases the hash map.
    42  	Free()
    43  	// AddGroup increments the hash map's row count by 1.
    44  	AddGroup()
    45  	// AddGroups increments the hash map's row count by the given amount.
    46  	AddGroups(uint64)
    47  	// GroupCount returns the hash map's row count.
    48  	GroupCount() uint64
    49  	// Size returns the hash map's size. NOTE(review): the unit is not stated here — confirm against the implementations.
    50  	Size() int64
    51  }
    52  
    53  // Iterator performs insert and find operations on a hash table in bulk.
    54  type Iterator interface {
    55  	// Insert inserts the rows vecs[start, start+count) into the hash map.
    56  	// vs  : the row number in the hash table corresponding to each value (numbering starts at 1).
    57  	// zvs : zvs[i] is 0 if a null is present and 1 if no null is present.
    58  	Insert(start, count int, vecs []*vector.Vector) (vs []uint64, zvs []int64, err error)
    59  
    60  	// Find looks up the rows vecs[start, start+count) in the hash map.
    61  	// vs  : the row number in the hash table corresponding to each value (numbering starts at 1; 0 means not found).
    62  	// zvs : zvs[i] is 0 if a null is present and 1 if no null is present. NOTE(review): inBuckets is not described here — confirm its meaning against the implementations.
    63  	Find(start, count int, vecs []*vector.Vector, inBuckets []uint8) (vs []uint64, zvs []int64)
    64  }
    65  
    66  // JoinMap holds the state used for a join: hash map pointers, counters, selection lists and a filter expression.
    67  type JoinMap struct {
    68  	cnt       *int64    // NOTE(review): presumably a shared reference counter — confirm
    69  	dupCnt    *int64    // NOTE(review): presumably a shared counter related to isDup — confirm
    70  	multiSels [][]int32 // NOTE(review): presumably per-key lists of row selections — confirm
    71  	// expr is the pushed-down filter expression, possibly a bloom filter.
    72  	expr    *plan.Expr
    73  	shm     *StrHashMap // hash map with []byte keys. NOTE(review): presumably only one of shm and ihm is set — confirm
    74  	ihm     *IntHashMap // hash map with 8-byte integer keys
    75  	hasNull bool
    76  
    77  	isDup            bool
    78  	runtimeFilter_In bool // NOTE(review): presumably marks that the runtime filter is an IN-list — confirm
    79  }
    80  
    81  // StrHashMap maps []byte keys to uint64 values that start from 1.
    82  //
    83  // Inserting a new key yields the last value plus 1; inserting a key that already exists yields the value associated with that key.
    84  type StrHashMap struct {
    85  	hasNull bool   // NOTE(review): presumably the flag reported by HasNull — confirm
    86  	rows    uint64 // NOTE(review): presumably the row count reported by GroupCount — confirm
    87  	keys    [][]byte
    88  	values  []uint64
    89  	// zValues entries are 0 when a null is present and 1 when no null is present.
    90  	zValues       []int64
    91  	strHashStates [][3]uint64 // NOTE(review): presumably per-row hashing scratch state — confirm
    92  	ibucket       uint64      // NOTE(review): presumably this map's bucket index out of nbucket — confirm
    93  	nbucket       uint64
    94  
    95  	m       *mpool.MPool             // NOTE(review): presumably the memory pool backing this map's allocations — confirm
    96  	hashMap *hashtable.StringHashMap // underlying hash table from the hashtable package
    97  }
    98  
    99  // IntHashMap maps 8-byte integer keys to uint64 values that start from 1.
   100  // Before using an IntHashMap, the caller must make sure that the lengths of
   101  // the key vectors sum to 8. NOTE(review): "length" presumably means the byte width of each vector's element type — confirm.
   102  type IntHashMap struct {
   103  	hasNull bool // NOTE(review): presumably the flag reported by HasNull — confirm
   104  
   105  	rows    uint64 // NOTE(review): presumably the row count reported by GroupCount — confirm
   106  	keys    []uint64
   107  	keyOffs []uint32 // NOTE(review): presumably per-row byte offsets used while packing keys — confirm
   108  	values  []uint64
   109  	zValues []int64 // NOTE(review): presumably 0 when a null is present and 1 otherwise, as in StrHashMap — confirm
   110  	hashes  []uint64
   111  	ibucket uint64 // NOTE(review): presumably this map's bucket index out of nbucket — confirm
   112  	nbucket uint64
   113  
   114  	m       *mpool.MPool            // NOTE(review): presumably the memory pool backing this map's allocations — confirm
   115  	hashMap *hashtable.Int64HashMap // underlying hash table from the hashtable package
   116  }
   117  
   118  type strHashmapIterator struct { // NOTE(review): presumably the Iterator implementation for StrHashMap; its methods are not in this file — confirm
   119  	m                *mpool.MPool // NOTE(review): presumably the memory pool used by the iterator — confirm
   120  	mp               *StrHashMap  // the StrHashMap this iterator refers to
   121  	ibucket, nbucket uint64       // NOTE(review): presumably a bucket index and the total bucket count — confirm
   122  }
   123  
   124  type intHashMapIterator struct { // NOTE(review): presumably the Iterator implementation for IntHashMap; its methods are not in this file — confirm
   125  	ibucket, nbucket uint64       // NOTE(review): presumably a bucket index and the total bucket count — confirm
   126  	m                *mpool.MPool // NOTE(review): presumably the memory pool used by the iterator — confirm
   127  	mp               *IntHashMap  // the IntHashMap this iterator refers to
   128  }