github.com/matrixorigin/matrixone@v0.7.0/pkg/common/hashmap/types.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashmap
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    19  	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
    20  	"github.com/matrixorigin/matrixone/pkg/container/index"
    21  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    22  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    23  )
    24  
    25  const (
    26  	UnitLimit = 256 // NOTE(review): presumably the maximum number of rows handled per batch — confirm against callers
    27  )
    28  
    29  var (
    30  	OneUInt8s  []uint8  // declared without an initializer; NOTE(review): presumably filled with 1s elsewhere in the package — confirm
    31  	OneInt64s  []int64  // declared without an initializer; NOTE(review): presumably filled with 1s elsewhere in the package — confirm
    32  	zeroUint64 []uint64 // declared without an initializer; NOTE(review): presumably an all-zero scratch slice — confirm
    33  	zeroUint32 []uint32 // declared without an initializer; NOTE(review): presumably an all-zero scratch slice — confirm
    34  )
    35  
    36  // HashMap is the encapsulated hash table interface exposed to the outside.
    37  type HashMap interface {
    38  	// HasNull reports whether the hash map takes null values into account.
    39  	HasNull() bool
    40  	// Free frees the hash map.
    41  	Free()
    42  	// AddGroup adds 1 to the row count of the hash map.
    43  	AddGroup()
    44  	// AddGroups adds N to the row count of the hash map.
    45  	AddGroups(uint64)
    46  	// GroupCount returns the row count of the hash map.
    47  	GroupCount() uint64
    48  	// Size returns the size of the hash map (NOTE(review): unit is not stated here, presumably bytes — confirm).
    49  	Size() int64
    50  }
    51  
    52  // Iterator allows users to perform insert or find operations on hash tables in bulk.
    53  type Iterator interface {
    54  	// Insert inserts vecs[start, start+count) into the hash map.
    55  	// vs  : the number of rows corresponding to each value in the hash table (starting from 1).
    56  	// zvs : zvs[i] == 0 indicates the presence of a null, zvs[i] == 1 indicates the absence of a null.
    57  	Insert(start, count int, vecs []*vector.Vector) (vs []uint64, zvs []int64, err error)
    58    
    59  	// Find looks up vecs[start, start+count) in the hash map.
    60  	// vs  : the number of rows corresponding to each value in the hash table (starting from 1; 0 means not found).
    61  	// zvs : zvs[i] == 0 indicates the presence of a null, zvs[i] == 1 indicates the absence of a null.
    62  	Find(start, count int, vecs []*vector.Vector, inBuckets []uint8) (vs []uint64, zvs []int64)
    63  }
    64  
    65  // JoinMap holds the hash map and the auxiliary state used for joins.
    66  type JoinMap struct {
    67  	cnt    *int64    // NOTE(review): held by pointer, presumably a counter shared between users of the map — confirm
    68  	dupCnt *int64    // NOTE(review): held by pointer, presumably a counter shared between duplicates of the map — confirm
    69  	sels   [][]int32 // NOTE(review): presumably the row selections associated with each hash-table value — confirm
    70  	// expr is a push-down filter expression, possibly a bloom filter.
    71  	expr    *plan.Expr
    72  	mp      *StrHashMap                // the underlying string-keyed hash map
    73  	hasNull bool                       // NOTE(review): presumably whether null values are taken into account — confirm
    74  	idx     *index.LowCardinalityIndex // low-cardinality index; how it is used is not visible in this file
    75  }
    76  
    77  // StrHashMap maps []byte keys to uint64 values (starting from 1).
    78  //
    79  //	Inserting a new key yields the last value + 1; inserting an existing key yields the value already associated with that key.
    80  type StrHashMap struct {
    81  	hasNull bool     // NOTE(review): presumably what HasNull reports — confirm
    82  	rows    uint64   // NOTE(review): presumably the row count reported by GroupCount — confirm
    83  	keys    [][]byte // NOTE(review): presumably a per-batch buffer of encoded keys — confirm
    84  	values  []uint64 // NOTE(review): presumably a per-batch buffer of hash-table values — confirm
    85  	// zValues: 0 indicates the presence of a null, 1 indicates the absence of a null.
    86  	zValues          []int64
    87  	strHashStates    [][3]uint64 // NOTE(review): presumably per-row hash state used by the underlying hash table — confirm
    88  	ibucket, nbucket uint64      // NOTE(review): presumably this map's bucket index and the total bucket count — confirm
    89    
    90  	m       *mpool.MPool             // memory pool associated with this map
    91  	hashMap *hashtable.StringHashMap // the underlying string hash table
    92  }
    93  
    94  // IntHashMap maps int64 keys to uint64 values (starting from 1).
    95  // Before using an IntHashMap, the caller must make sure that the
    96  // sum of the vectors' lengths equals 8 (NOTE(review): presumably the combined byte width of the key columns — confirm).
    97  type IntHashMap struct {
    98  	hasNull bool // NOTE(review): presumably what HasNull reports — confirm
    99    
   100  	rows             uint64   // NOTE(review): presumably the row count reported by GroupCount — confirm
   101  	keys             []uint64 // NOTE(review): presumably a per-batch buffer of packed 8-byte keys — confirm
   102  	keyOffs          []uint32 // NOTE(review): presumably per-row byte offsets used while packing keys — confirm
   103  	values           []uint64 // NOTE(review): presumably a per-batch buffer of hash-table values — confirm
   104  	zValues          []int64  // NOTE(review): presumably 0 marks the presence of a null and 1 its absence, as in StrHashMap — confirm
   105  	hashes           []uint64 // NOTE(review): presumably a per-batch buffer of key hashes — confirm
   106  	ibucket, nbucket uint64   // NOTE(review): presumably this map's bucket index and the total bucket count — confirm
   107    
   108  	m       *mpool.MPool            // memory pool associated with this map
   109  	hashMap *hashtable.Int64HashMap // the underlying int64 hash table
   110  }
   111  
   112  type strHashmapIterator struct { // iterator state bound to a StrHashMap; NOTE(review): presumably the Iterator implementation for it — confirm
   113  	m                *mpool.MPool // memory pool associated with this iterator
   114  	mp               *StrHashMap  // the hash map this iterator operates on
   115  	ibucket, nbucket uint64       // NOTE(review): presumably the bucket index and the total bucket count — confirm
   116  }
   117  
   118  type intHashMapIterator struct { // iterator state bound to an IntHashMap; NOTE(review): presumably the Iterator implementation for it — confirm
   119  	ibucket, nbucket uint64       // NOTE(review): presumably the bucket index and the total bucket count — confirm
   120  	m                *mpool.MPool // memory pool associated with this iterator
   121  	mp               *IntHashMap  // the hash map this iterator operates on
   122  }