github.com/matrixorigin/matrixone@v0.7.0/pkg/container/index/low_cardinality_index.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package index 16 17 import ( 18 "math" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/common/mpool" 22 "github.com/matrixorigin/matrixone/pkg/container/index/dict" 23 "github.com/matrixorigin/matrixone/pkg/container/nulls" 24 "github.com/matrixorigin/matrixone/pkg/container/types" 25 "github.com/matrixorigin/matrixone/pkg/container/vector" 26 ) 27 28 const ( 29 MaxLowCardinality = math.MaxUint16 + 1 30 ) 31 32 var ( 33 ErrNotSupported = moerr.NewNotSupportedNoCtx("the type is not supported for low cardinality index") 34 ) 35 36 type LowCardinalityIndex struct { 37 typ types.Type 38 39 m *mpool.MPool 40 dict *dict.Dict 41 // poses is the positions of original data in the dictionary. 42 // Currently, the type of poses[i] is `T_uint16` which means 43 // the max cardinality of LowCardinalityIndex is 65536. 44 // The position of `null` value is 0. 45 poses *vector.Vector 46 47 ref int 48 } 49 50 func New(typ types.Type, m *mpool.MPool) (*LowCardinalityIndex, error) { 51 if typ.Oid == types.T_decimal128 || typ.Oid == types.T_json { 52 return nil, ErrNotSupported 53 } 54 55 d, err := dict.New(typ, m) 56 if err != nil { 57 return nil, err 58 } 59 return &LowCardinalityIndex{ 60 typ: typ, 61 m: m, 62 dict: d, 63 poses: vector.New(types.T_uint16.ToType()), 64 ref: 1, 65 }, nil 66 } 67 68 func (idx *LowCardinalityIndex) GetPoses() *vector.Vector { 69 return idx.poses 70 } 71 72 func (idx *LowCardinalityIndex) GetDict() *dict.Dict { 73 return idx.dict 74 } 75 76 func (idx *LowCardinalityIndex) Dup() *LowCardinalityIndex { 77 idx.ref++ 78 return idx 79 } 80 81 func (idx *LowCardinalityIndex) DupEmpty() *LowCardinalityIndex { 82 return &LowCardinalityIndex{ 83 typ: idx.typ, 84 m: idx.m, 85 dict: idx.dict.Dup(), 86 poses: vector.New(types.T_uint16.ToType()), 87 ref: 1, 88 } 89 } 90 91 func (idx *LowCardinalityIndex) InsertBatch(data *vector.Vector) error { 92 originalLen := data.Length() 93 var sels []int64 94 if nulls.Any(data.Nsp) { 95 sels = make([]int64, 0, originalLen) 96 for i := 0; i < originalLen; i++ { 97 if !nulls.Contains(data.Nsp, uint64(i)) { 98 sels = append(sels, int64(i)) 99 } 100 } 101 } 102 103 var ips []uint16 104 var err error 105 if sels != nil { 106 if err = vector.Shuffle(data, sels, idx.m); err != nil { 107 return err 108 } 109 110 values, err := idx.dict.InsertBatch(data) 111 if err != nil { 112 return err 113 } 114 115 i := 0 116 ips = make([]uint16, originalLen) 117 for j := 0; j < originalLen; j++ { 118 if i < len(sels) && int64(j) == sels[i] { 119 ips[j] = values[i] 120 i++ 121 } else { 122 ips[j] = 0 123 } 124 } 125 } else { 126 if ips, err = idx.dict.InsertBatch(data); err != nil { 127 return err 128 } 129 } 130 131 return vector.AppendFixed(idx.poses, ips, idx.m) 132 } 133 134 // Encode uses the dictionary of the current index to encode the original data. 135 func (idx *LowCardinalityIndex) Encode(dst, src *vector.Vector) error { 136 poses := idx.dict.FindBatch(src) 137 return vector.AppendFixed(dst, poses, idx.m) 138 } 139 140 func (idx *LowCardinalityIndex) Free() { 141 if idx.ref == 0 { 142 return 143 } 144 idx.ref-- 145 if idx.ref > 0 { 146 return 147 } 148 149 idx.poses.Free(idx.m) 150 idx.dict.Free() 151 }