github.com/matrixorigin/matrixone@v0.7.0/pkg/container/index/dict/dict.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dict 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/mpool" 19 "github.com/matrixorigin/matrixone/pkg/container/types" 20 "github.com/matrixorigin/matrixone/pkg/container/vector" 21 ) 22 23 type Dict struct { 24 typ types.Type 25 26 m *mpool.MPool 27 idx reverseIndex 28 unique *vector.Vector 29 30 ref int 31 } 32 33 func New(typ types.Type, m *mpool.MPool) (*Dict, error) { 34 d := &Dict{ 35 typ: typ, 36 m: m, 37 } 38 39 var idx reverseIndex 40 var err error 41 42 if d.fixed() { // check whether the type of data is fixed-length or variable-length 43 if idx, err = newFixedReverseIndex(m); err != nil { 44 return nil, err 45 } 46 d.unique = vector.New(types.T_uint64.ToType()) 47 } else { 48 if idx, err = newVarReverseIndex(m); err != nil { 49 return nil, err 50 } 51 d.unique = vector.New(types.T_varchar.ToType()) 52 } 53 54 d.idx = idx 55 d.ref = 1 56 return d, nil 57 } 58 59 func (d *Dict) GetUnique() *vector.Vector { 60 return d.unique 61 } 62 63 func (d *Dict) Cardinality() uint64 { 64 return uint64(d.unique.Length()) 65 } 66 67 func (d *Dict) Dup() *Dict { 68 d.ref++ 69 return d 70 } 71 72 func (d *Dict) InsertBatch(data *vector.Vector) ([]uint16, error) { 73 var ks any 74 if d.fixed() { 75 ks = d.encodeFixedData(data) 76 } else { 77 ks = d.encodeVarData(data) 78 } 79 80 values, err := d.idx.insert(ks) 81 if err != nil { 82 return nil, err 83 } 84 ips /* insertion points */ := make([]uint16, len(values)) 85 for i, v := range values { 86 if int(v) > d.unique.Length() { 87 if d.fixed() { 88 err = d.unique.Append(ks.([]uint64)[i], false, d.m) 89 } else { 90 err = d.unique.Append(ks.([][]byte)[i], false, d.m) 91 } 92 if err != nil { 93 return nil, err 94 } 95 } 96 ips[i] = uint16(v) 97 } 98 return ips, nil 99 } 100 101 func (d *Dict) FindBatch(data *vector.Vector) []uint16 { 102 var ks any 103 if d.fixed() { 104 ks = d.encodeFixedData(data) 105 } else { 106 ks = d.encodeVarData(data) 107 } 108 values := d.idx.find(ks) 109 110 poses := make([]uint16, len(values)) 111 for i, v := range values { 112 poses[i] = uint16(v) 113 } 114 return poses 115 } 116 117 func (d *Dict) FindData(pos uint16) *vector.Vector { 118 if d.fixed() { 119 return d.findFixedData(int(pos)) 120 } else { 121 return d.findVarData(int(pos)) 122 } 123 } 124 125 func (d *Dict) Free() { 126 if d.ref == 0 { 127 return 128 } 129 d.ref-- 130 if d.ref > 0 { 131 return 132 } 133 134 if d.unique != nil { 135 d.unique.Free(d.m) 136 } 137 if d.idx != nil { 138 d.idx.free() 139 } 140 } 141 142 func (d *Dict) fixed() bool { return !d.typ.IsString() } 143 144 func (d *Dict) encodeFixedData(data *vector.Vector) []uint64 { 145 us := make([]uint64, data.Length()) 146 switch d.typ.Oid { 147 case types.T_bool: 148 col := vector.MustTCols[bool](data) 149 for i, v := range col { 150 if v { 151 us[i] = 1 152 } 153 } 154 case types.T_int32: 155 col := vector.MustTCols[int32](data) 156 for i, v := range col { 157 us[i] = uint64(v) 158 } 159 case types.T_int64: 160 col := vector.MustTCols[int64](data) 161 for i, v := range col { 162 us[i] = uint64(v) 163 } 164 case types.T_uint32: 165 col := vector.MustTCols[uint32](data) 166 for i, v := range col { 167 us[i] = uint64(v) 168 } 169 case types.T_uint64: 170 copy(us, vector.MustTCols[uint64](data)) 171 case types.T_float32: 172 col := vector.MustTCols[float32](data) 173 for i, v := range col { 174 us[i] = uint64(v) 175 } 176 case types.T_float64: 177 col := vector.MustTCols[float64](data) 178 for i, v := range col { 179 us[i] = uint64(v) 180 } 181 case types.T_decimal64: 182 col := vector.MustTCols[types.Decimal64](data) 183 for i, v := range col { 184 us[i] = types.DecodeUint64(types.EncodeDecimal64(&v)) 185 } 186 case types.T_date: 187 col := vector.MustTCols[types.Date](data) 188 for i, v := range col { 189 us[i] = uint64(v) 190 } 191 case types.T_datetime: 192 col := vector.MustTCols[types.Datetime](data) 193 for i, v := range col { 194 us[i] = uint64(v) 195 } 196 case types.T_timestamp: 197 col := vector.MustTCols[types.Timestamp](data) 198 for i, v := range col { 199 us[i] = uint64(v) 200 } 201 } 202 return us 203 } 204 205 func (d *Dict) encodeVarData(data *vector.Vector) [][]byte { 206 return vector.GetBytesVectorValues(data) 207 } 208 209 func (d *Dict) findFixedData(pos int) *vector.Vector { 210 v := vector.NewConst(d.typ, 1) 211 data := d.getFixedData(pos) 212 switch d.typ.Oid { 213 case types.T_bool: 214 val := false 215 if data == 1 { 216 val = true 217 } 218 vector.MustTCols[bool](v)[0] = val 219 case types.T_int32: 220 vector.MustTCols[int32](v)[0] = int32(data) 221 case types.T_int64: 222 vector.MustTCols[int64](v)[0] = int64(data) 223 case types.T_uint32: 224 vector.MustTCols[uint32](v)[0] = uint32(data) 225 case types.T_uint64: 226 vector.MustTCols[uint64](v)[0] = uint64(data) 227 case types.T_float32: 228 vector.MustTCols[float32](v)[0] = float32(data) 229 case types.T_float64: 230 vector.MustTCols[float64](v)[0] = float64(data) 231 case types.T_decimal64: 232 val := types.DecodeDecimal64(types.EncodeUint64(&data)) 233 vector.MustTCols[types.Decimal64](v)[0] = val 234 case types.T_date: 235 vector.MustTCols[types.Date](v)[0] = types.Date(data) 236 case types.T_datetime: 237 vector.MustTCols[types.Datetime](v)[0] = types.Datetime(data) 238 case types.T_timestamp: 239 vector.MustTCols[types.Timestamp](v)[0] = types.Timestamp(data) 240 } 241 return v 242 } 243 244 func (d *Dict) findVarData(pos int) *vector.Vector { 245 return vector.NewConstBytes(d.typ, 1, d.getVarData(pos), d.m) 246 } 247 248 func (d *Dict) getFixedData(n int) uint64 { 249 return vector.MustTCols[uint64](d.unique)[n-1] 250 } 251 252 func (d *Dict) getVarData(n int) []byte { 253 return d.unique.GetBytes(int64(n - 1)) 254 }