github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/txn/txnimpl/index.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package txnimpl 16 17 import ( 18 "io" 19 "sync" 20 21 "github.com/matrixorigin/matrixone/pkg/common/moerr" 22 "github.com/matrixorigin/matrixone/pkg/common/util" 23 "github.com/matrixorigin/matrixone/pkg/container/types" 24 "github.com/matrixorigin/matrixone/pkg/container/vector" 25 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 26 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers" 27 ) 28 29 type TableIndex interface { 30 io.Closer 31 BatchDedup(string, containers.Vector) error 32 BatchInsert(string, containers.Vector, int, int, uint32, bool) error 33 Insert(any, uint32) error 34 Delete(any) error 35 Search(any) (uint32, error) 36 Name() string 37 Count() int 38 KeyToVector(types.Type) containers.Vector 39 KeyToVectors(types.Type) []containers.Vector 40 } 41 42 type simpleTableIndex struct { 43 sync.RWMutex 44 tree map[any]uint32 45 } 46 47 func NewSimpleTableIndex() *simpleTableIndex { 48 return &simpleTableIndex{ 49 tree: make(map[any]uint32), 50 } 51 } 52 53 func DedupOp[T comparable]( 54 t *types.Type, 55 attr string, 56 vs []T, 57 tree map[any]uint32) (err error) { 58 for _, v := range vs { 59 if _, ok := tree[v]; ok { 60 entry := common.TypeStringValue(*t, v, false) 61 return moerr.NewDuplicateEntryNoCtx(entry, attr) 62 } 63 } 64 return 65 } 66 67 func InsertOp[T comparable]( 68 t *types.Type, 69 attr string, 70 vals []T, 71 start, count int, 72 fromRow uint32, 73 dedupInput bool, 74 tree map[any]uint32) (err error) { 75 if dedupInput { 76 set := make(map[T]bool) 77 for _, v := range vals[start : start+count] { 78 if _, ok := set[v]; ok { 79 entry := common.TypeStringValue(*t, v, false) 80 return moerr.NewDuplicateEntryNoCtx(entry, attr) 81 } 82 set[v] = true 83 } 84 return 85 } 86 for _, v := range vals[start : start+count] { 87 if _, ok := tree[v]; ok { 88 entry := common.TypeStringValue(*t, v, false) 89 return moerr.NewDuplicateEntryNoCtx(entry, attr) 90 } 91 tree[v] = fromRow 92 fromRow++ 93 } 94 return 95 } 96 97 func (idx *simpleTableIndex) KeyToVector(kType types.Type) containers.Vector { 98 vec := makeWorkspaceVector(kType) 99 switch kType.Oid { 100 case types.T_char, types.T_varchar, types.T_json, 101 types.T_binary, types.T_varbinary, types.T_blob, types.T_text: 102 for k := range idx.tree { 103 vec.Append([]byte(k.(string)), false) 104 } 105 case types.T_array_float32, types.T_array_float64: 106 // No usage for this func. 107 for k := range idx.tree { 108 vec.Append(k.([]byte), false) 109 } 110 default: 111 for k := range idx.tree { 112 vec.Append(k, false) 113 } 114 } 115 return vec 116 } 117 118 func (idx *simpleTableIndex) KeyToVectors(kType types.Type) []containers.Vector { 119 vec := makeWorkspaceVector(kType) 120 var vecs []containers.Vector 121 switch kType.Oid { 122 case types.T_char, types.T_varchar, types.T_json, 123 types.T_binary, types.T_varbinary, types.T_blob, types.T_text: 124 for k := range idx.tree { 125 if vec.Length() > int(MaxNodeRows) { 126 vecs = append(vecs, vec) 127 vec = makeWorkspaceVector(kType) 128 } 129 vec.Append([]byte(k.(string)), false) 130 } 131 case types.T_array_float32: 132 // No usage for this func. 133 for k := range idx.tree { 134 if vec.Length() > int(MaxNodeRows) { 135 vecs = append(vecs, vec) 136 vec = makeWorkspaceVector(kType) 137 } 138 vec.Append(types.BytesToArrayToString[float32](k.([]byte)), false) 139 } 140 case types.T_array_float64: 141 for k := range idx.tree { 142 if vec.Length() > int(MaxNodeRows) { 143 vecs = append(vecs, vec) 144 vec = makeWorkspaceVector(kType) 145 } 146 vec.Append(types.BytesToArrayToString[float64](k.([]byte)), false) 147 } 148 default: 149 for k := range idx.tree { 150 if vec.Length() > int(MaxNodeRows) { 151 vecs = append(vecs, vec) 152 vec = makeWorkspaceVector(kType) 153 } 154 vec.Append(k, false) 155 } 156 } 157 if vec.Length() > 0 { 158 vecs = append(vecs, vec) 159 } 160 return vecs 161 } 162 163 func (idx *simpleTableIndex) Close() error { 164 idx.tree = nil 165 return nil 166 } 167 func (idx *simpleTableIndex) Name() string { return "SimpleIndex" } 168 func (idx *simpleTableIndex) Count() int { 169 idx.RLock() 170 cnt := len(idx.tree) 171 idx.RUnlock() 172 return cnt 173 } 174 175 func (idx *simpleTableIndex) Insert(v any, row uint32) error { 176 idx.Lock() 177 defer idx.Unlock() 178 _, ok := idx.tree[v] 179 if ok { 180 return moerr.GetOkExpectedDup() 181 } 182 idx.tree[v] = row 183 return nil 184 } 185 func (idx *simpleTableIndex) Delete(vv any) error { 186 idx.Lock() 187 defer idx.Unlock() 188 var v any 189 switch vv := vv.(type) { 190 case []uint8: 191 v = string(vv) 192 default: 193 v = vv 194 } 195 _, ok := idx.tree[v] 196 if !ok { 197 return moerr.GetOkExpectedDup() 198 } 199 delete(idx.tree, v) 200 return nil 201 } 202 203 func (idx *simpleTableIndex) Search(v any) (uint32, error) { 204 idx.RLock() 205 defer idx.RUnlock() 206 row, ok := idx.tree[v] 207 if !ok { 208 return 0, moerr.NewNotFoundNoCtx() 209 } 210 return uint32(row), nil 211 } 212 213 func (idx *simpleTableIndex) BatchInsert( 214 attr string, 215 col containers.Vector, 216 start, count int, 217 row uint32, 218 dedupInput bool) error { 219 idx.Lock() 220 defer idx.Unlock() 221 colType := col.GetType() 222 switch colType.Oid { 223 case types.T_bool: 224 vs := vector.MustFixedCol[bool](col.GetDownstreamVector()) 225 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 226 case types.T_bit: 227 vs := vector.MustFixedCol[uint64](col.GetDownstreamVector()) 228 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 229 case types.T_int8: 230 vs := vector.MustFixedCol[int8](col.GetDownstreamVector()) 231 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 232 case types.T_int16: 233 vs := vector.MustFixedCol[int16](col.GetDownstreamVector()) 234 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 235 case types.T_int32: 236 vs := vector.MustFixedCol[int32](col.GetDownstreamVector()) 237 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 238 case types.T_int64: 239 vs := vector.MustFixedCol[int64](col.GetDownstreamVector()) 240 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 241 case types.T_uint8: 242 vs := vector.MustFixedCol[uint8](col.GetDownstreamVector()) 243 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 244 case types.T_uint16: 245 vs := vector.MustFixedCol[uint16](col.GetDownstreamVector()) 246 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 247 case types.T_uint32: 248 vs := vector.MustFixedCol[uint32](col.GetDownstreamVector()) 249 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 250 case types.T_uint64: 251 vs := vector.MustFixedCol[uint64](col.GetDownstreamVector()) 252 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 253 case types.T_decimal64: 254 vs := vector.MustFixedCol[types.Decimal64](col.GetDownstreamVector()) 255 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 256 case types.T_decimal128: 257 vs := vector.MustFixedCol[types.Decimal128](col.GetDownstreamVector()) 258 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 259 case types.T_uuid: 260 vs := vector.MustFixedCol[types.Uuid](col.GetDownstreamVector()) 261 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 262 case types.T_float32: 263 vs := vector.MustFixedCol[float32](col.GetDownstreamVector()) 264 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 265 case types.T_float64: 266 vs := vector.MustFixedCol[float64](col.GetDownstreamVector()) 267 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 268 case types.T_date: 269 vs := vector.MustFixedCol[types.Date](col.GetDownstreamVector()) 270 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 271 case types.T_timestamp: 272 vs := vector.MustFixedCol[types.Timestamp](col.GetDownstreamVector()) 273 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 274 case types.T_time: 275 vs := vector.MustFixedCol[types.Time](col.GetDownstreamVector()) 276 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 277 case types.T_datetime: 278 vs := vector.MustFixedCol[types.Datetime](col.GetDownstreamVector()) 279 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 280 case types.T_enum: 281 vs := vector.MustFixedCol[types.Enum](col.GetDownstreamVector()) 282 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 283 case types.T_TS: 284 vs := vector.MustFixedCol[types.TS](col.GetDownstreamVector()) 285 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 286 case types.T_Rowid: 287 vs := vector.MustFixedCol[types.Rowid](col.GetDownstreamVector()) 288 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 289 case types.T_Blockid: 290 vs := vector.MustFixedCol[types.Blockid](col.GetDownstreamVector()) 291 return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree) 292 case types.T_char, types.T_varchar, types.T_json, 293 types.T_binary, types.T_varbinary, types.T_blob, types.T_text: 294 vec := col.GetDownstreamVector() 295 if dedupInput { 296 set := make(map[string]bool) 297 for i := start; i < start+count; i++ { 298 v := vec.GetStringAt(i) 299 if _, ok := set[v]; ok { 300 entry := common.TypeStringValue(*colType, []byte(v), false) 301 return moerr.NewDuplicateEntryNoCtx(entry, attr) 302 } 303 set[v] = true 304 } 305 break 306 } 307 for i := start; i < start+count; i++ { 308 v := vec.GetStringAt(i) 309 if _, ok := idx.tree[v]; ok { 310 entry := common.TypeStringValue(*colType, []byte(v), false) 311 return moerr.NewDuplicateEntryNoCtx(entry, attr) 312 } 313 idx.tree[v] = row 314 row++ 315 } 316 case types.T_array_float32: 317 vec := col.GetDownstreamVector() 318 if dedupInput { 319 set := make(map[string]bool) 320 for i := start; i < start+count; i++ { 321 v := types.ArrayToString[float32](vector.GetArrayAt[float32](vec, i)) 322 if _, ok := set[v]; ok { 323 entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false) 324 return moerr.NewDuplicateEntryNoCtx(entry, attr) 325 } 326 set[v] = true 327 } 328 break 329 } 330 for i := start; i < start+count; i++ { 331 v := types.ArrayToString[float32](vector.GetArrayAt[float32](vec, i)) 332 if _, ok := idx.tree[v]; ok { 333 entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false) 334 return moerr.NewDuplicateEntryNoCtx(entry, attr) 335 } 336 idx.tree[v] = row 337 row++ 338 } 339 case types.T_array_float64: 340 vec := col.GetDownstreamVector() 341 if dedupInput { 342 set := make(map[string]bool) 343 for i := start; i < start+count; i++ { 344 v := types.ArrayToString[float64](vector.GetArrayAt[float64](vec, i)) 345 if _, ok := set[v]; ok { 346 entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false) 347 return moerr.NewDuplicateEntryNoCtx(entry, attr) 348 } 349 set[v] = true 350 } 351 break 352 } 353 for i := start; i < start+count; i++ { 354 v := types.ArrayToString[float64](vector.GetArrayAt[float64](vec, i)) 355 if _, ok := idx.tree[v]; ok { 356 entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false) 357 return moerr.NewDuplicateEntryNoCtx(entry, attr) 358 } 359 idx.tree[v] = row 360 row++ 361 } 362 default: 363 panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String())) 364 } 365 return nil 366 } 367 368 // TODO: rewrite 369 func (idx *simpleTableIndex) BatchDedup(attr string, col containers.Vector) error { 370 idx.RLock() 371 defer idx.RUnlock() 372 colType := col.GetType() 373 switch colType.Oid { 374 case types.T_bool: 375 vals := vector.MustFixedCol[bool](col.GetDownstreamVector()) 376 return DedupOp(colType, attr, vals, idx.tree) 377 case types.T_bit: 378 vals := vector.MustFixedCol[uint64](col.GetDownstreamVector()) 379 return DedupOp(colType, attr, vals, idx.tree) 380 case types.T_int8: 381 vals := vector.MustFixedCol[int8](col.GetDownstreamVector()) 382 return DedupOp(colType, attr, vals, idx.tree) 383 case types.T_int16: 384 vals := vector.MustFixedCol[int16](col.GetDownstreamVector()) 385 return DedupOp(colType, attr, vals, idx.tree) 386 case types.T_int32: 387 vals := vector.MustFixedCol[int32](col.GetDownstreamVector()) 388 return DedupOp(colType, attr, vals, idx.tree) 389 case types.T_int64: 390 vals := vector.MustFixedCol[int64](col.GetDownstreamVector()) 391 return DedupOp(colType, attr, vals, idx.tree) 392 case types.T_uint8: 393 vals := vector.MustFixedCol[uint8](col.GetDownstreamVector()) 394 return DedupOp(colType, attr, vals, idx.tree) 395 case types.T_uint16: 396 vals := vector.MustFixedCol[uint16](col.GetDownstreamVector()) 397 return DedupOp(colType, attr, vals, idx.tree) 398 case types.T_uint32: 399 vals := vector.MustFixedCol[uint32](col.GetDownstreamVector()) 400 return DedupOp(colType, attr, vals, idx.tree) 401 case types.T_uint64: 402 vals := vector.MustFixedCol[uint64](col.GetDownstreamVector()) 403 return DedupOp(colType, attr, vals, idx.tree) 404 case types.T_decimal64: 405 vals := vector.MustFixedCol[types.Decimal64](col.GetDownstreamVector()) 406 return DedupOp(colType, attr, vals, idx.tree) 407 case types.T_decimal128: 408 vals := vector.MustFixedCol[types.Decimal128](col.GetDownstreamVector()) 409 return DedupOp(colType, attr, vals, idx.tree) 410 case types.T_float32: 411 vals := vector.MustFixedCol[float32](col.GetDownstreamVector()) 412 return DedupOp(colType, attr, vals, idx.tree) 413 case types.T_float64: 414 vals := vector.MustFixedCol[float64](col.GetDownstreamVector()) 415 return DedupOp(colType, attr, vals, idx.tree) 416 case types.T_date: 417 vals := vector.MustFixedCol[types.Date](col.GetDownstreamVector()) 418 return DedupOp(colType, attr, vals, idx.tree) 419 case types.T_time: 420 vals := vector.MustFixedCol[types.Time](col.GetDownstreamVector()) 421 return DedupOp(colType, attr, vals, idx.tree) 422 case types.T_datetime: 423 vals := vector.MustFixedCol[types.Datetime](col.GetDownstreamVector()) 424 return DedupOp(colType, attr, vals, idx.tree) 425 case types.T_timestamp: 426 vals := vector.MustFixedCol[types.Timestamp](col.GetDownstreamVector()) 427 return DedupOp(colType, attr, vals, idx.tree) 428 case types.T_enum: 429 vals := vector.MustFixedCol[types.Enum](col.GetDownstreamVector()) 430 return DedupOp(colType, attr, vals, idx.tree) 431 case types.T_TS: 432 vals := vector.MustFixedCol[types.TS](col.GetDownstreamVector()) 433 return DedupOp(colType, attr, vals, idx.tree) 434 case types.T_Rowid: 435 vals := vector.MustFixedCol[types.Rowid](col.GetDownstreamVector()) 436 return DedupOp(colType, attr, vals, idx.tree) 437 case types.T_Blockid: 438 vals := vector.MustFixedCol[types.Blockid](col.GetDownstreamVector()) 439 return DedupOp(colType, attr, vals, idx.tree) 440 case types.T_char, types.T_varchar, types.T_json, 441 types.T_binary, types.T_varbinary, types.T_blob, types.T_text: 442 vec := col.GetDownstreamVector() 443 for i := 0; i < col.Length(); i++ { 444 bs := vec.GetBytesAt(i) 445 v := util.UnsafeBytesToString(bs) 446 if _, ok := idx.tree[v]; ok { 447 entry := common.TypeStringValue(*colType, bs, false) 448 return moerr.NewDuplicateEntryNoCtx(entry, attr) 449 } 450 } 451 case types.T_array_float32: 452 vec := col.GetDownstreamVector() 453 for i := 0; i < col.Length(); i++ { 454 bs := vec.GetBytesAt(i) 455 v := types.BytesToArrayToString[float32](bs) 456 if _, ok := idx.tree[v]; ok { 457 entry := common.TypeStringValue(*colType, bs, false) 458 return moerr.NewDuplicateEntryNoCtx(entry, attr) 459 } 460 } 461 case types.T_array_float64: 462 vec := col.GetDownstreamVector() 463 for i := 0; i < col.Length(); i++ { 464 bs := vec.GetBytesAt(i) 465 v := types.BytesToArrayToString[float64](bs) 466 if _, ok := idx.tree[v]; ok { 467 entry := common.TypeStringValue(*colType, bs, false) 468 return moerr.NewDuplicateEntryNoCtx(entry, attr) 469 } 470 } 471 default: 472 panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String())) 473 } 474 return nil 475 }