github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/memoryengine/shard_hash.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package memoryengine 16 17 import ( 18 "context" 19 "fmt" 20 "hash/fnv" 21 "sort" 22 "unsafe" 23 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/common/mpool" 26 "github.com/matrixorigin/matrixone/pkg/container/batch" 27 "github.com/matrixorigin/matrixone/pkg/container/types" 28 "github.com/matrixorigin/matrixone/pkg/container/vector" 29 logservicepb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 30 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 31 "github.com/matrixorigin/matrixone/pkg/vm/engine" 32 ) 33 34 type HashShard struct { 35 mp *mpool.MPool 36 } 37 38 func NewHashShard(mp *mpool.MPool) *HashShard { 39 return &HashShard{ 40 mp: mp, 41 } 42 } 43 44 func (*HashShard) Batch( 45 ctx context.Context, 46 tableID ID, 47 getDefs getDefsFunc, 48 bat *batch.Batch, 49 nodes []logservicepb.DNStore, 50 ) ( 51 sharded []*ShardedBatch, 52 err error, 53 ) { 54 55 // get defs 56 defs, err := getDefs(ctx) 57 if err != nil { 58 return nil, err 59 } 60 61 // get shard key 62 var primaryAttrs []engine.Attribute 63 for _, def := range defs { 64 attr, ok := def.(*engine.AttributeDef) 65 if !ok { 66 continue 67 } 68 if attr.Attr.Primary { 69 primaryAttrs = append(primaryAttrs, attr.Attr) 70 } 71 } 72 sort.Slice(primaryAttrs, func(i, j int) bool { 73 return primaryAttrs[i].Name < primaryAttrs[j].Name 74 }) 75 if len(primaryAttrs) == 0 { 76 // no shard key 77 return nil, nil 78 } 79 type keyInfo struct { 80 Attr engine.Attribute 81 Index int 82 } 83 var infos []keyInfo 84 for _, attr := range primaryAttrs { 85 for i, name := range bat.Attrs { 86 if name == attr.Name { 87 infos = append(infos, keyInfo{ 88 Attr: attr, 89 Index: i, 90 }) 91 } 92 } 93 } 94 95 // shards 96 var shards []*Shard 97 for _, store := range nodes { 98 for _, info := range store.Shards { 99 shards = append(shards, &Shard{ 100 DNShardRecord: metadata.DNShardRecord{ 101 ShardID: info.ShardID, 102 }, 103 ReplicaID: info.ReplicaID, 104 Address: store.ServiceAddress, 105 }) 106 } 107 } 108 sort.Slice(shards, func(i, j int) bool { 109 return shards[i].ShardID < shards[j].ShardID 110 }) 111 m := make(map[*Shard]*batch.Batch) 112 for _, shard := range shards { 113 batchCopy := *bat 114 for i := range batchCopy.Zs { 115 batchCopy.Zs[i] = 0 116 } 117 m[shard] = &batchCopy 118 } 119 120 // shard batch 121 for i := 0; i < bat.Length(); i++ { 122 hasher := fnv.New32() 123 for _, info := range infos { 124 vec := bat.Vecs[info.Index] 125 bs, err := getBytesFromPrimaryVectorForHash(ctx, vec, i, info.Attr.Type) 126 if err != nil { 127 return nil, err 128 } 129 _, err = hasher.Write(bs) 130 if err != nil { 131 panic(err) 132 } 133 } 134 n := int(hasher.Sum32()) 135 shard := shards[n%len(shards)] 136 m[shard].Zs[i] = 1 137 } 138 139 for shard, bat := range m { 140 isEmpty := true 141 for _, i := range bat.Zs { 142 if i > 0 { 143 isEmpty = false 144 break 145 } 146 } 147 if isEmpty { 148 continue 149 } 150 sharded = append(sharded, &ShardedBatch{ 151 Shard: *shard, 152 Batch: bat, 153 }) 154 } 155 156 return 157 } 158 159 func (h *HashShard) Vector( 160 ctx context.Context, 161 tableID ID, 162 getDefs getDefsFunc, 163 colName string, 164 vec *vector.Vector, 165 nodes []logservicepb.DNStore, 166 ) ( 167 sharded []*ShardedVector, 168 err error, 169 ) { 170 171 //TODO use vector nulls mask 172 173 // get defs 174 defs, err := getDefs(ctx) 175 if err != nil { 176 return nil, err 177 } 178 179 // get shard key 180 var shardAttr *engine.Attribute 181 for _, def := range defs { 182 attr, ok := def.(*engine.AttributeDef) 183 if !ok { 184 continue 185 } 186 if attr.Attr.Primary { 187 if attr.Attr.Name == colName { 188 shardAttr = &attr.Attr 189 break 190 } 191 } 192 } 193 if shardAttr == nil { 194 // no shard key 195 return nil, nil 196 } 197 198 // shards 199 var shards []*Shard 200 for _, store := range nodes { 201 for _, info := range store.Shards { 202 shards = append(shards, &Shard{ 203 DNShardRecord: metadata.DNShardRecord{ 204 ShardID: info.ShardID, 205 }, 206 ReplicaID: info.ReplicaID, 207 Address: store.ServiceAddress, 208 }) 209 } 210 } 211 sort.Slice(shards, func(i, j int) bool { 212 return shards[i].ShardID < shards[j].ShardID 213 }) 214 m := make(map[*Shard]*vector.Vector) 215 216 // shard vector 217 for i := 0; i < vec.Length(); i++ { 218 hasher := fnv.New32() 219 bs, err := getBytesFromPrimaryVectorForHash(ctx, vec, i, shardAttr.Type) 220 if err != nil { 221 return nil, err 222 } 223 _, err = hasher.Write(bs) 224 if err != nil { 225 panic(err) 226 } 227 n := int(hasher.Sum32()) 228 shard := shards[n%len(shards)] 229 shardVec, ok := m[shard] 230 if !ok { 231 shardVec = vector.New(shardAttr.Type) 232 m[shard] = shardVec 233 } 234 v := getNullableValueFromVector(vec, i) 235 appendNullableValueToVector(shardVec, v, h.mp) 236 } 237 238 for shard, vec := range m { 239 if vec.Length() == 0 { 240 continue 241 } 242 sharded = append(sharded, &ShardedVector{ 243 Shard: *shard, 244 Vector: vec, 245 }) 246 } 247 248 return 249 } 250 251 var _ ShardPolicy = new(HashShard) 252 253 func getBytesFromPrimaryVectorForHash(ctx context.Context, vec *vector.Vector, i int, typ types.Type) ([]byte, error) { 254 if vec.IsConst() { 255 panic("primary value vector should not be const") 256 } 257 if vec.GetNulls().Any() { 258 //TODO mimic to pass BVT 259 return nil, moerr.NewDuplicate(ctx) 260 //panic("primary value vector should not contain nulls") 261 } 262 if vec.Typ.IsFixedLen() { 263 // is slice 264 size := vec.Typ.TypeSize() 265 l := vec.Length() * size 266 data := unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), l) 267 end := (i + 1) * size 268 if end > len(data) { 269 //TODO mimic to pass BVT 270 return nil, moerr.NewDuplicate(ctx) 271 //return nil, moerr.NewInvalidInput("vector size not match") 272 } 273 return data[i*size : (i+1)*size], nil 274 } else if vec.Typ.IsVarlen() { 275 slice := vector.GetBytesVectorValues(vec) 276 if i >= len(slice) { 277 return []byte{}, nil 278 } 279 return slice[i], nil 280 } 281 panic(fmt.Sprintf("unknown type: %v", typ)) 282 } 283 284 type Nullable struct { 285 IsNull bool 286 Value any 287 } 288 289 func getNullableValueFromVector(vec *vector.Vector, i int) (value Nullable) { 290 if vec.IsConst() { 291 i = 0 292 } 293 switch vec.Typ.Oid { 294 295 case types.T_bool: 296 if vec.IsScalarNull() { 297 value = Nullable{ 298 IsNull: true, 299 Value: false, 300 } 301 return 302 } 303 value = Nullable{ 304 IsNull: vec.GetNulls().Contains(uint64(i)), 305 Value: vec.Col.([]bool)[i], 306 } 307 return 308 309 case types.T_int8: 310 if vec.IsScalarNull() { 311 value = Nullable{ 312 IsNull: true, 313 Value: int8(0), 314 } 315 return 316 } 317 value = Nullable{ 318 IsNull: vec.GetNulls().Contains(uint64(i)), 319 Value: vec.Col.([]int8)[i], 320 } 321 return 322 323 case types.T_int16: 324 if vec.IsScalarNull() { 325 value = Nullable{ 326 IsNull: true, 327 Value: int16(0), 328 } 329 return 330 } 331 value = Nullable{ 332 IsNull: vec.GetNulls().Contains(uint64(i)), 333 Value: vec.Col.([]int16)[i], 334 } 335 return 336 337 case types.T_int32: 338 if vec.IsScalarNull() { 339 value = Nullable{ 340 IsNull: true, 341 Value: int32(0), 342 } 343 return 344 } 345 value = Nullable{ 346 IsNull: vec.GetNulls().Contains(uint64(i)), 347 Value: vec.Col.([]int32)[i], 348 } 349 return 350 351 case types.T_int64: 352 if vec.IsScalarNull() { 353 value = Nullable{ 354 IsNull: true, 355 Value: int64(0), 356 } 357 return 358 } 359 value = Nullable{ 360 IsNull: vec.GetNulls().Contains(uint64(i)), 361 Value: vec.Col.([]int64)[i], 362 } 363 return 364 365 case types.T_uint8: 366 if vec.IsScalarNull() { 367 value = Nullable{ 368 IsNull: true, 369 Value: uint8(0), 370 } 371 return 372 } 373 value = Nullable{ 374 IsNull: vec.GetNulls().Contains(uint64(i)), 375 Value: vec.Col.([]uint8)[i], 376 } 377 return 378 379 case types.T_uint16: 380 if vec.IsScalarNull() { 381 value = Nullable{ 382 IsNull: true, 383 Value: uint16(0), 384 } 385 return 386 } 387 value = Nullable{ 388 IsNull: vec.GetNulls().Contains(uint64(i)), 389 Value: vec.Col.([]uint16)[i], 390 } 391 return 392 393 case types.T_uint32: 394 if vec.IsScalarNull() { 395 value = Nullable{ 396 IsNull: true, 397 Value: uint32(0), 398 } 399 return 400 } 401 value = Nullable{ 402 IsNull: vec.GetNulls().Contains(uint64(i)), 403 Value: vec.Col.([]uint32)[i], 404 } 405 return 406 407 case types.T_uint64: 408 if vec.IsScalarNull() { 409 value = Nullable{ 410 IsNull: true, 411 Value: uint64(0), 412 } 413 return 414 } 415 value = Nullable{ 416 IsNull: vec.GetNulls().Contains(uint64(i)), 417 Value: vec.Col.([]uint64)[i], 418 } 419 return 420 421 case types.T_float32: 422 if vec.IsScalarNull() { 423 value = Nullable{ 424 IsNull: true, 425 Value: float32(0), 426 } 427 return 428 } 429 value = Nullable{ 430 IsNull: vec.GetNulls().Contains(uint64(i)), 431 Value: vec.Col.([]float32)[i], 432 } 433 return 434 435 case types.T_float64: 436 if vec.IsScalarNull() { 437 value = Nullable{ 438 IsNull: true, 439 Value: float64(0), 440 } 441 return 442 } 443 value = Nullable{ 444 IsNull: vec.GetNulls().Contains(uint64(i)), 445 Value: vec.Col.([]float64)[i], 446 } 447 return 448 449 case types.T_tuple: 450 if vec.IsScalarNull() { 451 value = Nullable{ 452 IsNull: true, 453 Value: []any{}, 454 } 455 return 456 } 457 value = Nullable{ 458 IsNull: vec.GetNulls().Contains(uint64(i)), 459 Value: vec.Col.([][]any)[i], 460 } 461 return 462 463 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 464 if vec.IsScalarNull() { 465 value = Nullable{ 466 IsNull: true, 467 Value: []byte{}, 468 } 469 return 470 } 471 value = Nullable{ 472 IsNull: vec.GetNulls().Contains(uint64(i)), 473 Value: vec.GetBytes(int64(i)), 474 } 475 return 476 477 case types.T_date: 478 if vec.IsScalarNull() { 479 var zero types.Date 480 value = Nullable{ 481 IsNull: true, 482 Value: zero, 483 } 484 return 485 } 486 value = Nullable{ 487 IsNull: vec.GetNulls().Contains(uint64(i)), 488 Value: vec.Col.([]types.Date)[i], 489 } 490 return 491 492 case types.T_time: 493 if vec.IsScalarNull() { 494 var zero types.Time 495 value = Nullable{ 496 IsNull: true, 497 Value: zero, 498 } 499 return 500 } 501 value = Nullable{ 502 IsNull: vec.GetNulls().Contains(uint64(i)), 503 Value: vec.Col.([]types.Time)[i], 504 } 505 return 506 507 case types.T_datetime: 508 if vec.IsScalarNull() { 509 var zero types.Datetime 510 value = Nullable{ 511 IsNull: true, 512 Value: zero, 513 } 514 return 515 } 516 value = Nullable{ 517 IsNull: vec.GetNulls().Contains(uint64(i)), 518 Value: vec.Col.([]types.Datetime)[i], 519 } 520 return 521 522 case types.T_timestamp: 523 if vec.IsScalarNull() { 524 var zero types.Timestamp 525 value = Nullable{ 526 IsNull: true, 527 Value: zero, 528 } 529 return 530 } 531 value = Nullable{ 532 IsNull: vec.GetNulls().Contains(uint64(i)), 533 Value: vec.Col.([]types.Timestamp)[i], 534 } 535 return 536 537 case types.T_decimal64: 538 if vec.IsScalarNull() { 539 var zero types.Decimal64 540 value = Nullable{ 541 IsNull: true, 542 Value: zero, 543 } 544 return 545 } 546 value = Nullable{ 547 IsNull: vec.GetNulls().Contains(uint64(i)), 548 Value: vec.Col.([]types.Decimal64)[i], 549 } 550 return 551 552 case types.T_decimal128: 553 if vec.IsScalarNull() { 554 var zero types.Decimal128 555 value = Nullable{ 556 IsNull: true, 557 Value: zero, 558 } 559 return 560 } 561 value = Nullable{ 562 IsNull: vec.GetNulls().Contains(uint64(i)), 563 Value: vec.Col.([]types.Decimal128)[i], 564 } 565 return 566 567 case types.T_Rowid: 568 if vec.IsScalarNull() { 569 var zero types.Rowid 570 value = Nullable{ 571 IsNull: true, 572 Value: zero, 573 } 574 return 575 } 576 value = Nullable{ 577 IsNull: vec.GetNulls().Contains(uint64(i)), 578 Value: vec.Col.([]types.Rowid)[i], 579 } 580 return 581 582 case types.T_uuid: 583 if vec.IsScalarNull() { 584 var zero types.Uuid 585 value = Nullable{ 586 IsNull: true, 587 Value: zero, 588 } 589 return 590 } 591 value = Nullable{ 592 IsNull: vec.GetNulls().Contains(uint64(i)), 593 Value: vec.Col.([]types.Uuid)[i], 594 } 595 return 596 597 } 598 599 panic(fmt.Sprintf("unknown column type: %v", vec.Typ)) 600 } 601 602 func appendNullableValueToVector(vec *vector.Vector, value Nullable, mp *mpool.MPool) { 603 str, ok := value.Value.(string) 604 if ok { 605 value.Value = []byte(str) 606 } 607 vec.Append(value.Value, false, mp) 608 if value.IsNull { 609 vec.GetNulls().Set(uint64(vec.Length() - 1)) 610 } 611 }