github.com/matrixorigin/matrixone@v0.7.0/pkg/container/vector/vector.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vector 16 17 import ( 18 "bytes" 19 "fmt" 20 "reflect" 21 "unsafe" 22 23 "github.com/matrixorigin/matrixone/pkg/vectorize/lengthutf8" 24 25 "github.com/matrixorigin/matrixone/pkg/common/bitmap" 26 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/common/mpool" 29 "github.com/matrixorigin/matrixone/pkg/container/nulls" 30 "github.com/matrixorigin/matrixone/pkg/container/types" 31 "github.com/matrixorigin/matrixone/pkg/vectorize/shuffle" 32 ) 33 34 // XXX Moved vector from types.go to vector.go 35 // XXX Deleted vector interface, which was commented out and outdated anyway. 36 /* Vector vector 37 * origin true: 38 * count || type || bitmap size || bitmap || vector 39 * origin false: 40 * count || vector 41 */ 42 type Vector struct { 43 // XXX There was Ref and Link, from the impl, it is totally wrong stuff. 44 // Removed. 45 Typ types.Type 46 Col interface{} // column data, encoded Data 47 Nsp *nulls.Nulls // nulls list 48 49 original bool 50 // data of fixed length element, in case of varlen, the Varlena 51 data []byte 52 // area for holding large strings. 53 area []byte 54 55 // some attributes for const vector (a vector with a lot of rows of a same const value) 56 isConst bool 57 length int 58 59 // tag for distinguish '0x00..' and 0x... and 0x... is binary 60 // TODO: check whether isBin should be changed into array/bitmap 61 // now we assumpt that it can only be true in the case of only one data in vector 62 isBin bool 63 64 // idx for low cardinality scenario. 65 idx any 66 } 67 68 func (v *Vector) SetIsBin(isBin bool) { 69 v.isBin = isBin 70 } 71 72 func (v *Vector) GetIsBin() bool { 73 return v.isBin 74 } 75 76 func (v *Vector) Length() int { 77 return Length(v) 78 } 79 80 func (v *Vector) ScalarLength() int { 81 if !v.isConst { 82 panic("Getting scalar length of non const vector.") 83 } 84 return v.length 85 } 86 87 func (v *Vector) SetScalarLength(length int) { 88 if !v.isConst { 89 panic("Setting length to non const vector.") 90 } 91 v.length = length 92 } 93 94 func (v *Vector) IsOriginal() bool { 95 return v.original 96 } 97 98 func (v *Vector) SetOriginal(status bool) { 99 v.original = status 100 } 101 102 func (v *Vector) IsLowCardinality() bool { 103 return v.idx != nil 104 } 105 106 func (v *Vector) Index() any { 107 return v.idx 108 } 109 110 func (v *Vector) SetIndex(idx any) { 111 v.idx = idx 112 } 113 114 func DecodeFixedCol[T types.FixedSizeT](v *Vector, sz int) []T { 115 return types.DecodeSlice[T](v.data) 116 } 117 118 // GetFixedVector decode data and return decoded []T. 119 // For const/scalar vector we expand and return newly allocated slice. 120 func GetFixedVectorValues[T types.FixedSizeT](v *Vector) []T { 121 if v.isConst { 122 cols := MustTCols[T](v) 123 vs := make([]T, v.Length()) 124 for i := range vs { 125 vs[i] = cols[0] 126 } 127 return vs 128 } 129 return MustTCols[T](v) 130 } 131 132 func GetStrVectorValues(v *Vector) []string { 133 if v.isConst { 134 cols := MustTCols[types.Varlena](v) 135 ss := cols[0].GetString(v.area) 136 vs := make([]string, v.Length()) 137 for i := range vs { 138 vs[i] = ss 139 } 140 return vs 141 } 142 return MustStrCols(v) 143 } 144 145 func GetBytesVectorValues(v *Vector) [][]byte { 146 if v.isConst { 147 cols := MustTCols[types.Varlena](v) 148 ss := cols[0].GetByteSlice(v.area) 149 vs := make([][]byte, v.Length()) 150 for i := range vs { 151 vs[i] = ss 152 } 153 return vs 154 } 155 return MustBytesCols(v) 156 } 157 158 // XXX A huge hammer, get rid of any typing and totally depends on v.Col 159 // We should really not using this one but it is wide spread already. 160 func GetColumn[T any](v *Vector) []T { 161 return v.Col.([]T) 162 } 163 164 // XXX Compatibility: how many aliases do we need ... 165 func GetStrColumn(v *Vector) []string { 166 return GetStrVectorValues(v) 167 } 168 169 // Get Value at index 170 func GetValueAt[T types.FixedSizeT](v *Vector, idx int64) T { 171 return MustTCols[T](v)[idx] 172 } 173 174 func GetValueAtOrZero[T types.FixedSizeT](v *Vector, idx int64) T { 175 var zt T 176 ts := MustTCols[T](v) 177 if int64(len(ts)) <= idx { 178 return zt 179 } 180 return ts[idx] 181 } 182 183 // Get the pointer to idx-th fixed size entry. 184 func GetPtrAt(v *Vector, idx int64) unsafe.Pointer { 185 return unsafe.Pointer(&v.data[idx*int64(v.GetType().TypeSize())]) 186 } 187 188 // Raw version, get from v.data. Adopt python convention and 189 // neg idx means counting from end, that is, -1 means last element. 190 func (v *Vector) getRawValueAt(idx int64) []byte { 191 if v.IsScalar() && len(v.data) == 0 { 192 return v.encodeColToByteSlice() 193 } 194 195 tlen := int64(v.GetType().TypeSize()) 196 dlen := int64(len(v.data)) 197 if idx >= 0 { 198 if dlen < (idx+1)*tlen { 199 panic("vector invalid index access") 200 } 201 return v.data[idx*tlen : idx*tlen+tlen] 202 } else { 203 start := dlen + tlen*idx 204 end := start + tlen 205 if start < 0 { 206 panic("vector invalid index access") 207 } 208 return v.data[start:end] 209 } 210 } 211 212 func (v *Vector) MarshalBinary() ([]byte, error) { 213 var buf bytes.Buffer 214 215 if v.isConst { 216 i64 := int64(v.ScalarLength()) 217 buf.WriteByte(1) 218 buf.Write(types.EncodeInt64(&i64)) 219 } else { 220 buf.WriteByte(0) 221 // length, even not used, let's fill it. 222 i64 := int64(0) 223 buf.Write(types.EncodeInt64(&i64)) 224 } 225 data, err := v.Show() 226 if err != nil { 227 return nil, err 228 } 229 buf.Write(data) 230 return buf.Bytes(), nil 231 } 232 233 func (v *Vector) UnmarshalBinary(data []byte) error { 234 if data[0] == 1 { 235 v.isConst = true 236 data = data[1:] 237 v.SetScalarLength(int(types.DecodeInt64(data[:8]))) 238 data = data[8:] 239 } else { 240 data = data[1:] 241 // skip 0 242 data = data[8:] 243 } 244 return v.Read(data) 245 } 246 247 // Size of data, I think this function is inherently broken. This 248 // Size is not meaningful other than used in (approximate) memory accounting. 249 func (v *Vector) Size() int { 250 return len(v.data) + len(v.area) 251 } 252 253 func (v *Vector) GetArea() []byte { 254 return v.area 255 } 256 257 func (v *Vector) GetType() types.Type { 258 return v.Typ 259 } 260 261 func (v *Vector) GetNulls() *nulls.Nulls { 262 return v.Nsp 263 } 264 265 func (v *Vector) GetBytes(i int64) []byte { 266 bs := MustTCols[types.Varlena](v) 267 return bs[i].GetByteSlice(v.area) 268 } 269 270 func (v *Vector) GetString(i int64) string { 271 272 bs := MustTCols[types.Varlena](v) 273 return bs[i].GetString(v.area) 274 } 275 276 func (v *Vector) FillDefaultValue() { 277 if !nulls.Any(v.Nsp) || len(v.data) == 0 { 278 return 279 } 280 switch v.Typ.Oid { 281 case types.T_bool: 282 fillDefaultValue[bool](v) 283 case types.T_int8: 284 fillDefaultValue[int8](v) 285 case types.T_int16: 286 fillDefaultValue[int16](v) 287 case types.T_int32: 288 fillDefaultValue[int32](v) 289 case types.T_int64: 290 fillDefaultValue[int64](v) 291 case types.T_uint8: 292 fillDefaultValue[uint8](v) 293 case types.T_uint16: 294 fillDefaultValue[uint16](v) 295 case types.T_uint32: 296 fillDefaultValue[uint32](v) 297 case types.T_uint64: 298 fillDefaultValue[uint64](v) 299 case types.T_float32: 300 fillDefaultValue[float32](v) 301 case types.T_float64: 302 fillDefaultValue[float64](v) 303 case types.T_date: 304 fillDefaultValue[types.Date](v) 305 case types.T_datetime: 306 fillDefaultValue[types.Datetime](v) 307 case types.T_time: 308 fillDefaultValue[types.Time](v) 309 case types.T_timestamp: 310 fillDefaultValue[types.Timestamp](v) 311 case types.T_decimal64: 312 fillDefaultValue[types.Decimal64](v) 313 case types.T_decimal128: 314 fillDefaultValue[types.Decimal128](v) 315 case types.T_uuid: 316 fillDefaultValue[types.Uuid](v) 317 case types.T_TS: 318 fillDefaultValue[types.TS](v) 319 case types.T_Rowid: 320 fillDefaultValue[types.Rowid](v) 321 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 322 fillDefaultValue[types.Varlena](v) 323 default: 324 panic("unsupported type in FillDefaultValue") 325 } 326 } 327 328 func (v *Vector) ToConst(row int, mp *mpool.MPool) *Vector { 329 if v.isConst { 330 return v 331 } 332 switch v.Typ.Oid { 333 case types.T_bool: 334 return toConstVector[bool](v, row, mp) 335 case types.T_int8: 336 return toConstVector[int8](v, row, mp) 337 case types.T_int16: 338 return toConstVector[int16](v, row, mp) 339 case types.T_int32: 340 return toConstVector[int32](v, row, mp) 341 case types.T_int64: 342 return toConstVector[int64](v, row, mp) 343 case types.T_uint8: 344 return toConstVector[uint8](v, row, mp) 345 case types.T_uint16: 346 return toConstVector[uint16](v, row, mp) 347 case types.T_uint32: 348 return toConstVector[uint32](v, row, mp) 349 case types.T_uint64: 350 return toConstVector[uint64](v, row, mp) 351 case types.T_float32: 352 return toConstVector[float32](v, row, mp) 353 case types.T_float64: 354 return toConstVector[float64](v, row, mp) 355 case types.T_date: 356 return toConstVector[types.Date](v, row, mp) 357 case types.T_datetime: 358 return toConstVector[types.Datetime](v, row, mp) 359 case types.T_time: 360 return toConstVector[types.Time](v, row, mp) 361 case types.T_timestamp: 362 return toConstVector[types.Timestamp](v, row, mp) 363 case types.T_decimal64: 364 return toConstVector[types.Decimal64](v, row, mp) 365 case types.T_decimal128: 366 return toConstVector[types.Decimal128](v, row, mp) 367 case types.T_uuid: 368 return toConstVector[types.Uuid](v, row, mp) 369 case types.T_TS: 370 return toConstVector[types.TS](v, row, mp) 371 case types.T_Rowid: 372 return toConstVector[types.Rowid](v, row, mp) 373 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 374 if nulls.Contains(v.Nsp, uint64(row)) { 375 return NewConstNull(v.GetType(), 1) 376 } 377 bs := v.GetBytes(int64(row)) 378 return NewConstBytes(v.Typ, 1, bs, mp) 379 } 380 return nil 381 } 382 383 /* 384 ConstExpand 385 expandCols: 386 - true: extend the field Col of the vector that is scalar null 387 - false: same as before 388 */ 389 func (v *Vector) ConstExpand(expandCols bool, m *mpool.MPool) *Vector { 390 if !v.isConst { 391 return v 392 } 393 if !expandCols && v.IsScalarNull() { 394 vlen := uint64(v.ScalarLength()) 395 nulls.AddRange(v.Nsp, 0, vlen) 396 return v 397 } 398 399 switch v.Typ.Oid { 400 case types.T_bool: 401 expandVector[bool](v, 1, m) 402 case types.T_int8: 403 expandVector[int8](v, 1, m) 404 case types.T_int16: 405 expandVector[int16](v, 2, m) 406 case types.T_int32: 407 expandVector[int32](v, 4, m) 408 case types.T_int64: 409 expandVector[int64](v, 8, m) 410 case types.T_uint8: 411 expandVector[uint8](v, 1, m) 412 case types.T_uint16: 413 expandVector[uint16](v, 2, m) 414 case types.T_uint32: 415 expandVector[uint32](v, 4, m) 416 case types.T_uint64: 417 expandVector[uint64](v, 8, m) 418 case types.T_float32: 419 expandVector[float32](v, 4, m) 420 case types.T_float64: 421 expandVector[float64](v, 8, m) 422 case types.T_date: 423 expandVector[types.Date](v, 4, m) 424 case types.T_datetime: 425 expandVector[types.Datetime](v, 8, m) 426 case types.T_time: 427 expandVector[types.Time](v, 8, m) 428 case types.T_timestamp: 429 expandVector[types.Timestamp](v, 8, m) 430 case types.T_decimal64: 431 expandVector[types.Decimal64](v, 8, m) 432 case types.T_decimal128: 433 expandVector[types.Decimal128](v, 16, m) 434 case types.T_uuid: 435 expandVector[types.Uuid](v, 16, m) 436 case types.T_TS: 437 expandVector[types.TS](v, types.TxnTsSize, m) 438 case types.T_Rowid: 439 expandVector[types.Rowid](v, types.RowidSize, m) 440 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 441 expandVector[types.Varlena](v, types.VarlenaSize, m) 442 } 443 v.isConst = false 444 return v 445 } 446 447 func (v *Vector) TryExpandNulls(n int) { 448 if v.Nsp == nil { 449 v.Nsp = &nulls.Nulls{Np: bitmap.New(0)} 450 } 451 nulls.TryExpand(v.Nsp, n) 452 } 453 454 func fillDefaultValue[T types.FixedSizeT](v *Vector) { 455 var dv T 456 col := v.Col.([]T) 457 rows := v.Nsp.Np.ToArray() 458 for _, row := range rows { 459 col[row] = dv 460 } 461 v.Col = col 462 } 463 464 func toConstVector[T types.FixedSizeT](v *Vector, row int, m *mpool.MPool) *Vector { 465 if nulls.Contains(v.Nsp, uint64(row)) { 466 return NewConstNull(v.Typ, 1) 467 } else { 468 val := GetValueAt[T](v, int64(row)) 469 return NewConstFixed(v.Typ, 1, val, m) 470 } 471 } 472 473 // expandVector is used only in expand const vector. 474 func expandVector[T any](v *Vector, sz int, m *mpool.MPool) *Vector { 475 data, err := m.Alloc(v.ScalarLength() * sz) 476 if err != nil { 477 return nil 478 } 479 vs := types.DecodeSlice[T](data) 480 if nulls.Any(v.Nsp) { 481 for i := 0; i < v.ScalarLength(); i++ { 482 nulls.Add(v.Nsp, uint64(i)) 483 } 484 } else { 485 val := v.Col.([]T)[0] 486 for i := 0; i < v.ScalarLength(); i++ { 487 vs[i] = val 488 } 489 } 490 v.Col = vs 491 if v.data != nil { 492 m.Free(v.data) 493 } 494 v.data = data[:len(vs)*sz] 495 return v 496 } 497 498 func NewWithStrings(typ types.Type, vals []string, nsp *nulls.Nulls, m *mpool.MPool) *Vector { 499 vec := New(typ) 500 nulls.Set(vec.Nsp, nsp) 501 AppendString(vec, vals, m) 502 return vec 503 } 504 505 func NewWithBytes(typ types.Type, vals [][]byte, nsp *nulls.Nulls, m *mpool.MPool) *Vector { 506 vec := New(typ) 507 nulls.Set(vec.Nsp, nsp) 508 AppendBytes(vec, vals, m) 509 return vec 510 } 511 512 func NewWithFixed[T any](typ types.Type, vals []T, nsp *nulls.Nulls, m *mpool.MPool) *Vector { 513 vec := New(typ) 514 nulls.Set(vec.Nsp, nsp) 515 AppendFixed(vec, vals, m) 516 return vec 517 } 518 519 func New(typ types.Type) *Vector { 520 return &Vector{ 521 Nsp: &nulls.Nulls{}, 522 Typ: typ, 523 original: false, 524 } 525 } 526 527 func NewOriginal(typ types.Type) *Vector { 528 return &Vector{ 529 Nsp: &nulls.Nulls{}, 530 Typ: typ, 531 original: true, 532 } 533 } 534 535 func NewOriginalWithData(typ types.Type, data []byte, nsp *nulls.Nulls) *Vector { 536 v := &Vector{ 537 Nsp: nsp, 538 Typ: typ, 539 data: data, 540 } 541 v.SetOriginal(true) 542 v.colFromData() 543 return v 544 } 545 546 func NewWithNspSize(typ types.Type, n int64) *Vector { 547 return &Vector{ 548 Nsp: nulls.NewWithSize(int(n)), 549 Typ: typ, 550 original: false, 551 } 552 } 553 554 func NewConstNullWithData(typ types.Type, length int, mp *mpool.MPool) *Vector { 555 v := New(typ) 556 v.isConst = true 557 val := GetInitConstVal(typ) 558 v.Append(val, true, mp) 559 v.length = length 560 return v 561 } 562 563 func NewConst(typ types.Type, length int) *Vector { 564 v := New(typ) 565 v.isConst = true 566 v.initConst(typ) 567 v.length = length 568 return v 569 } 570 571 func NewConstNull(typ types.Type, length int) *Vector { 572 v := New(typ) 573 v.isConst = true 574 v.initConst(typ) 575 nulls.Add(v.Nsp, 0) 576 v.length = length 577 return v 578 } 579 580 func NewConstFixed[T types.FixedSizeT](typ types.Type, length int, val T, mp *mpool.MPool) *Vector { 581 if mp == nil { 582 panic(moerr.NewInternalErrorNoCtx("vector NewConstFixed does not have a mpool")) 583 } 584 v := NewConst(typ, length) 585 v.Append(val, false, mp) 586 return v 587 } 588 589 func NewConstString(typ types.Type, length int, val string, mp *mpool.MPool) *Vector { 590 if mp == nil { 591 panic(moerr.NewInternalErrorNoCtx("vector NewConstString does not have a mpool")) 592 } 593 v := NewConst(typ, length) 594 SetStringAt(v, 0, val, mp) 595 return v 596 } 597 598 func NewConstBytes(typ types.Type, length int, val []byte, mp *mpool.MPool) *Vector { 599 if mp == nil { 600 panic(moerr.NewInternalErrorNoCtx("vector NewConstBytes does not have a mpool")) 601 } 602 v := NewConst(typ, length) 603 SetBytesAt(v, 0, val, mp) 604 return v 605 } 606 607 func (v *Vector) initConst(typ types.Type) { 608 switch typ.Oid { 609 case types.T_bool: 610 v.Col = []bool{false} 611 case types.T_int8: 612 v.Col = []int8{0} 613 case types.T_int16: 614 v.Col = []int16{0} 615 case types.T_int32: 616 v.Col = []int32{0} 617 case types.T_int64: 618 v.Col = []int64{0} 619 case types.T_uint8: 620 v.Col = []uint8{0} 621 case types.T_uint16: 622 v.Col = []uint16{0} 623 case types.T_uint32: 624 v.Col = []uint32{0} 625 case types.T_uint64: 626 v.Col = []uint64{0} 627 case types.T_float32: 628 v.Col = []float32{0} 629 case types.T_float64: 630 v.Col = []float64{0} 631 case types.T_date: 632 v.Col = make([]types.Date, 1) 633 case types.T_datetime: 634 v.Col = make([]types.Datetime, 1) 635 case types.T_time: 636 v.Col = make([]types.Time, 1) 637 case types.T_timestamp: 638 v.Col = make([]types.Timestamp, 1) 639 case types.T_decimal64: 640 v.Col = make([]types.Decimal64, 1) 641 case types.T_decimal128: 642 v.Col = make([]types.Decimal128, 1) 643 case types.T_uuid: 644 v.Col = make([]types.Uuid, 1) 645 case types.T_TS: 646 v.Col = make([]types.TS, 1) 647 case types.T_Rowid: 648 v.Col = make([]types.Rowid, 1) 649 case types.T_char, types.T_varchar, types.T_blob, types.T_json, types.T_text: 650 v.Col = make([]types.Varlena, 1) 651 } 652 } 653 654 // IsScalar return true if the vector means a scalar value. 655 // e.g. 656 // 657 // a + 1, and 1's vector will return true 658 func (v *Vector) IsScalar() bool { 659 return v.isConst 660 } 661 func (v *Vector) IsConst() bool { 662 return v.isConst 663 } 664 665 // MakeScalar converts a vector to a scalar vec of length. 666 func (v *Vector) MakeScalar(length int) { 667 if v.isConst { 668 v.length = length 669 } else { 670 if v.Length() != 1 { 671 panic("make scalar called on a vec") 672 } 673 v.isConst = true 674 v.length = length 675 } 676 } 677 678 // IsScalarNull return true if the vector means a scalar Null. 679 // e.g. 680 // 681 // a + Null, and the vector of right part will return true 682 func (v *Vector) IsScalarNull() bool { 683 return v.isConst && v.Nsp != nil && nulls.Contains(v.Nsp, 0) 684 } 685 686 // XXX aliases ... 687 func (v *Vector) ConstVectorIsNull() bool { 688 return v.IsScalarNull() 689 } 690 691 func (v *Vector) Free(m *mpool.MPool) { 692 if v.original { 693 // XXX: Should we panic, or this is really an Noop? 694 return 695 } 696 697 // const vector's data & area allocate with nil, 698 // so we can't free it by using mpool. 699 if v.data != nil { 700 m.Free(v.data) 701 } 702 if v.area != nil { 703 m.Free(v.area) 704 } 705 v.data = nil 706 v.area = nil 707 v.colFromData() 708 } 709 710 func (v *Vector) FreeOriginal(m *mpool.MPool) { 711 if v.original { 712 m.Free(v.data) 713 v.data = nil 714 v.colFromData() 715 m.Free(v.area) 716 v.area = nil 717 return 718 } 719 panic("force original tries to free non-orignal vec") 720 } 721 722 func appendOne[T types.FixedSizeT](v *Vector, w T, isNull bool, m *mpool.MPool) error { 723 if err := v.extend(1, m); err != nil { 724 return err 725 } 726 col := MustTCols[T](v) 727 pos := len(col) - 1 728 if isNull { 729 nulls.Add(v.Nsp, uint64(pos)) 730 } else { 731 col[pos] = w 732 } 733 return nil 734 } 735 736 func appendOneBytes(v *Vector, bs []byte, isNull bool, m *mpool.MPool) error { 737 var err error 738 var va types.Varlena 739 if isNull { 740 return appendOne(v, va, true, m) 741 } else { 742 va, v.area, err = types.BuildVarlena(bs, v.area, m) 743 if err != nil { 744 return err 745 } 746 return appendOne(v, va, false, m) 747 } 748 } 749 750 func (v *Vector) Append(w any, isNull bool, m *mpool.MPool) error { 751 if m == nil { 752 panic(moerr.NewInternalErrorNoCtx("vector append does not have a mpool")) 753 } 754 switch v.Typ.Oid { 755 case types.T_bool: 756 return appendOne(v, w.(bool), isNull, m) 757 case types.T_int8: 758 return appendOne(v, w.(int8), isNull, m) 759 case types.T_int16: 760 return appendOne(v, w.(int16), isNull, m) 761 case types.T_int32: 762 return appendOne(v, w.(int32), isNull, m) 763 case types.T_int64: 764 return appendOne(v, w.(int64), isNull, m) 765 case types.T_uint8: 766 return appendOne(v, w.(uint8), isNull, m) 767 case types.T_uint16: 768 return appendOne(v, w.(uint16), isNull, m) 769 case types.T_uint32: 770 return appendOne(v, w.(uint32), isNull, m) 771 case types.T_uint64: 772 return appendOne(v, w.(uint64), isNull, m) 773 case types.T_float32: 774 return appendOne(v, w.(float32), isNull, m) 775 case types.T_float64: 776 return appendOne(v, w.(float64), isNull, m) 777 case types.T_date: 778 return appendOne(v, w.(types.Date), isNull, m) 779 case types.T_datetime: 780 return appendOne(v, w.(types.Datetime), isNull, m) 781 case types.T_time: 782 return appendOne(v, w.(types.Time), isNull, m) 783 case types.T_timestamp: 784 return appendOne(v, w.(types.Timestamp), isNull, m) 785 case types.T_decimal64: 786 return appendOne(v, w.(types.Decimal64), isNull, m) 787 case types.T_decimal128: 788 return appendOne(v, w.(types.Decimal128), isNull, m) 789 case types.T_uuid: 790 return appendOne(v, w.(types.Uuid), isNull, m) 791 case types.T_TS: 792 return appendOne(v, w.(types.TS), isNull, m) 793 case types.T_Rowid: 794 return appendOne(v, w.(types.Rowid), isNull, m) 795 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 796 return appendOneBytes(v, w.([]byte), isNull, m) 797 } 798 return nil 799 } 800 801 func Clean(v *Vector, m *mpool.MPool) { 802 v.Free(m) 803 } 804 805 func SetCol(v *Vector, col interface{}) { 806 v.Col = col 807 } 808 809 func SetTAt[T types.FixedSizeT](v *Vector, idx int, t T) error { 810 // Let it panic if v is not a varlena vec 811 vacol := MustTCols[T](v) 812 813 if idx < 0 { 814 idx = len(vacol) + idx 815 } 816 if idx < 0 || idx >= len(vacol) { 817 return moerr.NewInternalErrorNoCtx("vector idx out of range") 818 } 819 vacol[idx] = t 820 return nil 821 } 822 823 func SetBytesAt(v *Vector, idx int, bs []byte, m *mpool.MPool) error { 824 var va types.Varlena 825 var err error 826 va, v.area, err = types.BuildVarlena(bs, v.area, m) 827 if err != nil { 828 return err 829 } 830 return SetTAt(v, idx, va) 831 } 832 833 func SetStringAt(v *Vector, idx int, bs string, m *mpool.MPool) error { 834 return SetBytesAt(v, idx, []byte(bs), m) 835 } 836 837 // XXX: PreAlloc create a empty v, with enough fixed slots to cap entry. 838 func PreAlloc(v *Vector, rows, cap int, m *mpool.MPool) { 839 var data []byte 840 var err error 841 sz := int64(cap * v.GetType().TypeSize()) 842 if m == nil { 843 panic(moerr.NewInternalErrorNoCtx("vector alloc must use mpool")) 844 } else { 845 // XXX was alloc rows, not cap. This is wrong, at least not 846 // matching the comment. 847 data, err = m.Alloc(int(sz)) 848 } 849 850 // XXX: Was just returned and runs defer, which was Huh? Let me panic. 851 if err != nil { 852 panic(err) 853 } 854 v.data = data 855 v.setupColFromData(0, rows) 856 } 857 858 func PreAllocType(t types.Type, rows, cap int, m *mpool.MPool) *Vector { 859 vec := New(t) 860 PreAlloc(vec, rows, cap, m) 861 return vec 862 } 863 864 func Length(v *Vector) int { 865 if !v.isConst { 866 if v.Col == nil { 867 return 0 868 } 869 // XXX reflect? 870 // Hard to tell which is faster, len(v.data) / v.typ.TypeLen() 871 return reflect.ValueOf(v.Col).Len() 872 } 873 return v.ScalarLength() 874 } 875 876 func SetLength(v *Vector, n int) { 877 if v.IsScalar() { 878 // XXX old code test this one. Why? || v.Typ.Oid == types.T_any { 879 v.SetScalarLength(n) 880 return 881 } 882 SetVectorLength(v, n) 883 } 884 885 func SetVectorLength(v *Vector, n int) { 886 end := len(v.data) / v.GetType().TypeSize() 887 if n > end { 888 panic("extend instead of shink vector") 889 } 890 nulls.RemoveRange(v.Nsp, uint64(n), uint64(end)) 891 v.setupColFromData(0, n) 892 } 893 894 // XXX Original code is really confused by what is dup ... 895 func Dup(v *Vector, m *mpool.MPool) (*Vector, error) { 896 to := Vector{ 897 Typ: v.Typ, 898 Nsp: v.Nsp.Clone(), 899 } 900 901 var err error 902 903 // Copy v.data, note that this should work for Varlena type 904 // as because we will copy area next and offset len will stay 905 // valid for long varlena. 906 if len(v.data) > 0 { 907 if to.data, err = m.Alloc(int(len(v.data))); err != nil { 908 return nil, err 909 } 910 copy(to.data, v.data) 911 } 912 if len(v.area) > 0 { 913 if to.area, err = m.Alloc(int(len(v.area))); err != nil { 914 return nil, err 915 } 916 copy(to.area, v.area) 917 } 918 919 nele := len(v.data) / v.GetType().TypeSize() 920 to.setupColFromData(0, nele) 921 return &to, nil 922 } 923 924 // Window just returns a window out of input and no deep copy. 925 func Window(v *Vector, start, end int, w *Vector) *Vector { 926 w.Typ = v.Typ 927 w.Nsp = nulls.Range(v.Nsp, uint64(start), uint64(end), uint64(start), w.Nsp) 928 w.data = v.data 929 w.area = v.area 930 w.setupColFromData(start, end) 931 return w 932 } 933 934 func AppendFixed[T any](v *Vector, arg []T, m *mpool.MPool) error { 935 var err error 936 narg := len(arg) 937 if narg == 0 { 938 return nil 939 } 940 941 if m == nil { 942 panic(moerr.NewInternalErrorNoCtx("vector AppendFixed does not have a valid mpool")) 943 } 944 945 oldSz := len(v.data) 946 argSz := narg * v.GetType().TypeSize() 947 nsz := oldSz + argSz 948 v.data, err = m.Grow(v.data, nsz) 949 if err != nil { 950 return err 951 } 952 copy(v.data[oldSz:nsz], types.EncodeSlice(arg)) 953 v.colFromData() 954 return nil 955 } 956 957 func AppendFixedRaw(v *Vector, data []byte, m *mpool.MPool) error { 958 var err error 959 if m == nil { 960 panic(moerr.NewInternalErrorNoCtx("vector AppendFixed does not have a valid mpool")) 961 } 962 963 argSz := len(data) 964 if argSz == 0 { 965 return nil 966 } 967 968 oldSz := len(v.data) 969 nsz := oldSz + argSz 970 v.data, err = m.Grow(v.data, nsz) 971 if err != nil { 972 return err 973 } 974 copy(v.data[oldSz:nsz], data) 975 v.colFromData() 976 return nil 977 } 978 979 func AppendBytes(v *Vector, arg [][]byte, m *mpool.MPool) error { 980 var err error 981 if m == nil { 982 panic(moerr.NewInternalErrorNoCtx("vector AppendBytes does not have a pool")) 983 } 984 vas := make([]types.Varlena, len(arg)) 985 for idx, bs := range arg { 986 vas[idx], v.area, err = types.BuildVarlena(bs, v.area, m) 987 if err != nil { 988 return err 989 } 990 } 991 return AppendFixed(v, vas, m) 992 } 993 994 func AppendString(v *Vector, arg []string, m *mpool.MPool) error { 995 var err error 996 if m == nil { 997 panic(moerr.NewInternalErrorNoCtx("vector AppendBytes does not have a pool")) 998 } 999 vas := make([]types.Varlena, len(arg)) 1000 for idx, bs := range arg { 1001 vas[idx], v.area, err = types.BuildVarlena([]byte(bs), v.area, m) 1002 if err != nil { 1003 return err 1004 } 1005 } 1006 return AppendFixed(v, vas, m) 1007 } 1008 1009 func AppendTuple(v *Vector, arg [][]interface{}) error { 1010 if v.GetType().IsTuple() { 1011 return moerr.NewInternalErrorNoCtx("append tuple to non tuple vector") 1012 } 1013 v.Col = append(v.Col.([][]interface{}), arg...) 1014 return nil 1015 } 1016 1017 func ShrinkFixed[T types.FixedSizeT](v *Vector, sels []int64) { 1018 vs := MustTCols[T](v) 1019 for i, sel := range sels { 1020 vs[i] = vs[sel] 1021 } 1022 v.Col = vs[:len(sels)] 1023 if len(sels) == 0 { 1024 v.data = v.data[:0] 1025 } else { 1026 v.data = v.encodeColToByteSlice() 1027 } 1028 v.Nsp = nulls.Filter(v.Nsp, sels) 1029 } 1030 func Shrink(v *Vector, sels []int64) { 1031 if v.IsScalar() { 1032 v.SetScalarLength(len(sels)) 1033 return 1034 } 1035 1036 switch v.Typ.Oid { 1037 case types.T_bool: 1038 ShrinkFixed[bool](v, sels) 1039 case types.T_int8: 1040 ShrinkFixed[int8](v, sels) 1041 case types.T_int16: 1042 ShrinkFixed[int16](v, sels) 1043 case types.T_int32: 1044 ShrinkFixed[int32](v, sels) 1045 case types.T_int64: 1046 ShrinkFixed[int64](v, sels) 1047 case types.T_uint8: 1048 ShrinkFixed[uint8](v, sels) 1049 case types.T_uint16: 1050 ShrinkFixed[uint16](v, sels) 1051 case types.T_uint32: 1052 ShrinkFixed[uint32](v, sels) 1053 case types.T_uint64: 1054 ShrinkFixed[uint64](v, sels) 1055 case types.T_float32: 1056 ShrinkFixed[float32](v, sels) 1057 case types.T_float64: 1058 ShrinkFixed[float64](v, sels) 1059 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 1060 // XXX shrink varlena, but did not shrink area. For our vector, this 1061 // may well be the right thing. If want to shrink area as well, we 1062 // have to copy each varlena value and swizzle pointer. 1063 ShrinkFixed[types.Varlena](v, sels) 1064 case types.T_date: 1065 ShrinkFixed[types.Date](v, sels) 1066 case types.T_datetime: 1067 ShrinkFixed[types.Datetime](v, sels) 1068 case types.T_time: 1069 ShrinkFixed[types.Time](v, sels) 1070 case types.T_timestamp: 1071 ShrinkFixed[types.Timestamp](v, sels) 1072 case types.T_decimal64: 1073 ShrinkFixed[types.Decimal64](v, sels) 1074 case types.T_decimal128: 1075 ShrinkFixed[types.Decimal128](v, sels) 1076 case types.T_uuid: 1077 ShrinkFixed[types.Uuid](v, sels) 1078 case types.T_TS: 1079 ShrinkFixed[types.TS](v, sels) 1080 case types.T_Rowid: 1081 ShrinkFixed[types.Rowid](v, sels) 1082 case types.T_tuple: 1083 vs := v.Col.([][]interface{}) 1084 for i, sel := range sels { 1085 vs[i] = vs[sel] 1086 } 1087 v.Col = vs[:len(sels)] 1088 v.Nsp = nulls.Filter(v.Nsp, sels) 1089 default: 1090 panic("vector shrink unknown type") 1091 } 1092 } 1093 1094 // Shuffle assumes we do not have dup in sels. 1095 func ShuffleFixed[T types.FixedSizeT](v *Vector, sels []int64, m *mpool.MPool) error { 1096 olddata := v.data 1097 ns := len(sels) 1098 vs := MustTCols[T](v) 1099 data, err := m.Alloc(int(ns * v.GetType().TypeSize())) 1100 if err != nil { 1101 return err 1102 } 1103 ws := types.DecodeSlice[T](data) 1104 v.Col = shuffle.FixedLengthShuffle(vs, ws, sels) 1105 v.data = types.EncodeSliceWithCap(ws) 1106 v.Nsp = nulls.Filter(v.Nsp, sels) 1107 1108 m.Free(olddata) 1109 return nil 1110 } 1111 1112 func Shuffle(v *Vector, sels []int64, m *mpool.MPool) error { 1113 if v.IsScalar() { 1114 v.SetScalarLength(len(sels)) 1115 return nil 1116 } 1117 switch v.Typ.Oid { 1118 case types.T_bool: 1119 ShuffleFixed[bool](v, sels, m) 1120 case types.T_int8: 1121 ShuffleFixed[int8](v, sels, m) 1122 case types.T_int16: 1123 ShuffleFixed[int16](v, sels, m) 1124 case types.T_int32: 1125 ShuffleFixed[int32](v, sels, m) 1126 case types.T_int64: 1127 ShuffleFixed[int64](v, sels, m) 1128 case types.T_uint8: 1129 ShuffleFixed[uint8](v, sels, m) 1130 case types.T_uint16: 1131 ShuffleFixed[uint16](v, sels, m) 1132 case types.T_uint32: 1133 ShuffleFixed[uint32](v, sels, m) 1134 case types.T_uint64: 1135 ShuffleFixed[uint64](v, sels, m) 1136 case types.T_float32: 1137 ShuffleFixed[float32](v, sels, m) 1138 case types.T_float64: 1139 ShuffleFixed[float64](v, sels, m) 1140 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 1141 ShuffleFixed[types.Varlena](v, sels, m) 1142 case types.T_date: 1143 ShuffleFixed[types.Date](v, sels, m) 1144 case types.T_datetime: 1145 ShuffleFixed[types.Datetime](v, sels, m) 1146 case types.T_time: 1147 ShuffleFixed[types.Time](v, sels, m) 1148 case types.T_timestamp: 1149 ShuffleFixed[types.Timestamp](v, sels, m) 1150 case types.T_decimal64: 1151 ShuffleFixed[types.Decimal64](v, sels, m) 1152 case types.T_decimal128: 1153 ShuffleFixed[types.Decimal128](v, sels, m) 1154 case types.T_uuid: 1155 ShuffleFixed[types.Uuid](v, sels, m) 1156 case types.T_TS: 1157 ShuffleFixed[types.TS](v, sels, m) 1158 case types.T_Rowid: 1159 ShuffleFixed[types.Rowid](v, sels, m) 1160 case types.T_tuple: 1161 vs := v.Col.([][]interface{}) 1162 ws := make([][]interface{}, len(vs)) 1163 v.Col = shuffle.TupleShuffle(vs, ws, sels) 1164 v.Nsp = nulls.Filter(v.Nsp, sels) 1165 default: 1166 panic(fmt.Sprintf("unexpect type %s for function vector.Shuffle", v.Typ)) 1167 } 1168 return nil 1169 } 1170 1171 func (v *Vector) Show() ([]byte, error) { 1172 // Write Typ 1173 var buf bytes.Buffer 1174 vtbs := types.EncodeType(&v.Typ) 1175 buf.Write(vtbs) 1176 1177 // Write nspLen, nsp 1178 nb, err := v.Nsp.Show() 1179 if err != nil { 1180 return nil, err 1181 } 1182 1183 lenNb := uint32(len(nb)) 1184 buf.Write(types.EncodeUint32(&lenNb)) 1185 if len(nb) > 0 { 1186 buf.Write(nb) 1187 } 1188 1189 // Write colLen, col 1190 bs := v.encodeColToByteSlice() 1191 lenBs := uint32(len(bs)) 1192 buf.Write(types.EncodeUint32(&lenBs)) 1193 if len(bs) > 0 { 1194 buf.Write(bs) 1195 } 1196 1197 // Write areaLen, area 1198 if len(v.area) == 0 { 1199 z := uint32(0) 1200 buf.Write(types.EncodeUint32(&z)) 1201 } else { 1202 lenA := uint32(len(v.area)) 1203 buf.Write(types.EncodeUint32(&lenA)) 1204 buf.Write(v.area) 1205 } 1206 return buf.Bytes(), nil 1207 } 1208 1209 func (v *Vector) Read(data []byte) error { 1210 typ := types.DecodeType(data[:types.TSize]) 1211 data = data[types.TSize:] 1212 v.Typ = typ 1213 v.original = true 1214 1215 // Read nspLen, nsp 1216 v.Nsp = &nulls.Nulls{} 1217 size := types.DecodeUint32(data) 1218 data = data[4:] 1219 if size > 0 { 1220 if err := v.Nsp.Read(data[:size]); err != nil { 1221 return err 1222 } 1223 data = data[size:] 1224 } 1225 1226 // Read colLen, col, 1227 size = types.DecodeUint32(data) 1228 data = data[4:] 1229 if size > 0 { 1230 if v.GetType().IsTuple() { 1231 col := v.Col.([][]interface{}) 1232 if err := types.Decode(data[:size], &col); err != nil { 1233 return err 1234 } 1235 v.Col = col 1236 } else { 1237 v.data = data[:size] 1238 v.setupColFromData(0, int(size/uint32(v.GetType().TypeSize()))) 1239 } 1240 data = data[size:] 1241 } else { 1242 // This will give Col correct type. 1243 v.colFromData() 1244 } 1245 1246 // Read areaLen and area 1247 size = types.DecodeUint32(data) 1248 if size != 0 { 1249 data = data[4:] 1250 v.area = data[:size] 1251 } 1252 return nil 1253 } 1254 1255 // XXX Old Copy is FUBAR. 1256 // Copy simply does v[vi] = w[wi] 1257 func Copy(v, w *Vector, vi, wi int64, m *mpool.MPool) error { 1258 if v.GetType().IsTuple() { 1259 // Not sure if Copy ever handle tuple 1260 panic("copy tuple vector.") 1261 } else if v.GetType().IsFixedLen() { 1262 tlen := int64(v.GetType().TypeSize()) 1263 copy(v.data[vi*tlen:(vi+1)*tlen], w.data[wi*tlen:(wi+1)*tlen]) 1264 } else { 1265 var err error 1266 vva := MustTCols[types.Varlena](v) 1267 wva := MustTCols[types.Varlena](w) 1268 if wva[wi].IsSmall() { 1269 vva[vi] = wva[wi] 1270 } else { 1271 bs := wva[wi].GetByteSlice(w.area) 1272 vva[vi], v.area, err = types.BuildVarlena(bs, v.area, m) 1273 if err != nil { 1274 return err 1275 } 1276 } 1277 } 1278 return nil 1279 } 1280 1281 // XXX Old UnionOne is FUBAR 1282 // It is simply append. We do not go through appendOne interface because 1283 // we don't want to horrible type switch. 1284 func UnionOne(v, w *Vector, sel int64, m *mpool.MPool) (err error) { 1285 if v.original { 1286 return moerr.NewInternalErrorNoCtx("UnionOne cannot be performed on orig vector") 1287 } 1288 1289 if err = v.extend(1, m); err != nil { 1290 return err 1291 } 1292 1293 if v.GetType().IsTuple() { 1294 vs := v.Col.([][]interface{}) 1295 ws := w.Col.([][]interface{}) 1296 if w.IsScalar() { 1297 sel = 0 1298 } 1299 v.Col = append(vs, ws[sel]) 1300 return nil 1301 } 1302 1303 if w.IsScalarNull() || nulls.Contains(w.Nsp, uint64(sel)) { 1304 pos := uint64(v.Length() - 1) 1305 nulls.Add(v.Nsp, pos) 1306 } else { 1307 if w.IsScalar() { 1308 sel = 0 1309 } 1310 if v.GetType().IsVarlen() { 1311 bs := w.GetBytes(sel) 1312 if v.GetType().Width != 0 && lengthutf8.CountUTF8CodePoints([]byte(bs)) > uint64(v.GetType().Width) { 1313 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), v.GetType().Width) 1314 } 1315 if v.GetType().Width == 0 && (v.GetType().Oid == types.T_varchar || v.GetType().Oid == types.T_char) { 1316 if len(bs) > 0 { 1317 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), 0) 1318 } 1319 } 1320 tgt := MustTCols[types.Varlena](v) 1321 nele := len(tgt) 1322 tgt[nele-1], v.area, err = types.BuildVarlena(bs, v.area, m) 1323 if err != nil { 1324 return err 1325 } 1326 } else { 1327 src := w.getRawValueAt(sel) 1328 tgt := v.getRawValueAt(-1) 1329 copy(tgt, src) 1330 } 1331 } 1332 return nil 1333 } 1334 1335 func UnionMulti(v, w *Vector, sel int64, cnt int, m *mpool.MPool) (err error) { 1336 if v.original { 1337 return moerr.NewInternalErrorNoCtx("UnionMulti cannot be performed on orig vector") 1338 } 1339 1340 curIdx := v.Length() 1341 oldLen := uint64(curIdx) 1342 1343 if err = v.extend(cnt, m); err != nil { 1344 return err 1345 } 1346 1347 if w.IsScalar() { 1348 sel = 0 1349 } 1350 1351 if v.GetType().IsTuple() { 1352 vs := v.Col.([][]interface{}) 1353 ws := w.Col.([][]interface{}) 1354 for i := 0; i < cnt; i++ { 1355 vs = append(vs, ws[sel]) 1356 } 1357 v.Col = vs 1358 } else { 1359 if v.GetType().IsVarlen() { 1360 tgt := MustTCols[types.Varlena](v) 1361 bs := w.GetBytes(sel) 1362 if v.GetType().Width != 0 && len(bs) > int(v.GetType().Width) { 1363 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), v.GetType().Width) 1364 } 1365 if v.GetType().Width == 0 && (v.GetType().Oid == types.T_varchar || v.GetType().Oid == types.T_char) { 1366 if len(bs) > 0 { 1367 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), 0) 1368 } 1369 } 1370 for i := 0; i < cnt; i++ { 1371 tgt[curIdx], v.area, err = types.BuildVarlena(bs, v.area, m) 1372 curIdx += 1 1373 } 1374 } else { 1375 src := w.getRawValueAt(sel) 1376 for i := 0; i < cnt; i++ { 1377 tgt := v.getRawValueAt(int64(curIdx)) 1378 copy(tgt, src) 1379 curIdx += 1 1380 } 1381 } 1382 } 1383 1384 if nulls.Contains(w.Nsp, uint64(sel)) { 1385 nulls.AddRange(v.Nsp, oldLen, oldLen+uint64(cnt)) 1386 } 1387 1388 return 1389 } 1390 1391 // XXX Old UnionNull is FUBAR 1392 // func UnionNull(v, _ *Vector, m *mpool.MPool) error 1393 // It seems to do UnionOne(v, v, 0, m), only that if v is empty, 1394 // append a zero value instead of v[0]. I don't know why this 1395 // is called UnionNull -- it does not have much to do with Null. 1396 // 1397 // XXX Original code alloc or grow typesize * 8 bytes. It is not 1398 // clear people want to amortize alloc/grow, or it is a bug. 1399 func UnionNull(v, _ *Vector, m *mpool.MPool) error { 1400 if v.original { 1401 return moerr.NewInternalErrorNoCtx("UnionNull cannot be performed on orig vector") 1402 } 1403 1404 if v.Typ.IsTuple() { 1405 panic(moerr.NewInternalErrorNoCtx("unionnull of tuple vector")) 1406 } 1407 1408 if err := v.extend(1, m); err != nil { 1409 return err 1410 } 1411 1412 // XXX old code actually copies, but it is a null, so what 1413 // is that good for. 1414 // 1415 // We don't care if v.GetType() is fixed len or not. Since 1416 // v.area stays valid, a simple slice copy of Varlena works. 1417 // src := v.getRawValueAtOrZero(0) 1418 // tgt := v.getRawValueAt(-1) 1419 // copy(tgt, src) 1420 1421 pos := uint64(v.Length() - 1) 1422 nulls.Add(v.Nsp, pos) 1423 return nil 1424 } 1425 1426 // XXX Old Union is FUBAR 1427 // Union is just append. 1428 func Union(v, w *Vector, sels []int64, hasNull bool, m *mpool.MPool) (err error) { 1429 if v.original { 1430 return moerr.NewInternalErrorNoCtx("Union cannot be performed on orig vector") 1431 } 1432 1433 oldLen := v.Length() 1434 1435 if err = v.extend(len(sels), m); err != nil { 1436 return err 1437 } 1438 1439 if v.GetType().IsTuple() { 1440 panic("union called on tuple vector") 1441 } else if v.GetType().IsVarlen() { 1442 tgt := MustTCols[types.Varlena](v) 1443 next := len(tgt) - len(sels) 1444 for idx, sel := range sels { 1445 bs := w.GetBytes(sel) 1446 if v.GetType().Width != 0 && len(bs) > int(v.GetType().Width) { 1447 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), v.GetType().Width) 1448 } 1449 1450 if v.GetType().Width == 0 && (v.GetType().Oid == types.T_varchar || v.GetType().Oid == types.T_char) { 1451 if len(bs) > 0 { 1452 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), 0) 1453 } 1454 } 1455 tgt[next+idx], v.area, err = types.BuildVarlena(bs, v.area, m) 1456 if err != nil { 1457 return err 1458 } 1459 } 1460 } else { 1461 next := -int64(len(sels)) 1462 for idx, sel := range sels { 1463 src := w.getRawValueAt(sel) 1464 tgt := v.getRawValueAt(next + int64(idx)) 1465 copy(tgt, src) 1466 } 1467 } 1468 1469 if hasNull && w.Nsp != nil { 1470 for i := range sels { 1471 if nulls.Contains(w.Nsp, uint64(sels[i])) { 1472 nulls.Add(v.Nsp, uint64(oldLen+i)) 1473 } 1474 } 1475 } 1476 1477 return 1478 } 1479 1480 // XXX Old UnionBatch is FUBAR. 1481 func UnionBatch(v, w *Vector, offset int64, cnt int, flags []uint8, m *mpool.MPool) (err error) { 1482 if v.original { 1483 return moerr.NewInternalErrorNoCtx("UnionBatch cannot be performed on orig vector") 1484 } 1485 1486 curIdx := v.Length() 1487 oldLen := uint64(curIdx) 1488 1489 if err = v.extend(cnt, m); err != nil { 1490 return err 1491 } 1492 1493 if v.GetType().IsTuple() { 1494 vs := v.Col.([][]interface{}) 1495 ws := w.Col.([][]interface{}) 1496 for i, flag := range flags { 1497 if flag > 0 { 1498 vs = append(vs, ws[int(offset)+i]) 1499 } 1500 } 1501 v.Col = vs 1502 } else if v.GetType().IsVarlen() { 1503 tgt := MustTCols[types.Varlena](v) 1504 for idx, flg := range flags { 1505 if flg > 0 { 1506 bs := w.GetBytes(offset + int64(idx)) 1507 if v.GetType().Width != 0 && lengthutf8.CountUTF8CodePoints([]byte(bs)) > uint64(v.GetType().Width) { 1508 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), v.GetType().Width) 1509 } 1510 1511 if v.GetType().Width == 0 && (v.GetType().Oid == types.T_varchar || v.GetType().Oid == types.T_char) { 1512 if len(bs) > 0 { 1513 return moerr.NewOutOfRangeNoCtx("varchar/char ", "%v oversize of %v ", string(bs), 0) 1514 } 1515 } 1516 tgt[curIdx], v.area, err = types.BuildVarlena(bs, v.area, m) 1517 curIdx += 1 1518 } 1519 } 1520 } else { 1521 for idx, flg := range flags { 1522 if flg > 0 { 1523 src := w.getRawValueAt(offset + int64(idx)) 1524 tgt := v.getRawValueAt(int64(curIdx)) 1525 copy(tgt, src) 1526 curIdx += 1 1527 } 1528 } 1529 } 1530 1531 if w.Nsp != nil { 1532 for idx, flg := range flags { 1533 if flg > 0 { 1534 if nulls.Contains(w.Nsp, uint64(offset)+uint64(idx)) { 1535 nulls.Add(v.Nsp, oldLen) 1536 } 1537 // Advance oldLen regardless if it is null 1538 oldLen += 1 1539 } 1540 } 1541 } 1542 1543 return 1544 } 1545 1546 // XXX Old Reset is FUBAR FUBAR. I will put the code here just for fun. 1547 func Reset(v *Vector) { 1548 /* 1549 switch v.Typ.Oid { 1550 case types.T_char, types.T_varchar, types.T_json, types.T_blob: 1551 v.Col.(*types.Bytes).Reset() 1552 default: 1553 // WTF is going on? 1554 *(*int)(unsafe.Pointer(uintptr((*(*emptyIntervade)(unsafe.Pointer(&v.Col))).word) + uintptr(strconv.IntSize>>3))) = 0 1555 } 1556 */ 1557 1558 // XXX Reset does not do mem accounting? 1559 // I have no idea what is the purpose of Reset, so let me just Free it. 1560 // Maybe Reset want to keep v.data and v.area to save an allocation. 1561 // Let me do that ... 1562 v.setupColFromData(0, 0) 1563 v.area = v.area[:0] 1564 // XXX What about Nsp? Original code does not do anything to Nsp, which seems OK assuming 1565 // that will be set when we add data and we only test null within range of len(v.Col) 1566 // but who knows ... 1567 } 1568 1569 // XXX What are these stuff, who use it? 1570 func VecToString[T types.FixedSizeT](v *Vector) string { 1571 col := MustTCols[T](v) 1572 if len(col) == 1 { 1573 if nulls.Contains(v.Nsp, 0) { 1574 return "null" 1575 } else { 1576 return fmt.Sprintf("%v", col[0]) 1577 } 1578 } 1579 // XXX Really? What is this ... 1580 return fmt.Sprintf("%v-%s", v.Col, v.Nsp) 1581 } 1582 1583 func (v *Vector) String() string { 1584 switch v.Typ.Oid { 1585 case types.T_bool: 1586 return VecToString[bool](v) 1587 case types.T_int8: 1588 return VecToString[int8](v) 1589 case types.T_int16: 1590 return VecToString[int16](v) 1591 case types.T_int32: 1592 return VecToString[int32](v) 1593 case types.T_int64: 1594 return VecToString[int64](v) 1595 case types.T_uint8: 1596 return VecToString[uint8](v) 1597 case types.T_uint16: 1598 return VecToString[uint16](v) 1599 case types.T_uint32: 1600 return VecToString[uint32](v) 1601 case types.T_uint64: 1602 return VecToString[uint64](v) 1603 case types.T_float32: 1604 return VecToString[float32](v) 1605 case types.T_float64: 1606 return VecToString[float64](v) 1607 case types.T_date: 1608 return VecToString[types.Date](v) 1609 case types.T_datetime: 1610 return VecToString[types.Datetime](v) 1611 case types.T_time: 1612 return VecToString[types.Time](v) 1613 case types.T_timestamp: 1614 return VecToString[types.Timestamp](v) 1615 case types.T_decimal64: 1616 return VecToString[types.Decimal64](v) 1617 case types.T_decimal128: 1618 return VecToString[types.Decimal128](v) 1619 case types.T_uuid: 1620 return VecToString[types.Uuid](v) 1621 case types.T_TS: 1622 return VecToString[types.TS](v) 1623 case types.T_Rowid: 1624 return VecToString[types.Rowid](v) 1625 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 1626 col := MustStrCols(v) 1627 if len(col) == 1 { 1628 if nulls.Contains(v.Nsp, 0) { 1629 return "null" 1630 } else { 1631 return col[0] 1632 } 1633 } 1634 return fmt.Sprintf("%v-%s", v.Col, v.Nsp) 1635 1636 default: 1637 panic("vec to string unknown types.") 1638 } 1639 } 1640 1641 func GetInitConstVal(typ types.Type) any { 1642 switch typ.Oid { 1643 case types.T_bool: 1644 return false 1645 case types.T_int8: 1646 return int8(0) 1647 case types.T_int16: 1648 return int16(0) 1649 case types.T_int32: 1650 return int32(0) 1651 case types.T_int64: 1652 return int64(0) 1653 case types.T_uint8: 1654 return uint8(0) 1655 case types.T_uint16: 1656 return uint16(0) 1657 case types.T_uint32: 1658 return uint32(0) 1659 case types.T_uint64: 1660 return uint64(0) 1661 case types.T_float32: 1662 return float32(0) 1663 case types.T_float64: 1664 return float64(0) 1665 case types.T_date: 1666 return types.Date(0) 1667 case types.T_time: 1668 return types.Time(0) 1669 case types.T_datetime: 1670 return types.Datetime(0) 1671 case types.T_timestamp: 1672 return types.Timestamp(0) 1673 case types.T_decimal64: 1674 return types.Decimal64{} 1675 case types.T_decimal128: 1676 return types.Decimal128{} 1677 case types.T_uuid: 1678 var emptyUuid [16]byte 1679 return emptyUuid[:] 1680 case types.T_TS: 1681 var emptyTs [types.TxnTsSize]byte 1682 return emptyTs[:] 1683 case types.T_Rowid: 1684 var emptyRowid [types.RowidSize]byte 1685 return emptyRowid[:] 1686 case types.T_char, types.T_varchar, types.T_blob, types.T_json, types.T_text: 1687 var emptyVarlena [types.VarlenaSize]byte 1688 return emptyVarlena[:] 1689 default: 1690 //T_any T_star T_tuple T_interval 1691 return int64(0) 1692 } 1693 } 1694 1695 func CopyConst(toVec, fromVec *Vector, length int, m *mpool.MPool) error { 1696 typ := fromVec.Typ 1697 var item any 1698 switch typ.Oid { 1699 case types.T_bool: 1700 item = MustTCols[bool](fromVec)[0] 1701 case types.T_int8: 1702 item = MustTCols[int8](fromVec)[0] 1703 case types.T_int16: 1704 item = MustTCols[int16](fromVec)[0] 1705 case types.T_int32: 1706 item = MustTCols[int32](fromVec)[0] 1707 case types.T_int64: 1708 item = MustTCols[int64](fromVec)[0] 1709 case types.T_uint8: 1710 item = MustTCols[uint8](fromVec)[0] 1711 case types.T_uint16: 1712 item = MustTCols[uint16](fromVec)[0] 1713 case types.T_uint32: 1714 item = MustTCols[uint32](fromVec)[0] 1715 case types.T_uint64: 1716 item = MustTCols[uint64](fromVec)[0] 1717 case types.T_float32: 1718 item = MustTCols[float32](fromVec)[0] 1719 case types.T_float64: 1720 item = MustTCols[float64](fromVec)[0] 1721 case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text: 1722 item = MustBytesCols(fromVec)[0] 1723 case types.T_date: 1724 item = MustTCols[types.Date](fromVec)[0] 1725 case types.T_datetime: 1726 item = MustTCols[types.Datetime](fromVec)[0] 1727 case types.T_time: 1728 item = MustTCols[types.Time](fromVec)[0] 1729 case types.T_timestamp: 1730 item = MustTCols[types.Timestamp](fromVec)[0] 1731 case types.T_decimal64: 1732 item = MustTCols[types.Decimal64](fromVec)[0] 1733 case types.T_decimal128: 1734 item = MustTCols[types.Decimal128](fromVec)[0] 1735 case types.T_uuid: 1736 item = MustTCols[types.Uuid](fromVec)[0] 1737 default: 1738 return moerr.NewInternalErrorNoCtx(fmt.Sprintf("vec %v can not copy", fromVec)) 1739 } 1740 1741 for i := 0; i < length; i++ { 1742 toVec.Append(item, false, m) 1743 } 1744 1745 return nil 1746 }