github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/containers/batch.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package containers 16 17 import ( 18 "bytes" 19 "fmt" 20 "io" 21 "unsafe" 22 23 "github.com/RoaringBitmap/roaring" 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/common/mpool" 26 "github.com/matrixorigin/matrixone/pkg/container/nulls" 27 "github.com/matrixorigin/matrixone/pkg/container/types" 28 "github.com/matrixorigin/matrixone/pkg/container/vector" 29 "github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common" 30 ) 31 32 var EMPTY_VECTOR Vector 33 34 func init() { 35 EMPTY_VECTOR = &emptyVector{ 36 Vector: MakeVector(types.T_int8.ToType(), common.DefaultAllocator), 37 } 38 } 39 40 type emptyVector struct { 41 Vector 42 } 43 44 // do not close 45 func (v *emptyVector) Close() {} 46 47 func (v *emptyVector) Append(x any, isNull bool) { 48 panic("not implemented") // TODO: Implement 49 } 50 51 func (v *emptyVector) Compact(_ *roaring.Bitmap) { 52 panic("not implemented") // TODO: Implement 53 } 54 55 func (v *emptyVector) Extend(o Vector) { 56 panic("not implemented") // TODO: Implement 57 } 58 59 func (v *emptyVector) ExtendWithOffset(src Vector, srcOff int, srcLen int) { 60 panic("not implemented") // TODO: Implement 61 } 62 63 func NewBatch() *Batch { 64 return &Batch{ 65 Attrs: make([]string, 0), 66 Nameidx: make(map[string]int), 67 Vecs: make([]Vector, 0), 68 } 69 } 70 71 func NewBatchWithCapacity(cap int) *Batch { 72 return &Batch{ 73 Attrs: make([]string, 0, cap), 74 Nameidx: make(map[string]int, cap), 75 Vecs: make([]Vector, 0, cap), 76 } 77 } 78 79 func (bat *Batch) AddVector(attr string, vec Vector) { 80 if _, exist := bat.Nameidx[attr]; exist { 81 panic(moerr.NewInternalErrorNoCtx("duplicate vector %s", attr)) 82 } 83 idx := len(bat.Vecs) 84 bat.Nameidx[attr] = idx 85 bat.Attrs = append(bat.Attrs, attr) 86 bat.Vecs = append(bat.Vecs, vec) 87 } 88 89 // AddPlaceholder is used to consctruct batch sent to CN. 90 // The vectors in the batch are sorted by seqnum, if the seqnum was dropped, a 91 // zero value will be fill as placeholder. This is space-time tradeoff. 92 func (bat *Batch) AppendPlaceholder() { 93 bat.Attrs = append(bat.Attrs, "") 94 bat.Vecs = append(bat.Vecs, EMPTY_VECTOR) 95 } 96 97 func (bat *Batch) GetVectorByName(name string) Vector { 98 pos, ok := bat.Nameidx[name] 99 if !ok { 100 panic(fmt.Sprintf("vector %s not found", name)) 101 } 102 return bat.Vecs[pos] 103 } 104 105 func (bat *Batch) RangeDelete(start, end int) { 106 if bat.Deletes == nil { 107 bat.Deletes = nulls.NewWithSize(end) 108 } 109 bat.Deletes.AddRange(uint64(start), uint64(end)) 110 } 111 112 func (bat *Batch) Delete(i int) { 113 if bat.Deletes == nil { 114 bat.Deletes = nulls.NewWithSize(i) 115 } 116 bat.Deletes.Add(uint64(i)) 117 } 118 119 func (bat *Batch) HasDelete() bool { 120 return !bat.Deletes.IsEmpty() 121 } 122 123 func (bat *Batch) IsDeleted(i int) bool { 124 return bat.Deletes.Contains(uint64(i)) 125 } 126 127 func (bat *Batch) DeleteCnt() int { 128 if !bat.HasDelete() { 129 return 0 130 } 131 return int(bat.Deletes.GetCardinality()) 132 } 133 134 func (bat *Batch) Compact() { 135 if !bat.HasDelete() { 136 return 137 } 138 for _, vec := range bat.Vecs { 139 vec.CompactByBitmap(bat.Deletes) 140 } 141 bat.Deletes = nil 142 } 143 144 func (bat *Batch) Length() int { 145 return bat.Vecs[0].Length() 146 } 147 148 func (bat *Batch) ApproxSize() int { 149 size := 0 150 for _, vec := range bat.Vecs { 151 size += vec.ApproxSize() 152 } 153 return size 154 } 155 156 func (bat *Batch) Allocated() int { 157 allocated := 0 158 for _, vec := range bat.Vecs { 159 allocated += vec.Allocated() 160 } 161 return allocated 162 } 163 164 func (bat *Batch) WindowDeletes(offset, length int, deep bool) *nulls.Bitmap { 165 if bat.Deletes.IsEmpty() || length <= 0 { 166 return nil 167 } 168 start := offset 169 end := offset + length 170 if end > bat.Length() { 171 panic(fmt.Sprintf("out of range: %d, %d", offset, length)) 172 } 173 if start == 0 && end == bat.Length() && !deep { 174 return bat.Deletes 175 } 176 ret := nulls.NewWithSize(length) 177 nulls.Range(bat.Deletes, uint64(start), uint64(end), uint64(start), ret) 178 return ret 179 } 180 181 func (bat *Batch) Window(offset, length int) *Batch { 182 win := new(Batch) 183 win.Attrs = bat.Attrs 184 win.Nameidx = bat.Nameidx 185 win.Deletes = bat.WindowDeletes(offset, length, false) 186 win.Vecs = make([]Vector, len(bat.Vecs)) 187 for i := range win.Vecs { 188 win.Vecs[i] = bat.Vecs[i].Window(offset, length) 189 } 190 return win 191 } 192 193 func (bat *Batch) CloneWindowWithPool(offset, length int, pool *VectorPool) (cloned *Batch) { 194 cloned = new(Batch) 195 cloned.Attrs = make([]string, len(bat.Attrs)) 196 copy(cloned.Attrs, bat.Attrs) 197 cloned.Nameidx = make(map[string]int, len(bat.Nameidx)) 198 for k, v := range bat.Nameidx { 199 cloned.Nameidx[k] = v 200 } 201 cloned.Deletes = bat.WindowDeletes(offset, length, true) 202 cloned.Vecs = make([]Vector, len(bat.Vecs)) 203 for i := range cloned.Vecs { 204 cloned.Vecs[i] = bat.Vecs[i].CloneWindowWithPool(offset, length, pool) 205 } 206 return 207 } 208 209 func (bat *Batch) CloneWindow(offset, length int, allocator ...*mpool.MPool) (cloned *Batch) { 210 cloned = new(Batch) 211 cloned.Attrs = make([]string, len(bat.Attrs)) 212 copy(cloned.Attrs, bat.Attrs) 213 cloned.Nameidx = make(map[string]int, len(bat.Nameidx)) 214 for k, v := range bat.Nameidx { 215 cloned.Nameidx[k] = v 216 } 217 cloned.Deletes = bat.WindowDeletes(offset, length, true) 218 cloned.Vecs = make([]Vector, len(bat.Vecs)) 219 for i := range cloned.Vecs { 220 cloned.Vecs[i] = bat.Vecs[i].CloneWindow(offset, length, allocator...) 221 } 222 return 223 } 224 225 func (bat *Batch) String() string { 226 return bat.PPString(10) 227 } 228 229 func (bat *Batch) PPString(num int) string { 230 var w bytes.Buffer 231 for i, vec := range bat.Vecs { 232 _, _ = w.WriteString(fmt.Sprintf("[Name=%s]", bat.Attrs[i])) 233 _, _ = w.WriteString(vec.PPString(num)) 234 _ = w.WriteByte('\n') 235 } 236 return w.String() 237 } 238 239 func (bat *Batch) Close() { 240 for _, vec := range bat.Vecs { 241 vec.Close() 242 } 243 } 244 245 func (bat *Batch) Reset() { 246 for i, vec := range bat.Vecs { 247 var newVec Vector 248 if bat.Pool != nil { 249 newVec = bat.Pool.GetVector(vec.GetType()) 250 } else { 251 opts := Options{ 252 Allocator: vec.GetAllocator(), 253 } 254 newVec = NewVector(*vec.GetType(), opts) 255 } 256 vec.Close() 257 bat.Vecs[i] = newVec 258 } 259 bat.Deletes = nil 260 } 261 262 func (bat *Batch) Equals(o *Batch) bool { 263 if bat.Length() != o.Length() { 264 return false 265 } 266 if bat.DeleteCnt() != o.DeleteCnt() { 267 return false 268 } 269 if !common.BitmapEqual(bat.Deletes, o.Deletes) { 270 return false 271 } 272 for i := range bat.Vecs { 273 if bat.Attrs[i] != o.Attrs[i] { 274 return false 275 } 276 if !bat.Vecs[i].Equals(o.Vecs[i]) { 277 return false 278 } 279 } 280 return true 281 } 282 283 func (bat *Batch) WriteTo(w io.Writer) (n int64, err error) { 284 var nr int 285 var tmpn int64 286 var buffer Vector 287 if bat.Pool != nil { 288 t := types.T_varchar.ToType() 289 buffer = bat.Pool.GetVector(&t) 290 } else { 291 buffer = MakeVector(types.T_varchar.ToType(), common.DefaultAllocator) 292 } 293 defer buffer.Close() 294 mp := buffer.GetAllocator() 295 bufVec := buffer.GetDownstreamVector() 296 if err = vector.AppendBytes(bufVec, types.EncodeFixed(uint16(len(bat.Vecs))), false, mp); err != nil { 297 return 298 } 299 300 // 2. Types and Names 301 for i, vec := range bat.Vecs { 302 if err = vector.AppendBytes(bufVec, []byte(bat.Attrs[i]), false, mp); err != nil { 303 return 304 } 305 vt := vec.GetType() 306 if err = vector.AppendBytes(bufVec, types.EncodeType(vt), false, mp); err != nil { 307 return 308 } 309 } 310 if tmpn, err = buffer.WriteTo(w); err != nil { 311 return 312 } 313 n += tmpn 314 315 // 3. Vectors 316 for _, vec := range bat.Vecs { 317 if tmpn, err = vec.WriteTo(w); err != nil { 318 return 319 } 320 n += tmpn 321 } 322 // 4. Deletes 323 var buf []byte 324 if bat.Deletes != nil { 325 if buf, err = bat.Deletes.Show(); err != nil { 326 return 327 } 328 } 329 if nr, err = w.Write(types.EncodeFixed(uint32(len(buf)))); err != nil { 330 return 331 } 332 n += int64(nr) 333 if len(buf) == 0 { 334 return 335 } 336 if nr, err = w.Write(buf); err != nil { 337 return 338 } 339 n += int64(nr) 340 341 return 342 } 343 344 func (bat *Batch) ReadFrom(r io.Reader) (n int64, err error) { 345 var tmpn int64 346 buffer := MakeVector(types.T_varchar.ToType(), common.DefaultAllocator) 347 defer buffer.Close() 348 if tmpn, err = buffer.ReadFrom(r); err != nil { 349 return 350 } 351 n += tmpn 352 pos := 0 353 buf := buffer.Get(pos).([]byte) 354 pos++ 355 cnt := types.DecodeFixed[uint16](buf) 356 vecTypes := make([]types.Type, cnt) 357 bat.Attrs = make([]string, cnt) 358 for i := 0; i < int(cnt); i++ { 359 buf = buffer.Get(pos).([]byte) 360 pos++ 361 bat.Attrs[i] = string(buf) 362 bat.Nameidx[bat.Attrs[i]] = i 363 buf = buffer.Get(pos).([]byte) 364 vecTypes[i] = types.DecodeType(buf) 365 pos++ 366 } 367 for _, vecType := range vecTypes { 368 vec := MakeVector(vecType, common.DefaultAllocator) 369 if tmpn, err = vec.ReadFrom(r); err != nil { 370 return 371 } 372 bat.Vecs = append(bat.Vecs, vec) 373 n += tmpn 374 } 375 // XXX Fix the following read, it is a very twisted way of reading uint32. 376 // Read Deletes 377 buf = make([]byte, int(unsafe.Sizeof(uint32(0)))) 378 if _, err = r.Read(buf); err != nil { 379 return 380 } 381 n += int64(len(buf)) 382 size := types.DecodeFixed[uint32](buf) 383 if size == 0 { 384 return 385 } 386 bat.Deletes = &nulls.Bitmap{} 387 buf = make([]byte, size) 388 if _, err = r.Read(buf); err != nil { 389 return 390 } 391 if err = bat.Deletes.ReadNoCopy(buf); err != nil { 392 return 393 } 394 n += int64(size) 395 396 return 397 } 398 399 // in version1, batch.Deletes is roaring.Bitmap 400 func (bat *Batch) ReadFromV1(r io.Reader) (n int64, err error) { 401 var tmpn int64 402 buffer := MakeVector(types.T_varchar.ToType(), common.DefaultAllocator) 403 defer buffer.Close() 404 if tmpn, err = buffer.ReadFrom(r); err != nil { 405 return 406 } 407 n += tmpn 408 pos := 0 409 buf := buffer.Get(pos).([]byte) 410 pos++ 411 cnt := types.DecodeFixed[uint16](buf) 412 vecTypes := make([]types.Type, cnt) 413 bat.Attrs = make([]string, cnt) 414 for i := 0; i < int(cnt); i++ { 415 buf = buffer.Get(pos).([]byte) 416 pos++ 417 bat.Attrs[i] = string(buf) 418 bat.Nameidx[bat.Attrs[i]] = i 419 buf = buffer.Get(pos).([]byte) 420 vecTypes[i] = types.DecodeType(buf) 421 pos++ 422 } 423 for _, vecType := range vecTypes { 424 vec := MakeVector(vecType, common.DefaultAllocator) 425 if tmpn, err = vec.ReadFrom(r); err != nil { 426 return 427 } 428 bat.Vecs = append(bat.Vecs, vec) 429 n += tmpn 430 } 431 // XXX Fix the following read, it is a very twisted way of reading uint32. 432 // Read Deletes 433 buf = make([]byte, int(unsafe.Sizeof(uint32(0)))) 434 if _, err = r.Read(buf); err != nil { 435 return 436 } 437 n += int64(len(buf)) 438 size := types.DecodeFixed[uint32](buf) 439 if size == 0 { 440 return 441 } 442 deletes := roaring.New() 443 if tmpn, err = deletes.ReadFrom(r); err != nil { 444 return 445 } 446 n += tmpn 447 bat.Deletes = common.RoaringToMOBitmap(deletes) 448 449 return 450 } 451 452 func (bat *Batch) Split(cnt int) []*Batch { 453 if cnt == 1 { 454 return []*Batch{bat} 455 } 456 length := bat.Length() 457 rows := length / cnt 458 if length%cnt == 0 { 459 bats := make([]*Batch, 0, cnt) 460 for i := 0; i < cnt; i++ { 461 newBat := bat.Window(i*rows, rows) 462 bats = append(bats, newBat) 463 } 464 return bats 465 } 466 rowArray := make([]int, 0) 467 if length/cnt == 0 { 468 for i := 0; i < length; i++ { 469 rowArray = append(rowArray, 1) 470 } 471 } else { 472 left := length 473 for i := 0; i < cnt; i++ { 474 if left >= rows && i < cnt-1 { 475 rowArray = append(rowArray, rows) 476 } else { 477 rowArray = append(rowArray, left) 478 } 479 left -= rows 480 } 481 } 482 start := 0 483 bats := make([]*Batch, 0, cnt) 484 for _, row := range rowArray { 485 newBat := bat.Window(start, row) 486 start += row 487 bats = append(bats, newBat) 488 } 489 return bats 490 } 491 492 func (bat *Batch) Append(src *Batch) (err error) { 493 for i, vec := range bat.Vecs { 494 vec.Extend(src.Vecs[i]) 495 } 496 return 497 } 498 499 // extend vector with same name, consume src batch 500 func (bat *Batch) Extend(src *Batch) { 501 for i, vec := range bat.Vecs { 502 attr := bat.Attrs[i] 503 if idx, ok := src.Nameidx[attr]; ok { 504 vec.Extend(src.Vecs[idx]) 505 } 506 } 507 src.Close() 508 } 509 510 func (b *BatchWithVersion) Len() int { 511 return len(b.Seqnums) 512 } 513 514 func (b *BatchWithVersion) Swap(i, j int) { 515 b.Seqnums[i], b.Seqnums[j] = b.Seqnums[j], b.Seqnums[i] 516 b.Attrs[i], b.Attrs[j] = b.Attrs[j], b.Attrs[i] 517 b.Vecs[i], b.Vecs[j] = b.Vecs[j], b.Vecs[i] 518 } 519 520 // Sort by seqnum 521 func (b *BatchWithVersion) Less(i, j int) bool { 522 return b.Seqnums[i] < b.Seqnums[j] 523 } 524 525 func NewBatchSplitter(bat *Batch, sliceSize int) *BatchSplitter { 526 if sliceSize <= 0 || bat == nil { 527 panic("sliceSize should not be 0 and bat should not be nil") 528 } 529 return &BatchSplitter{ 530 internal: bat, 531 sliceSize: sliceSize, 532 } 533 } 534 535 func (bs *BatchSplitter) Next() (*Batch, error) { 536 if bs.offset == bs.internal.Length() { 537 return nil, moerr.GetOkExpectedEOB() 538 } 539 length := bs.sliceSize 540 nextOffset := bs.offset + bs.sliceSize 541 if nextOffset >= bs.internal.Length() { 542 nextOffset = bs.internal.Length() 543 length = nextOffset - bs.offset 544 } 545 bat := bs.internal.CloneWindow(bs.offset, length) 546 bs.offset = nextOffset 547 return bat, nil 548 }