github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/containers/batch.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package containers
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"unsafe"
    22  
    23  	"github.com/RoaringBitmap/roaring"
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    26  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    30  )
    31  
    32  var EMPTY_VECTOR Vector
    33  
    34  func init() {
    35  	EMPTY_VECTOR = &emptyVector{
    36  		Vector: MakeVector(types.T_int8.ToType(), common.DefaultAllocator),
    37  	}
    38  }
    39  
    40  type emptyVector struct {
    41  	Vector
    42  }
    43  
    44  // do not close
    45  func (v *emptyVector) Close() {}
    46  
    47  func (v *emptyVector) Append(x any, isNull bool) {
    48  	panic("not implemented") // TODO: Implement
    49  }
    50  
    51  func (v *emptyVector) Compact(_ *roaring.Bitmap) {
    52  	panic("not implemented") // TODO: Implement
    53  }
    54  
    55  func (v *emptyVector) Extend(o Vector) {
    56  	panic("not implemented") // TODO: Implement
    57  }
    58  
    59  func (v *emptyVector) ExtendWithOffset(src Vector, srcOff int, srcLen int) {
    60  	panic("not implemented") // TODO: Implement
    61  }
    62  
    63  func NewBatch() *Batch {
    64  	return &Batch{
    65  		Attrs:   make([]string, 0),
    66  		Nameidx: make(map[string]int),
    67  		Vecs:    make([]Vector, 0),
    68  	}
    69  }
    70  
    71  func NewBatchWithCapacity(cap int) *Batch {
    72  	return &Batch{
    73  		Attrs:   make([]string, 0, cap),
    74  		Nameidx: make(map[string]int, cap),
    75  		Vecs:    make([]Vector, 0, cap),
    76  	}
    77  }
    78  
    79  func (bat *Batch) AddVector(attr string, vec Vector) {
    80  	if _, exist := bat.Nameidx[attr]; exist {
    81  		panic(moerr.NewInternalErrorNoCtx("duplicate vector %s", attr))
    82  	}
    83  	idx := len(bat.Vecs)
    84  	bat.Nameidx[attr] = idx
    85  	bat.Attrs = append(bat.Attrs, attr)
    86  	bat.Vecs = append(bat.Vecs, vec)
    87  }
    88  
    89  // AddPlaceholder is used to consctruct batch sent to CN.
    90  // The vectors in the batch are sorted by seqnum, if the seqnum was dropped, a
    91  // zero value will be fill as placeholder. This is space-time tradeoff.
    92  func (bat *Batch) AppendPlaceholder() {
    93  	bat.Attrs = append(bat.Attrs, "")
    94  	bat.Vecs = append(bat.Vecs, EMPTY_VECTOR)
    95  }
    96  
    97  func (bat *Batch) GetVectorByName(name string) Vector {
    98  	pos, ok := bat.Nameidx[name]
    99  	if !ok {
   100  		panic(fmt.Sprintf("vector %s not found", name))
   101  	}
   102  	return bat.Vecs[pos]
   103  }
   104  
   105  func (bat *Batch) RangeDelete(start, end int) {
   106  	if bat.Deletes == nil {
   107  		bat.Deletes = nulls.NewWithSize(end)
   108  	}
   109  	bat.Deletes.AddRange(uint64(start), uint64(end))
   110  }
   111  
   112  func (bat *Batch) Delete(i int) {
   113  	if bat.Deletes == nil {
   114  		bat.Deletes = nulls.NewWithSize(i)
   115  	}
   116  	bat.Deletes.Add(uint64(i))
   117  }
   118  
   119  func (bat *Batch) HasDelete() bool {
   120  	return !bat.Deletes.IsEmpty()
   121  }
   122  
   123  func (bat *Batch) IsDeleted(i int) bool {
   124  	return bat.Deletes.Contains(uint64(i))
   125  }
   126  
   127  func (bat *Batch) DeleteCnt() int {
   128  	if !bat.HasDelete() {
   129  		return 0
   130  	}
   131  	return int(bat.Deletes.GetCardinality())
   132  }
   133  
   134  func (bat *Batch) Compact() {
   135  	if !bat.HasDelete() {
   136  		return
   137  	}
   138  	for _, vec := range bat.Vecs {
   139  		vec.CompactByBitmap(bat.Deletes)
   140  	}
   141  	bat.Deletes = nil
   142  }
   143  
   144  func (bat *Batch) Length() int {
   145  	return bat.Vecs[0].Length()
   146  }
   147  
   148  func (bat *Batch) ApproxSize() int {
   149  	size := 0
   150  	for _, vec := range bat.Vecs {
   151  		size += vec.ApproxSize()
   152  	}
   153  	return size
   154  }
   155  
   156  func (bat *Batch) Allocated() int {
   157  	allocated := 0
   158  	for _, vec := range bat.Vecs {
   159  		allocated += vec.Allocated()
   160  	}
   161  	return allocated
   162  }
   163  
   164  func (bat *Batch) WindowDeletes(offset, length int, deep bool) *nulls.Bitmap {
   165  	if bat.Deletes.IsEmpty() || length <= 0 {
   166  		return nil
   167  	}
   168  	start := offset
   169  	end := offset + length
   170  	if end > bat.Length() {
   171  		panic(fmt.Sprintf("out of range: %d, %d", offset, length))
   172  	}
   173  	if start == 0 && end == bat.Length() && !deep {
   174  		return bat.Deletes
   175  	}
   176  	ret := nulls.NewWithSize(length)
   177  	nulls.Range(bat.Deletes, uint64(start), uint64(end), uint64(start), ret)
   178  	return ret
   179  }
   180  
   181  func (bat *Batch) Window(offset, length int) *Batch {
   182  	win := new(Batch)
   183  	win.Attrs = bat.Attrs
   184  	win.Nameidx = bat.Nameidx
   185  	win.Deletes = bat.WindowDeletes(offset, length, false)
   186  	win.Vecs = make([]Vector, len(bat.Vecs))
   187  	for i := range win.Vecs {
   188  		win.Vecs[i] = bat.Vecs[i].Window(offset, length)
   189  	}
   190  	return win
   191  }
   192  
   193  func (bat *Batch) CloneWindowWithPool(offset, length int, pool *VectorPool) (cloned *Batch) {
   194  	cloned = new(Batch)
   195  	cloned.Attrs = make([]string, len(bat.Attrs))
   196  	copy(cloned.Attrs, bat.Attrs)
   197  	cloned.Nameidx = make(map[string]int, len(bat.Nameidx))
   198  	for k, v := range bat.Nameidx {
   199  		cloned.Nameidx[k] = v
   200  	}
   201  	cloned.Deletes = bat.WindowDeletes(offset, length, true)
   202  	cloned.Vecs = make([]Vector, len(bat.Vecs))
   203  	for i := range cloned.Vecs {
   204  		cloned.Vecs[i] = bat.Vecs[i].CloneWindowWithPool(offset, length, pool)
   205  	}
   206  	return
   207  }
   208  
   209  func (bat *Batch) CloneWindow(offset, length int, allocator ...*mpool.MPool) (cloned *Batch) {
   210  	cloned = new(Batch)
   211  	cloned.Attrs = make([]string, len(bat.Attrs))
   212  	copy(cloned.Attrs, bat.Attrs)
   213  	cloned.Nameidx = make(map[string]int, len(bat.Nameidx))
   214  	for k, v := range bat.Nameidx {
   215  		cloned.Nameidx[k] = v
   216  	}
   217  	cloned.Deletes = bat.WindowDeletes(offset, length, true)
   218  	cloned.Vecs = make([]Vector, len(bat.Vecs))
   219  	for i := range cloned.Vecs {
   220  		cloned.Vecs[i] = bat.Vecs[i].CloneWindow(offset, length, allocator...)
   221  	}
   222  	return
   223  }
   224  
   225  func (bat *Batch) String() string {
   226  	return bat.PPString(10)
   227  }
   228  
   229  func (bat *Batch) PPString(num int) string {
   230  	var w bytes.Buffer
   231  	for i, vec := range bat.Vecs {
   232  		_, _ = w.WriteString(fmt.Sprintf("[Name=%s]", bat.Attrs[i]))
   233  		_, _ = w.WriteString(vec.PPString(num))
   234  		_ = w.WriteByte('\n')
   235  	}
   236  	return w.String()
   237  }
   238  
   239  func (bat *Batch) Close() {
   240  	for _, vec := range bat.Vecs {
   241  		vec.Close()
   242  	}
   243  }
   244  
   245  func (bat *Batch) Reset() {
   246  	for i, vec := range bat.Vecs {
   247  		var newVec Vector
   248  		if bat.Pool != nil {
   249  			newVec = bat.Pool.GetVector(vec.GetType())
   250  		} else {
   251  			opts := Options{
   252  				Allocator: vec.GetAllocator(),
   253  			}
   254  			newVec = NewVector(*vec.GetType(), opts)
   255  		}
   256  		vec.Close()
   257  		bat.Vecs[i] = newVec
   258  	}
   259  	bat.Deletes = nil
   260  }
   261  
   262  func (bat *Batch) Equals(o *Batch) bool {
   263  	if bat.Length() != o.Length() {
   264  		return false
   265  	}
   266  	if bat.DeleteCnt() != o.DeleteCnt() {
   267  		return false
   268  	}
   269  	if !common.BitmapEqual(bat.Deletes, o.Deletes) {
   270  		return false
   271  	}
   272  	for i := range bat.Vecs {
   273  		if bat.Attrs[i] != o.Attrs[i] {
   274  			return false
   275  		}
   276  		if !bat.Vecs[i].Equals(o.Vecs[i]) {
   277  			return false
   278  		}
   279  	}
   280  	return true
   281  }
   282  
   283  func (bat *Batch) WriteTo(w io.Writer) (n int64, err error) {
   284  	var nr int
   285  	var tmpn int64
   286  	var buffer Vector
   287  	if bat.Pool != nil {
   288  		t := types.T_varchar.ToType()
   289  		buffer = bat.Pool.GetVector(&t)
   290  	} else {
   291  		buffer = MakeVector(types.T_varchar.ToType(), common.DefaultAllocator)
   292  	}
   293  	defer buffer.Close()
   294  	mp := buffer.GetAllocator()
   295  	bufVec := buffer.GetDownstreamVector()
   296  	if err = vector.AppendBytes(bufVec, types.EncodeFixed(uint16(len(bat.Vecs))), false, mp); err != nil {
   297  		return
   298  	}
   299  
   300  	// 2. Types and Names
   301  	for i, vec := range bat.Vecs {
   302  		if err = vector.AppendBytes(bufVec, []byte(bat.Attrs[i]), false, mp); err != nil {
   303  			return
   304  		}
   305  		vt := vec.GetType()
   306  		if err = vector.AppendBytes(bufVec, types.EncodeType(vt), false, mp); err != nil {
   307  			return
   308  		}
   309  	}
   310  	if tmpn, err = buffer.WriteTo(w); err != nil {
   311  		return
   312  	}
   313  	n += tmpn
   314  
   315  	// 3. Vectors
   316  	for _, vec := range bat.Vecs {
   317  		if tmpn, err = vec.WriteTo(w); err != nil {
   318  			return
   319  		}
   320  		n += tmpn
   321  	}
   322  	// 4. Deletes
   323  	var buf []byte
   324  	if bat.Deletes != nil {
   325  		if buf, err = bat.Deletes.Show(); err != nil {
   326  			return
   327  		}
   328  	}
   329  	if nr, err = w.Write(types.EncodeFixed(uint32(len(buf)))); err != nil {
   330  		return
   331  	}
   332  	n += int64(nr)
   333  	if len(buf) == 0 {
   334  		return
   335  	}
   336  	if nr, err = w.Write(buf); err != nil {
   337  		return
   338  	}
   339  	n += int64(nr)
   340  
   341  	return
   342  }
   343  
   344  func (bat *Batch) ReadFrom(r io.Reader) (n int64, err error) {
   345  	var tmpn int64
   346  	buffer := MakeVector(types.T_varchar.ToType(), common.DefaultAllocator)
   347  	defer buffer.Close()
   348  	if tmpn, err = buffer.ReadFrom(r); err != nil {
   349  		return
   350  	}
   351  	n += tmpn
   352  	pos := 0
   353  	buf := buffer.Get(pos).([]byte)
   354  	pos++
   355  	cnt := types.DecodeFixed[uint16](buf)
   356  	vecTypes := make([]types.Type, cnt)
   357  	bat.Attrs = make([]string, cnt)
   358  	for i := 0; i < int(cnt); i++ {
   359  		buf = buffer.Get(pos).([]byte)
   360  		pos++
   361  		bat.Attrs[i] = string(buf)
   362  		bat.Nameidx[bat.Attrs[i]] = i
   363  		buf = buffer.Get(pos).([]byte)
   364  		vecTypes[i] = types.DecodeType(buf)
   365  		pos++
   366  	}
   367  	for _, vecType := range vecTypes {
   368  		vec := MakeVector(vecType, common.DefaultAllocator)
   369  		if tmpn, err = vec.ReadFrom(r); err != nil {
   370  			return
   371  		}
   372  		bat.Vecs = append(bat.Vecs, vec)
   373  		n += tmpn
   374  	}
   375  	// XXX Fix the following read, it is a very twisted way of reading uint32.
   376  	// Read Deletes
   377  	buf = make([]byte, int(unsafe.Sizeof(uint32(0))))
   378  	if _, err = r.Read(buf); err != nil {
   379  		return
   380  	}
   381  	n += int64(len(buf))
   382  	size := types.DecodeFixed[uint32](buf)
   383  	if size == 0 {
   384  		return
   385  	}
   386  	bat.Deletes = &nulls.Bitmap{}
   387  	buf = make([]byte, size)
   388  	if _, err = r.Read(buf); err != nil {
   389  		return
   390  	}
   391  	if err = bat.Deletes.ReadNoCopy(buf); err != nil {
   392  		return
   393  	}
   394  	n += int64(size)
   395  
   396  	return
   397  }
   398  
   399  // in version1, batch.Deletes is roaring.Bitmap
   400  func (bat *Batch) ReadFromV1(r io.Reader) (n int64, err error) {
   401  	var tmpn int64
   402  	buffer := MakeVector(types.T_varchar.ToType(), common.DefaultAllocator)
   403  	defer buffer.Close()
   404  	if tmpn, err = buffer.ReadFrom(r); err != nil {
   405  		return
   406  	}
   407  	n += tmpn
   408  	pos := 0
   409  	buf := buffer.Get(pos).([]byte)
   410  	pos++
   411  	cnt := types.DecodeFixed[uint16](buf)
   412  	vecTypes := make([]types.Type, cnt)
   413  	bat.Attrs = make([]string, cnt)
   414  	for i := 0; i < int(cnt); i++ {
   415  		buf = buffer.Get(pos).([]byte)
   416  		pos++
   417  		bat.Attrs[i] = string(buf)
   418  		bat.Nameidx[bat.Attrs[i]] = i
   419  		buf = buffer.Get(pos).([]byte)
   420  		vecTypes[i] = types.DecodeType(buf)
   421  		pos++
   422  	}
   423  	for _, vecType := range vecTypes {
   424  		vec := MakeVector(vecType, common.DefaultAllocator)
   425  		if tmpn, err = vec.ReadFrom(r); err != nil {
   426  			return
   427  		}
   428  		bat.Vecs = append(bat.Vecs, vec)
   429  		n += tmpn
   430  	}
   431  	// XXX Fix the following read, it is a very twisted way of reading uint32.
   432  	// Read Deletes
   433  	buf = make([]byte, int(unsafe.Sizeof(uint32(0))))
   434  	if _, err = r.Read(buf); err != nil {
   435  		return
   436  	}
   437  	n += int64(len(buf))
   438  	size := types.DecodeFixed[uint32](buf)
   439  	if size == 0 {
   440  		return
   441  	}
   442  	deletes := roaring.New()
   443  	if tmpn, err = deletes.ReadFrom(r); err != nil {
   444  		return
   445  	}
   446  	n += tmpn
   447  	bat.Deletes = common.RoaringToMOBitmap(deletes)
   448  
   449  	return
   450  }
   451  
   452  func (bat *Batch) Split(cnt int) []*Batch {
   453  	if cnt == 1 {
   454  		return []*Batch{bat}
   455  	}
   456  	length := bat.Length()
   457  	rows := length / cnt
   458  	if length%cnt == 0 {
   459  		bats := make([]*Batch, 0, cnt)
   460  		for i := 0; i < cnt; i++ {
   461  			newBat := bat.Window(i*rows, rows)
   462  			bats = append(bats, newBat)
   463  		}
   464  		return bats
   465  	}
   466  	rowArray := make([]int, 0)
   467  	if length/cnt == 0 {
   468  		for i := 0; i < length; i++ {
   469  			rowArray = append(rowArray, 1)
   470  		}
   471  	} else {
   472  		left := length
   473  		for i := 0; i < cnt; i++ {
   474  			if left >= rows && i < cnt-1 {
   475  				rowArray = append(rowArray, rows)
   476  			} else {
   477  				rowArray = append(rowArray, left)
   478  			}
   479  			left -= rows
   480  		}
   481  	}
   482  	start := 0
   483  	bats := make([]*Batch, 0, cnt)
   484  	for _, row := range rowArray {
   485  		newBat := bat.Window(start, row)
   486  		start += row
   487  		bats = append(bats, newBat)
   488  	}
   489  	return bats
   490  }
   491  
   492  func (bat *Batch) Append(src *Batch) (err error) {
   493  	for i, vec := range bat.Vecs {
   494  		vec.Extend(src.Vecs[i])
   495  	}
   496  	return
   497  }
   498  
   499  // extend vector with same name, consume src batch
   500  func (bat *Batch) Extend(src *Batch) {
   501  	for i, vec := range bat.Vecs {
   502  		attr := bat.Attrs[i]
   503  		if idx, ok := src.Nameidx[attr]; ok {
   504  			vec.Extend(src.Vecs[idx])
   505  		}
   506  	}
   507  	src.Close()
   508  }
   509  
   510  func (b *BatchWithVersion) Len() int {
   511  	return len(b.Seqnums)
   512  }
   513  
   514  func (b *BatchWithVersion) Swap(i, j int) {
   515  	b.Seqnums[i], b.Seqnums[j] = b.Seqnums[j], b.Seqnums[i]
   516  	b.Attrs[i], b.Attrs[j] = b.Attrs[j], b.Attrs[i]
   517  	b.Vecs[i], b.Vecs[j] = b.Vecs[j], b.Vecs[i]
   518  }
   519  
   520  // Sort by seqnum
   521  func (b *BatchWithVersion) Less(i, j int) bool {
   522  	return b.Seqnums[i] < b.Seqnums[j]
   523  }
   524  
   525  func NewBatchSplitter(bat *Batch, sliceSize int) *BatchSplitter {
   526  	if sliceSize <= 0 || bat == nil {
   527  		panic("sliceSize should not be 0 and bat should not be nil")
   528  	}
   529  	return &BatchSplitter{
   530  		internal:  bat,
   531  		sliceSize: sliceSize,
   532  	}
   533  }
   534  
   535  func (bs *BatchSplitter) Next() (*Batch, error) {
   536  	if bs.offset == bs.internal.Length() {
   537  		return nil, moerr.GetOkExpectedEOB()
   538  	}
   539  	length := bs.sliceSize
   540  	nextOffset := bs.offset + bs.sliceSize
   541  	if nextOffset >= bs.internal.Length() {
   542  		nextOffset = bs.internal.Length()
   543  		length = nextOffset - bs.offset
   544  	}
   545  	bat := bs.internal.CloneWindow(bs.offset, length)
   546  	bs.offset = nextOffset
   547  	return bat, nil
   548  }