github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/containers/batch.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package containers
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"unsafe"
    22  
    23  	"github.com/RoaringBitmap/roaring"
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/stl/containers"
    29  )
    30  
    31  func NewBatch() *Batch {
    32  	return &Batch{
    33  		Attrs:   make([]string, 0),
    34  		nameidx: make(map[string]int),
    35  		Vecs:    make([]Vector, 0),
    36  	}
    37  }
    38  
    39  func (bat *Batch) AddVector(attr string, vec Vector) {
    40  	if _, exist := bat.nameidx[attr]; exist {
    41  		panic(moerr.NewInternalErrorNoCtx("duplicate vector %s", attr))
    42  	}
    43  	idx := len(bat.Vecs)
    44  	bat.nameidx[attr] = idx
    45  	bat.Attrs = append(bat.Attrs, attr)
    46  	bat.Vecs = append(bat.Vecs, vec)
    47  }
    48  
    49  func (bat *Batch) GetVectorByName(name string) Vector {
    50  	pos := bat.nameidx[name]
    51  	return bat.Vecs[pos]
    52  }
    53  
    54  func (bat *Batch) RangeDelete(start, end int) {
    55  	if bat.Deletes == nil {
    56  		bat.Deletes = roaring.New()
    57  	}
    58  	bat.Deletes.AddRange(uint64(start), uint64(end))
    59  }
    60  
    61  func (bat *Batch) Delete(i int) {
    62  	if bat.Deletes == nil {
    63  		bat.Deletes = roaring.BitmapOf(uint32(i))
    64  	} else {
    65  		bat.Deletes.Add(uint32(i))
    66  	}
    67  }
    68  
    69  func (bat *Batch) HasDelete() bool {
    70  	return bat.Deletes != nil && !bat.Deletes.IsEmpty()
    71  }
    72  
    73  func (bat *Batch) IsDeleted(i int) bool {
    74  	if !bat.HasDelete() {
    75  		return false
    76  	}
    77  	return bat.Deletes.ContainsInt(i)
    78  }
    79  
    80  func (bat *Batch) DeleteCnt() int {
    81  	if !bat.HasDelete() {
    82  		return 0
    83  	}
    84  	return int(bat.Deletes.GetCardinality())
    85  }
    86  
    87  func (bat *Batch) Compact() {
    88  	if !bat.HasDelete() {
    89  		return
    90  	}
    91  	for _, vec := range bat.Vecs {
    92  		vec.Compact(bat.Deletes)
    93  	}
    94  	bat.Deletes = nil
    95  }
    96  
    97  func (bat *Batch) Length() int {
    98  	return bat.Vecs[0].Length()
    99  }
   100  
   101  func (bat *Batch) Capacity() int {
   102  	return bat.Vecs[0].Capacity()
   103  }
   104  
   105  func (bat *Batch) Allocated() int {
   106  	allocated := 0
   107  	for _, vec := range bat.Vecs {
   108  		allocated += vec.Allocated()
   109  	}
   110  	return allocated
   111  }
   112  
   113  func (bat *Batch) Window(offset, length int) *Batch {
   114  	win := new(Batch)
   115  	win.Attrs = bat.Attrs
   116  	win.nameidx = bat.nameidx
   117  	if bat.Deletes != nil && offset+length != bat.Length() {
   118  		win.Deletes = common.BM32Window(bat.Deletes, offset, offset+length)
   119  	} else {
   120  		win.Deletes = bat.Deletes
   121  	}
   122  	win.Vecs = make([]Vector, len(bat.Vecs))
   123  	for i := range win.Vecs {
   124  		win.Vecs[i] = bat.Vecs[i].Window(offset, length)
   125  	}
   126  	return win
   127  }
   128  
   129  func (bat *Batch) CloneWindow(offset, length int, allocator ...*mpool.MPool) (cloned *Batch) {
   130  	cloned = new(Batch)
   131  	cloned.Attrs = make([]string, len(bat.Attrs))
   132  	copy(cloned.Attrs, bat.Attrs)
   133  	cloned.nameidx = make(map[string]int, len(bat.nameidx))
   134  	for k, v := range bat.nameidx {
   135  		cloned.nameidx[k] = v
   136  	}
   137  	if bat.Deletes != nil {
   138  		cloned.Deletes = common.BM32Window(bat.Deletes, offset, offset+length)
   139  	}
   140  	cloned.Vecs = make([]Vector, len(bat.Vecs))
   141  	for i := range cloned.Vecs {
   142  		cloned.Vecs[i] = bat.Vecs[i].CloneWindow(offset, length, allocator...)
   143  	}
   144  	return
   145  }
   146  
   147  func (bat *Batch) String() string {
   148  	return bat.PPString(10)
   149  }
   150  
   151  func (bat *Batch) PPString(num int) string {
   152  	var w bytes.Buffer
   153  	for i, vec := range bat.Vecs {
   154  		_, _ = w.WriteString(fmt.Sprintf("[Name=%s]", bat.Attrs[i]))
   155  		_, _ = w.WriteString(vec.PPString(num))
   156  		_ = w.WriteByte('\n')
   157  	}
   158  	return w.String()
   159  }
   160  
   161  func (bat *Batch) Close() {
   162  	for _, vec := range bat.Vecs {
   163  		vec.Close()
   164  	}
   165  }
   166  
   167  func (bat *Batch) Equals(o *Batch) bool {
   168  	if bat.Length() != o.Length() {
   169  		return false
   170  	}
   171  	if bat.DeleteCnt() != o.DeleteCnt() {
   172  		return false
   173  	}
   174  	if bat.HasDelete() {
   175  		if !bat.Deletes.Equals(o.Deletes) {
   176  			return false
   177  		}
   178  	}
   179  	for i := range bat.Vecs {
   180  		if bat.Attrs[i] != o.Attrs[i] {
   181  			return false
   182  		}
   183  		if !bat.Vecs[i].Equals(o.Vecs[i]) {
   184  			return false
   185  		}
   186  	}
   187  	return true
   188  }
   189  
   190  func (bat *Batch) WriteTo(w io.Writer) (n int64, err error) {
   191  	var nr int
   192  	var tmpn int64
   193  	buffer := containers.NewVector[[]byte]()
   194  	defer buffer.Close()
   195  	// 1. Vector cnt
   196  	// if nr, err = w.Write(types.EncodeFixed(uint16(len(bat.Vecs)))); err != nil {
   197  	// 	return
   198  	// }
   199  	// n += int64(nr)
   200  	buffer.Append(types.EncodeFixed(uint16(len(bat.Vecs))))
   201  
   202  	// 2. Types and Names
   203  	for i, vec := range bat.Vecs {
   204  		buffer.Append([]byte(bat.Attrs[i]))
   205  		vt := vec.GetType()
   206  		buffer.Append(types.EncodeType(&vt))
   207  	}
   208  	if tmpn, err = buffer.WriteTo(w); err != nil {
   209  		return
   210  	}
   211  	n += tmpn
   212  
   213  	// 3. Vectors
   214  	for _, vec := range bat.Vecs {
   215  		if tmpn, err = vec.WriteTo(w); err != nil {
   216  			return
   217  		}
   218  		n += tmpn
   219  	}
   220  	// 4. Deletes
   221  	var buf []byte
   222  	if bat.Deletes != nil {
   223  		if buf, err = bat.Deletes.ToBytes(); err != nil {
   224  			return
   225  		}
   226  	}
   227  	if nr, err = w.Write(types.EncodeFixed(uint32(len(buf)))); err != nil {
   228  		return
   229  	}
   230  	n += int64(nr)
   231  	if len(buf) == 0 {
   232  		return
   233  	}
   234  	if nr, err = w.Write(buf); err != nil {
   235  		return
   236  	}
   237  	n += int64(nr)
   238  
   239  	return
   240  }
   241  
   242  func (bat *Batch) ReadFrom(r io.Reader) (n int64, err error) {
   243  	var tmpn int64
   244  	buffer := containers.NewVector[[]byte]()
   245  	defer buffer.Close()
   246  	if tmpn, err = buffer.ReadFrom(r); err != nil {
   247  		return
   248  	}
   249  	n += tmpn
   250  	pos := 0
   251  	buf := buffer.Get(pos)
   252  	pos++
   253  	cnt := types.DecodeFixed[uint16](buf)
   254  	vecTypes := make([]types.Type, cnt)
   255  	bat.Attrs = make([]string, cnt)
   256  	for i := 0; i < int(cnt); i++ {
   257  		buf = buffer.Get(pos)
   258  		pos++
   259  		bat.Attrs[i] = string(buf)
   260  		bat.nameidx[bat.Attrs[i]] = i
   261  		buf = buffer.Get(pos)
   262  		vecTypes[i] = types.DecodeType(buf)
   263  		pos++
   264  	}
   265  	for _, vecType := range vecTypes {
   266  		vec := MakeVector(vecType, true)
   267  		if tmpn, err = vec.ReadFrom(r); err != nil {
   268  			return
   269  		}
   270  		bat.Vecs = append(bat.Vecs, vec)
   271  		n += tmpn
   272  	}
   273  	// Read Deletes
   274  	buf = make([]byte, int(unsafe.Sizeof(uint32(0))))
   275  	if _, err = r.Read(buf); err != nil {
   276  		return
   277  	}
   278  	n += int64(len(buf))
   279  	size := types.DecodeFixed[uint32](buf)
   280  	if size == 0 {
   281  		return
   282  	}
   283  	bat.Deletes = roaring.New()
   284  	if tmpn, err = bat.Deletes.ReadFrom(r); err != nil {
   285  		return
   286  	}
   287  	n += tmpn
   288  
   289  	return
   290  }
   291  
   292  func (bat *Batch) Split(cnt int) []*Batch {
   293  	if cnt == 1 {
   294  		return []*Batch{bat}
   295  	}
   296  	length := bat.Length()
   297  	rows := length / cnt
   298  	if length%cnt == 0 {
   299  		bats := make([]*Batch, 0, cnt)
   300  		for i := 0; i < cnt; i++ {
   301  			newBat := bat.Window(i*rows, rows)
   302  			bats = append(bats, newBat)
   303  		}
   304  		return bats
   305  	}
   306  	rowArray := make([]int, 0)
   307  	if length/cnt == 0 {
   308  		for i := 0; i < length; i++ {
   309  			rowArray = append(rowArray, 1)
   310  		}
   311  	} else {
   312  		left := length
   313  		for i := 0; i < cnt; i++ {
   314  			if left >= rows && i < cnt-1 {
   315  				rowArray = append(rowArray, rows)
   316  			} else {
   317  				rowArray = append(rowArray, left)
   318  			}
   319  			left -= rows
   320  		}
   321  	}
   322  	start := 0
   323  	bats := make([]*Batch, 0, cnt)
   324  	for _, row := range rowArray {
   325  		newBat := bat.Window(start, row)
   326  		start += row
   327  		bats = append(bats, newBat)
   328  	}
   329  	return bats
   330  }
   331  
   332  func (bat *Batch) Append(src *Batch) (err error) {
   333  	for i, vec := range bat.Vecs {
   334  		vec.Extend(src.Vecs[i])
   335  	}
   336  	return
   337  }
   338  
   339  // extend vector with same name, consume src batch
   340  func (bat *Batch) Extend(src *Batch) {
   341  	for i, vec := range bat.Vecs {
   342  		attr := bat.Attrs[i]
   343  		if idx, ok := src.nameidx[attr]; ok {
   344  			vec.Extend(src.Vecs[idx])
   345  		}
   346  	}
   347  	src.Close()
   348  }