github.com/matrixorigin/matrixone@v1.2.0/pkg/container/batch/batch.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package batch
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/aggexec"
    22  	"sync/atomic"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/hashmap"
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    27  	"github.com/matrixorigin/matrixone/pkg/container/types"
    28  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    29  	"github.com/matrixorigin/matrixone/pkg/logutil"
    30  )
    31  
    32  func New(ro bool, attrs []string) *Batch {
    33  	return &Batch{
    34  		Ro:       ro,
    35  		Cnt:      1,
    36  		Attrs:    attrs,
    37  		Vecs:     make([]*vector.Vector, len(attrs)),
    38  		rowCount: 0,
    39  	}
    40  }
    41  
    42  func NewWithSize(n int) *Batch {
    43  	return &Batch{
    44  		Cnt:      1,
    45  		Vecs:     make([]*vector.Vector, n),
    46  		rowCount: 0,
    47  	}
    48  }
    49  
    50  func SetLength(bat *Batch, n int) {
    51  	for _, vec := range bat.Vecs {
    52  		vec.SetLength(n)
    53  	}
    54  	bat.rowCount = n
    55  }
    56  
    57  func (bat *Batch) MarshalBinary() ([]byte, error) {
    58  	aggInfos := make([][]byte, len(bat.Aggs))
    59  	for i, exec := range bat.Aggs {
    60  		data, err := aggexec.MarshalAggFuncExec(exec)
    61  		if err != nil {
    62  			return nil, err
    63  		}
    64  		aggInfos[i] = data
    65  	}
    66  
    67  	return types.Encode(&EncodeBatch{
    68  		rowCount:  int64(bat.rowCount),
    69  		Vecs:      bat.Vecs,
    70  		Attrs:     bat.Attrs,
    71  		AggInfos:  aggInfos,
    72  		Recursive: bat.Recursive,
    73  	})
    74  }
    75  
    76  func (bat *Batch) UnmarshalBinary(data []byte) (err error) {
    77  	return bat.unmarshalBinaryWithAnyMp(data, nil)
    78  }
    79  
    80  func (bat *Batch) UnmarshalBinaryWithCopy(data []byte, mp *mpool.MPool) error {
    81  	return bat.unmarshalBinaryWithAnyMp(data, mp)
    82  }
    83  
    84  func (bat *Batch) unmarshalBinaryWithAnyMp(data []byte, mp *mpool.MPool) (err error) {
    85  	rbat := new(EncodeBatch)
    86  	if err = rbat.UnmarshalBinaryWithCopy(data, mp); err != nil {
    87  		return err
    88  	}
    89  
    90  	bat.Recursive = rbat.Recursive
    91  	bat.Cnt = 1
    92  	bat.rowCount = int(rbat.rowCount)
    93  	bat.Vecs = rbat.Vecs
    94  	bat.Attrs = append(bat.Attrs, rbat.Attrs...)
    95  
    96  	if len(rbat.AggInfos) > 0 {
    97  		bat.Aggs = make([]aggexec.AggFuncExec, len(rbat.AggInfos))
    98  		var aggMemoryManager aggexec.AggMemoryManager = nil
    99  		if mp != nil {
   100  			aggMemoryManager = aggexec.NewSimpleAggMemoryManager(mp)
   101  		}
   102  
   103  		for i, info := range rbat.AggInfos {
   104  			if bat.Aggs[i], err = aggexec.UnmarshalAggFuncExec(aggMemoryManager, info); err != nil {
   105  				return err
   106  			}
   107  		}
   108  	}
   109  	return nil
   110  }
   111  
   112  func (bat *Batch) Shrink(sels []int64, negate bool) {
   113  	if !negate {
   114  		if len(sels) == bat.rowCount {
   115  			return
   116  		}
   117  	}
   118  	for _, vec := range bat.Vecs {
   119  		vec.Shrink(sels, negate)
   120  	}
   121  	if negate {
   122  		bat.rowCount -= len(sels)
   123  		return
   124  	}
   125  	bat.rowCount = len(sels)
   126  }
   127  
   128  func (bat *Batch) Shuffle(sels []int64, m *mpool.MPool) error {
   129  	if len(sels) > 0 {
   130  		mp := make(map[*vector.Vector]uint8)
   131  		for _, vec := range bat.Vecs {
   132  			if _, ok := mp[vec]; ok {
   133  				continue
   134  			}
   135  			mp[vec]++
   136  			if err := vec.Shuffle(sels, m); err != nil {
   137  				return err
   138  			}
   139  		}
   140  		bat.rowCount = len(sels)
   141  	}
   142  	return nil
   143  }
   144  
   145  func (bat *Batch) Size() int {
   146  	var size int
   147  
   148  	for _, vec := range bat.Vecs {
   149  		size += vec.Size()
   150  	}
   151  	return size
   152  }
   153  
   154  func (bat *Batch) RowCount() int {
   155  	return bat.rowCount
   156  }
   157  
   158  func (bat *Batch) VectorCount() int {
   159  	return len(bat.Vecs)
   160  }
   161  
   162  func (bat *Batch) Prefetch(poses []int32, vecs []*vector.Vector) {
   163  	for i, pos := range poses {
   164  		vecs[i] = bat.GetVector(pos)
   165  	}
   166  }
   167  
   168  func (bat *Batch) SetAttributes(attrs []string) {
   169  	bat.Attrs = attrs
   170  }
   171  
   172  func (bat *Batch) SetVector(pos int32, vec *vector.Vector) {
   173  	bat.Vecs[pos] = vec
   174  }
   175  
   176  func (bat *Batch) GetVector(pos int32) *vector.Vector {
   177  	return bat.Vecs[pos]
   178  }
   179  
   180  func (bat *Batch) GetSubBatch(cols []string) *Batch {
   181  	mp := make(map[string]int)
   182  	for i, attr := range bat.Attrs {
   183  		mp[attr] = i
   184  	}
   185  	rbat := NewWithSize(len(cols))
   186  	for i, col := range cols {
   187  		rbat.Vecs[i] = bat.Vecs[mp[col]]
   188  	}
   189  	rbat.rowCount = bat.rowCount
   190  	return rbat
   191  }
   192  
   193  func (bat *Batch) Clean(m *mpool.MPool) {
   194  	if bat == EmptyBatch {
   195  		return
   196  	}
   197  	if atomic.LoadInt64(&bat.Cnt) == 0 {
   198  		// panic("batch is already cleaned")
   199  		return
   200  	}
   201  	if atomic.AddInt64(&bat.Cnt, -1) > 0 {
   202  		return
   203  	}
   204  	for _, vec := range bat.Vecs {
   205  		if vec != nil {
   206  			vec.Free(m)
   207  		}
   208  	}
   209  	for _, agg := range bat.Aggs {
   210  		if agg != nil {
   211  			agg.Free()
   212  		}
   213  	}
   214  	bat.Attrs = nil
   215  	bat.rowCount = 0
   216  	bat.Vecs = nil
   217  }
   218  
   219  func (bat *Batch) Last() bool {
   220  	return bat.Recursive > 0
   221  }
   222  
   223  func (bat *Batch) SetEnd() {
   224  	bat.Recursive = 2
   225  }
   226  
   227  func (bat *Batch) SetLast() {
   228  	bat.Recursive = 1
   229  }
   230  
   231  func (bat *Batch) End() bool {
   232  	return bat.Recursive == 2
   233  }
   234  
   235  func (bat *Batch) CleanOnlyData() {
   236  	for _, vec := range bat.Vecs {
   237  		if vec != nil {
   238  			vec.CleanOnlyData()
   239  		}
   240  	}
   241  	bat.rowCount = 0
   242  }
   243  
   244  func (bat *Batch) String() string {
   245  	var buf bytes.Buffer
   246  
   247  	for i, vec := range bat.Vecs {
   248  		buf.WriteString(fmt.Sprintf("%d : %s\n", i, vec.String()))
   249  	}
   250  	return buf.String()
   251  }
   252  
   253  func (bat *Batch) Log(tag string) {
   254  	if bat == nil || bat.rowCount < 1 {
   255  		return
   256  	}
   257  	logutil.Infof("\n" + tag + "\n" + bat.String())
   258  }
   259  
   260  func (bat *Batch) Dup(mp *mpool.MPool) (*Batch, error) {
   261  	var err error
   262  
   263  	rbat := NewWithSize(len(bat.Vecs))
   264  	rbat.SetAttributes(bat.Attrs)
   265  	rbat.Recursive = bat.Recursive
   266  	for j, vec := range bat.Vecs {
   267  		typ := *bat.GetVector(int32(j)).GetType()
   268  		rvec := vector.NewVec(typ)
   269  		if err = vector.GetUnionAllFunction(typ, mp)(rvec, vec); err != nil {
   270  			rbat.Clean(mp)
   271  			return nil, err
   272  		}
   273  		rbat.SetVector(int32(j), rvec)
   274  	}
   275  	rbat.rowCount = bat.rowCount
   276  
   277  	//if len(bat.Aggs) > 0 {
   278  	//	rbat.Aggs = make([]aggexec.AggFuncExec, len(bat.Aggs))
   279  	//	aggMemoryManager := aggexec.NewSimpleAggMemoryManager(mp)
   280  	//
   281  	//	for i, agg := range bat.Aggs {
   282  	//		rbat.Aggs[i], err = aggexec.CopyAggFuncExec(aggMemoryManager, agg)
   283  	//		if err != nil {
   284  	//			rbat.Clean(mp)
   285  	//			return nil, err
   286  	//		}
   287  	//	}
   288  	//}
   289  	// if bat.AuxData != nil {
   290  	// 	if m, ok := bat.AuxData.(*hashmap.JoinMap); ok {
   291  	// rbat.AuxData = &hashmap.JoinMap{
   292  	// 	cnt: m
   293  	// }
   294  	// 	}
   295  	// }
   296  	return rbat, nil
   297  }
   298  
   299  func (bat *Batch) PreExtend(m *mpool.MPool, rows int) error {
   300  	for i := range bat.Vecs {
   301  		if err := bat.Vecs[i].PreExtend(rows, m); err != nil {
   302  			return err
   303  		}
   304  	}
   305  	return nil
   306  }
   307  
   308  func (bat *Batch) AppendWithCopy(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) {
   309  	if bat == nil {
   310  		return b.Dup(mh)
   311  	}
   312  	if len(bat.Vecs) != len(b.Vecs) {
   313  		return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append")
   314  	}
   315  	if len(bat.Vecs) == 0 {
   316  		return bat, nil
   317  	}
   318  
   319  	for i := range bat.Vecs {
   320  		if err := bat.Vecs[i].UnionBatch(b.Vecs[i], 0, b.Vecs[i].Length(), nil, mh); err != nil {
   321  			return bat, err
   322  		}
   323  		bat.Vecs[i].SetSorted(false)
   324  	}
   325  	bat.rowCount += b.rowCount
   326  	return bat, nil
   327  }
   328  
   329  func (bat *Batch) Append(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) {
   330  	if bat == nil {
   331  		return b, nil
   332  	}
   333  	if len(bat.Vecs) != len(b.Vecs) {
   334  		return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append")
   335  	}
   336  	if len(bat.Vecs) == 0 {
   337  		return bat, nil
   338  	}
   339  
   340  	for i := range bat.Vecs {
   341  		if err := bat.Vecs[i].UnionBatch(b.Vecs[i], 0, b.Vecs[i].Length(), nil, mh); err != nil {
   342  			return bat, err
   343  		}
   344  		bat.Vecs[i].SetSorted(false)
   345  	}
   346  	bat.rowCount += b.rowCount
   347  	return bat, nil
   348  }
   349  
   350  func (bat *Batch) AddRowCount(rowCount int) {
   351  	bat.rowCount += rowCount
   352  }
   353  
   354  func (bat *Batch) SetRowCount(rowCount int) {
   355  	bat.rowCount = rowCount
   356  }
   357  
   358  func (bat *Batch) AddCnt(cnt int) {
   359  	atomic.AddInt64(&bat.Cnt, int64(cnt))
   360  }
   361  
   362  // func (bat *Batch) SubCnt(cnt int) {
   363  // 	atomic.StoreInt64(&bat.Cnt, bat.Cnt-int64(cnt))
   364  // }
   365  
   366  func (bat *Batch) SetCnt(cnt int64) {
   367  	atomic.StoreInt64(&bat.Cnt, cnt)
   368  }
   369  
   370  func (bat *Batch) GetCnt() int64 {
   371  	return atomic.LoadInt64(&bat.Cnt)
   372  }
   373  
   374  func (bat *Batch) ReplaceVector(oldVec *vector.Vector, newVec *vector.Vector) {
   375  	for i, vec := range bat.Vecs {
   376  		if vec == oldVec {
   377  			bat.SetVector(int32(i), newVec)
   378  		}
   379  	}
   380  }
   381  
   382  func (bat *Batch) IsEmpty() bool {
   383  	return bat.rowCount == 0 && bat.AuxData == nil && len(bat.Aggs) == 0
   384  }
   385  
   386  func (bat *Batch) DupJmAuxData() (ret *hashmap.JoinMap) {
   387  	if bat.AuxData == nil {
   388  		return
   389  	}
   390  	jm := bat.AuxData.(*hashmap.JoinMap)
   391  	if jm.IsDup() {
   392  		ret = jm.Dup()
   393  	} else {
   394  		ret = jm
   395  		bat.AuxData = nil
   396  	}
   397  	return
   398  }