github.com/matrixorigin/matrixone@v0.7.0/pkg/container/batch/batch.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package batch
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"sync/atomic"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/sql/colexec/agg"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    27  	"github.com/matrixorigin/matrixone/pkg/container/index"
    28  	"github.com/matrixorigin/matrixone/pkg/container/types"
    29  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    30  	"github.com/matrixorigin/matrixone/pkg/util/fault"
    31  	"github.com/matrixorigin/matrixone/pkg/vectorize/shuffle"
    32  )
    33  
    34  func New(ro bool, attrs []string) *Batch {
    35  	return &Batch{
    36  		Ro:    ro,
    37  		Attrs: attrs,
    38  		Vecs:  make([]*vector.Vector, len(attrs)),
    39  	}
    40  }
    41  
    42  func Reorder(bat *Batch, attrs []string) {
    43  	if bat.Ro {
    44  		Cow(bat)
    45  	}
    46  	for i, name := range attrs {
    47  		for j, attr := range bat.Attrs {
    48  			if name == attr {
    49  				bat.Vecs[i], bat.Vecs[j] = bat.Vecs[j], bat.Vecs[i]
    50  				bat.Attrs[i], bat.Attrs[j] = bat.Attrs[j], bat.Attrs[i]
    51  			}
    52  		}
    53  	}
    54  }
    55  
    56  func SetLength(bat *Batch, n int) {
    57  	for _, vec := range bat.Vecs {
    58  		vector.SetLength(vec, n)
    59  	}
    60  	bat.Zs = bat.Zs[:n]
    61  }
    62  
    63  func Length(bat *Batch) int {
    64  	return len(bat.Zs)
    65  }
    66  
    67  func Cow(bat *Batch) {
    68  	attrs := make([]string, len(bat.Attrs))
    69  	copy(attrs, bat.Attrs)
    70  	bat.Ro = false
    71  	bat.Attrs = attrs
    72  }
    73  
    74  func NewWithSize(n int) *Batch {
    75  	return &Batch{
    76  		Cnt:  1,
    77  		Vecs: make([]*vector.Vector, n),
    78  	}
    79  }
    80  
    81  func (info *aggInfo) MarshalBinary() ([]byte, error) {
    82  	var buf bytes.Buffer
    83  	i32 := int32(info.Op)
    84  	buf.Write(types.EncodeInt32(&i32))
    85  	buf.Write(types.EncodeBool(&info.Dist))
    86  	buf.Write(types.EncodeType(&info.inputTypes))
    87  	data, err := types.Encode(info.Agg)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	buf.Write(data)
    92  	return buf.Bytes(), nil
    93  }
    94  
    95  func (info *aggInfo) UnmarshalBinary(data []byte) error {
    96  	info.Op = int(types.DecodeInt32(data[:4]))
    97  	data = data[4:]
    98  	info.Dist = types.DecodeBool(data[:1])
    99  	data = data[1:]
   100  	info.inputTypes = types.DecodeType(data[:types.TSize])
   101  	data = data[types.TSize:]
   102  	aggregate, err := agg.New(info.Op, info.Dist, info.inputTypes)
   103  	if err != nil {
   104  		return err
   105  	}
   106  	info.Agg = aggregate
   107  	return types.Decode(data, info.Agg)
   108  }
   109  
   110  func (bat *Batch) MarshalBinary() ([]byte, error) {
   111  	aggInfo := make([]aggInfo, len(bat.Aggs))
   112  	for i := range aggInfo {
   113  		aggInfo[i].Op = bat.Aggs[i].GetOperatorId()
   114  		aggInfo[i].inputTypes = bat.Aggs[i].GetInputTypes()[0]
   115  		aggInfo[i].Dist = bat.Aggs[i].IsDistinct()
   116  		aggInfo[i].Agg = bat.Aggs[i]
   117  	}
   118  	return types.Encode(&EncodeBatch{
   119  		Zs:       bat.Zs,
   120  		Vecs:     bat.Vecs,
   121  		Attrs:    bat.Attrs,
   122  		AggInfos: aggInfo,
   123  	})
   124  }
   125  
   126  func (bat *Batch) UnmarshalBinary(data []byte) error {
   127  	rbat := new(EncodeBatch)
   128  
   129  	if err := types.Decode(data, rbat); err != nil {
   130  		return err
   131  	}
   132  	bat.Cnt = 1
   133  	bat.Zs = rbat.Zs // if you drop rbat.Zs is ok, if you need return rbat,  you must deepcopy Zs.
   134  	bat.Vecs = rbat.Vecs
   135  	bat.Attrs = rbat.Attrs
   136  	bat.Aggs = make([]agg.Agg[any], len(rbat.AggInfos))
   137  	for i, info := range rbat.AggInfos {
   138  		bat.Aggs[i] = info.Agg
   139  	}
   140  	return nil
   141  }
   142  
   143  func (bat *Batch) ExpandNulls() {
   144  	if len(bat.Zs) > 0 {
   145  		for i := range bat.Vecs {
   146  			bat.Vecs[i].TryExpandNulls(len(bat.Zs))
   147  		}
   148  	}
   149  }
   150  
   151  func (bat *Batch) Shrink(sels []int64) {
   152  	mp := make(map[*vector.Vector]uint8)
   153  	for _, vec := range bat.Vecs {
   154  		if _, ok := mp[vec]; ok {
   155  			continue
   156  		}
   157  		mp[vec]++
   158  		vector.Shrink(vec, sels)
   159  	}
   160  	vs := bat.Zs
   161  	for i, sel := range sels {
   162  		vs[i] = vs[sel]
   163  	}
   164  	bat.Zs = bat.Zs[:len(sels)]
   165  }
   166  
   167  func (bat *Batch) Shuffle(sels []int64, m *mpool.MPool) error {
   168  	if len(sels) > 0 {
   169  		mp := make(map[*vector.Vector]uint8)
   170  		for _, vec := range bat.Vecs {
   171  			if _, ok := mp[vec]; ok {
   172  				continue
   173  			}
   174  			mp[vec]++
   175  			if err := vector.Shuffle(vec, sels, m); err != nil {
   176  				return err
   177  			}
   178  		}
   179  
   180  		ws := make([]int64, len(sels))
   181  		bat.Zs = shuffle.FixedLengthShuffle(bat.Zs, ws, sels)
   182  	}
   183  	return nil
   184  }
   185  
   186  func (bat *Batch) Size() int {
   187  	var size int
   188  
   189  	for _, vec := range bat.Vecs {
   190  		size += vec.Size()
   191  	}
   192  	return size
   193  }
   194  
   195  func (bat *Batch) Length() int {
   196  	return len(bat.Zs)
   197  }
   198  
   199  func (bat *Batch) VectorCount() int {
   200  	return len(bat.Vecs)
   201  }
   202  
   203  func (bat *Batch) Prefetch(poses []int32, vecs []*vector.Vector) {
   204  	for i, pos := range poses {
   205  		vecs[i] = bat.GetVector(pos)
   206  	}
   207  }
   208  
   209  func (bat *Batch) SetAttributes(attrs []string) {
   210  	bat.Attrs = attrs
   211  }
   212  
   213  func (bat *Batch) SetVector(pos int32, vec *vector.Vector) {
   214  	bat.Vecs[pos] = vec
   215  }
   216  
   217  func (bat *Batch) GetVector(pos int32) *vector.Vector {
   218  	return bat.Vecs[pos]
   219  }
   220  
   221  func (bat *Batch) GetSubBatch(cols []string) *Batch {
   222  	mp := make(map[string]int)
   223  	for i, attr := range bat.Attrs {
   224  		mp[attr] = i
   225  	}
   226  	rbat := NewWithSize(len(cols))
   227  	for i, col := range cols {
   228  		rbat.Vecs[i] = bat.Vecs[mp[col]]
   229  	}
   230  	rbat.Zs = append([]int64{}, bat.Zs...)
   231  	return rbat
   232  }
   233  
   234  func (bat *Batch) Clean(m *mpool.MPool) {
   235  	if atomic.AddInt64(&bat.Cnt, -1) != 0 {
   236  		return
   237  	}
   238  	for _, vec := range bat.Vecs {
   239  		if vec != nil {
   240  			vec.Free(m)
   241  			if vec.IsLowCardinality() {
   242  				vec.Index().(*index.LowCardinalityIndex).Free()
   243  			}
   244  		}
   245  	}
   246  	for _, agg := range bat.Aggs {
   247  		if agg != nil {
   248  			agg.Free(m)
   249  		}
   250  	}
   251  	if len(bat.Zs) != 0 {
   252  		m.PutSels(bat.Zs)
   253  		bat.Zs = nil
   254  	}
   255  	bat.Vecs = nil
   256  }
   257  
   258  func (bat *Batch) String() string {
   259  	var buf bytes.Buffer
   260  
   261  	for i, vec := range bat.Vecs {
   262  		buf.WriteString(fmt.Sprintf("%v\n", i))
   263  		if len(bat.Zs) > 0 {
   264  			buf.WriteString(fmt.Sprintf("\t%s\n", vec))
   265  		}
   266  	}
   267  	return buf.String()
   268  }
   269  
   270  func (bat *Batch) Append(ctx context.Context, mh *mpool.MPool, b *Batch) (*Batch, error) {
   271  	if bat == nil {
   272  		return b, nil
   273  	}
   274  	if len(bat.Vecs) != len(b.Vecs) {
   275  		return nil, moerr.NewInternalError(ctx, "unexpected error happens in batch append")
   276  	}
   277  	if len(bat.Vecs) == 0 {
   278  		return bat, nil
   279  	}
   280  
   281  	// XXX Here is a good place to trigger an panic for fault injection.
   282  	// fault.AddFaultPoint("panic_in_batch_append", ":::", "PANIC", 0, "")
   283  	fault.TriggerFault("panic_in_batch_append")
   284  
   285  	flags := make([]uint8, vector.Length(b.Vecs[0]))
   286  	for i := range flags {
   287  		flags[i]++
   288  	}
   289  	for i := range bat.Vecs {
   290  		if err := vector.UnionBatch(bat.Vecs[i], b.Vecs[i], 0, vector.Length(b.Vecs[i]), flags[:vector.Length(b.Vecs[i])], mh); err != nil {
   291  			return bat, err
   292  		}
   293  		if b.Vecs[i].IsLowCardinality() {
   294  			idx := b.Vecs[i].Index().(*index.LowCardinalityIndex)
   295  			if bat.Vecs[i].Index() == nil {
   296  				bat.Vecs[i].SetIndex(idx.Dup())
   297  			} else {
   298  				appendIdx := bat.Vecs[i].Index().(*index.LowCardinalityIndex)
   299  				dst, src := appendIdx.GetPoses(), idx.GetPoses()
   300  				if err := vector.UnionBatch(dst, src, 0, vector.Length(src), flags[:vector.Length(src)], mh); err != nil {
   301  					return bat, err
   302  				}
   303  			}
   304  		}
   305  	}
   306  	bat.Zs = append(bat.Zs, b.Zs...)
   307  	return bat, nil
   308  }
   309  
   310  // XXX I will slowly remove all code that uses InitZsone.
   311  func (bat *Batch) SetZs(len int, m *mpool.MPool) {
   312  	bat.Zs = m.GetSels()
   313  	for i := 0; i < len; i++ {
   314  		bat.Zs = append(bat.Zs, 1)
   315  	}
   316  }
   317  
   318  // InitZsOne init Batch.Zs and values are all 1
   319  func (bat *Batch) InitZsOne(len int) {
   320  	bat.Zs = make([]int64, len)
   321  	for i := range bat.Zs {
   322  		bat.Zs[i]++
   323  	}
   324  }