github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/process/process.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package process

import (
	"context"
	"sync/atomic"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/batch"
	"github.com/matrixorigin/matrixone/pkg/container/nulls"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
	"github.com/matrixorigin/matrixone/pkg/defines"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	"github.com/matrixorigin/matrixone/pkg/incrservice"
	"github.com/matrixorigin/matrixone/pkg/lockservice"
	"github.com/matrixorigin/matrixone/pkg/logservice"
	qclient "github.com/matrixorigin/matrixone/pkg/queryservice/client"
	"github.com/matrixorigin/matrixone/pkg/txn/client"
	"github.com/matrixorigin/matrixone/pkg/udf"
	"github.com/matrixorigin/matrixone/pkg/util/trace"
)

const DefaultBatchSize = 8192

// New creates a new Process.
// A process stores the execution context.
func New(
	ctx context.Context,
	m *mpool.MPool,
	txnClient client.TxnClient,
	txnOperator client.TxnOperator,
	fileService fileservice.FileService,
	lockService lockservice.LockService,
	queryClient qclient.QueryClient,
	hakeeper logservice.CNHAKeeperClient,
	udfService udf.Service,
	aicm *defines.AutoIncrCacheManager) *Process {
	return &Process{
		mp:           m,
		Ctx:          ctx,
		TxnClient:    txnClient,
		TxnOperator:  txnOperator,
		FileService:  fileService,
		IncrService:  incrservice.GetAutoIncrementService(ctx),
		UnixTime:     time.Now().UnixNano(),
		LastInsertID: new(uint64),
		LockService:  lockService,
		Aicm:         aicm,
		vp: &vectorPool{
			vecs:  make(map[uint8][]*vector.Vector),
			Limit: VectorLimit,
		},
		valueScanBatch: make(map[[16]byte]*batch.Batch),
		QueryClient:    queryClient,
		Hakeeper:       hakeeper,
		UdfService:     udfService,
	}
}
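
// A minimal usage sketch for New (illustrative, not taken from this repository:
// the pool name is a placeholder, and the nil arguments stand in for the txn,
// file, lock, query, hakeeper, udf and auto-increment services that real
// callers provide):
//
//	mp := mpool.MustNewNoFixed("example_proc_mp")
//	proc := New(context.Background(), mp, nil, nil, nil, nil, nil, nil, nil, nil)
//	defer proc.FreeVectors()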

// NewWithAnalyze creates a new Process based on p and attaches a copy of the
// given analyze infos to it.
func NewWithAnalyze(p *Process, ctx context.Context, regNumber int, anals []*AnalyzeInfo) *Process {
	proc := NewFromProc(p, ctx, regNumber)
	proc.AnalInfos = make([]*AnalyzeInfo, len(anals))
	copy(proc.AnalInfos, anals)
	return proc
}

// NewFromProc creates a new Process based on another process.
func NewFromProc(p *Process, ctx context.Context, regNumber int) *Process {
	proc := new(Process)
	newctx, cancel := context.WithCancel(ctx)
	proc.Id = p.Id
	proc.vp = p.vp
	proc.mp = p.Mp()
	proc.prepareBatch = p.prepareBatch
	proc.prepareExprList = p.prepareExprList
	proc.Lim = p.Lim
	proc.TxnClient = p.TxnClient
	proc.TxnOperator = p.TxnOperator
	proc.AnalInfos = p.AnalInfos
	proc.SessionInfo = p.SessionInfo
	proc.FileService = p.FileService
	proc.IncrService = p.IncrService
	proc.QueryClient = p.QueryClient
	proc.Hakeeper = p.Hakeeper
	proc.UdfService = p.UdfService
	proc.UnixTime = p.UnixTime
	proc.LastInsertID = p.LastInsertID
	proc.LockService = p.LockService
	proc.Aicm = p.Aicm
	proc.LoadTag = p.LoadTag
	proc.MessageBoard = p.MessageBoard

	proc.prepareParams = p.prepareParams
	proc.resolveVariableFunc = p.resolveVariableFunc

	// reg and cancel
	proc.Ctx = newctx
	proc.Cancel = cancel
	proc.Reg.MergeReceivers = make([]*WaitRegister, regNumber)
	for i := 0; i < regNumber; i++ {
		proc.Reg.MergeReceivers[i] = &WaitRegister{
			Ctx: newctx,
			Ch:  make(chan *batch.Batch, 1),
		}
	}
	proc.DispatchNotifyCh = make(chan WrapCs)
	proc.LoadLocalReader = p.LoadLocalReader
	proc.WaitPolicy = p.WaitPolicy
	return proc
}
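
// A minimal sketch of how a downstream operator might drain one of the merge
// receivers wired up above (illustrative only; real operators live elsewhere in
// the engine and also handle error and notification signalling):
//
//	func drain(proc *Process, i int) {
//		reg := proc.Reg.MergeReceivers[i]
//		for {
//			select {
//			case <-reg.Ctx.Done():
//				return
//			case bat := <-reg.Ch:
//				if bat == nil {
//					return // upstream is done
//				}
//				// ... consume bat ...
//				proc.PutBatch(bat)
//			}
//		}
//	}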

func (wreg *WaitRegister) CleanChannel(m *mpool.MPool) {
	for len(wreg.Ch) > 0 {
		bat := <-wreg.Ch
		if bat != nil {
			bat.Clean(m)
		}
	}
}

func (wreg *WaitRegister) MarshalBinary() ([]byte, error) {
	return nil, nil
}

func (wreg *WaitRegister) UnmarshalBinary(_ []byte) error {
	return nil
}

func (proc *Process) MarshalBinary() ([]byte, error) {
	return nil, nil
}

func (proc *Process) UnmarshalBinary(_ []byte) error {
	return nil
}

func (proc *Process) QueryId() string {
	return proc.Id
}

func (proc *Process) SetQueryId(id string) {
	proc.Id = id
}

// XXX MPOOL
// Sometimes we call an expression evaluation function without a proc (test only?);
// in that case, all expr eval code gets a nil mp, which is wrong.
// So far most such cases come from plan.ConstantFold -> colexec.EvalExpr, which is
// busted, so we hack in a fallback mpool. This is by design a zero MP, so there
// will be no real leaks, except that we leak counters in globalStats.
var xxxProcMp = mpool.MustNewNoFixed("fallback_proc_mp")

func (proc *Process) GetMPool() *mpool.MPool {
	if proc == nil {
		return xxxProcMp
	}
	return proc.mp
}

func (proc *Process) Mp() *mpool.MPool {
	return proc.GetMPool()
}
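
// Note that GetMPool is nil-receiver safe, so expression code handed a nil
// *Process still gets a usable (fallback) pool:
//
//	var p *Process
//	mp := p.GetMPool() // returns xxxProcMp rather than panicking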

func (proc *Process) GetPrepareParams() *vector.Vector {
	return proc.prepareParams
}

func (proc *Process) SetPrepareParams(prepareParams *vector.Vector) {
	proc.prepareParams = prepareParams
}

func (proc *Process) SetPrepareBatch(bat *batch.Batch) {
	proc.prepareBatch = bat
}

func (proc *Process) GetPrepareBatch() *batch.Batch {
	return proc.prepareBatch
}

func (proc *Process) SetPrepareExprList(exprList any) {
	proc.prepareExprList = exprList
}

func (proc *Process) GetPrepareExprList() any {
	return proc.prepareExprList
}

func (proc *Process) OperatorOutofMemory(size int64) bool {
	return proc.Mp().Cap() < size
}

func (proc *Process) SetInputBatch(bat *batch.Batch) {
	proc.Reg.InputBatch = bat
}

func (proc *Process) InputBatch() *batch.Batch {
	return proc.Reg.InputBatch
}

func (proc *Process) ResetContextFromParent(parent context.Context) context.Context {
	newctx, cancel := context.WithCancel(parent)

	proc.Ctx = newctx
	proc.Cancel = cancel

	for i := range proc.Reg.MergeReceivers {
		proc.Reg.MergeReceivers[i].Ctx = newctx
	}
	return newctx
}

func (proc *Process) GetAnalyze(idx, parallelIdx int, parallelMajor bool) Analyze {
	if idx >= len(proc.AnalInfos) || idx < 0 {
		return &analyze{analInfo: nil, parallelIdx: parallelIdx, parallelMajor: parallelMajor}
	}
	return &analyze{analInfo: proc.AnalInfos[idx], wait: 0, parallelIdx: parallelIdx, parallelMajor: parallelMajor}
}

// AllocVectorOfRows allocates a vector of the given type with nele rows,
// copying nsp into its null bitmap when nsp is not nil.
func (proc *Process) AllocVectorOfRows(typ types.Type, nele int, nsp *nulls.Nulls) (*vector.Vector, error) {
	vec := proc.GetVector(typ)
	err := vec.PreExtend(nele, proc.Mp())
	if err != nil {
		return nil, err
	}
	vec.SetLength(nele)
	if nsp != nil {
		nulls.Set(vec.GetNulls(), nsp)
	}
	return vec, nil
}
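
// A minimal usage sketch for AllocVectorOfRows (illustrative; int64 is just an
// example element type and the row count is arbitrary):
//
//	vec, err := proc.AllocVectorOfRows(types.T_int64.ToType(), 1024, nil)
//	if err != nil {
//		return err
//	}
//	defer proc.PutVector(vec)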

func (proc *Process) WithSpanContext(sc trace.SpanContext) {
	proc.Ctx = trace.ContextWithSpanContext(proc.Ctx, sc)
}

func (proc *Process) CopyValueScanBatch(src *Process) {
	proc.valueScanBatch = src.valueScanBatch
}

func (proc *Process) SetVectorPoolSize(limit int) {
	proc.vp.Limit = limit
}

func (proc *Process) CopyVectorPool(src *Process) {
	proc.vp = src.vp
}

// NewBatchFromSrc creates an empty batch with the same schema as src,
// pre-extending every vector to at least preAllocSize rows.
func (proc *Process) NewBatchFromSrc(src *batch.Batch, preAllocSize int) (*batch.Batch, error) {
	bat := batch.NewWithSize(len(src.Vecs))
	bat.SetAttributes(src.Attrs)
	bat.Recursive = src.Recursive
	for i := range bat.Vecs {
		v := proc.GetVector(*src.Vecs[i].GetType())
		if v.Capacity() < preAllocSize {
			err := v.PreExtend(preAllocSize, proc.Mp())
			if err != nil {
				return nil, err
			}
		}
		bat.Vecs[i] = v
	}
	return bat, nil
}

// AppendToFixedSizeFromOffset appends rows from src (starting at offset) to dst,
// never letting dst grow beyond DefaultBatchSize rows. It returns the resulting
// batch and the number of rows actually appended; dst may be nil, in which case
// a new batch with src's schema is allocated first.
func (proc *Process) AppendToFixedSizeFromOffset(dst *batch.Batch, src *batch.Batch, offset int) (*batch.Batch, int, error) {
	var err error
	if dst == nil {
		dst, err = proc.NewBatchFromSrc(src, 0)
		if err != nil {
			return nil, 0, err
		}
	}
	if dst.RowCount() >= DefaultBatchSize {
		panic("can't call AppendToFixedSizeFromOffset when batch is full!")
	}
	if len(dst.Vecs) != len(src.Vecs) {
		return nil, 0, moerr.NewInternalError(proc.Ctx, "unexpected error happens in batch append")
	}
	length := DefaultBatchSize - dst.RowCount()
	if length+offset > src.RowCount() {
		length = src.RowCount() - offset
	}
	for i := range dst.Vecs {
		if err = dst.Vecs[i].UnionBatch(src.Vecs[i], int64(offset), length, nil, proc.Mp()); err != nil {
			return dst, 0, err
		}
		dst.Vecs[i].SetSorted(false)
	}
	dst.AddRowCount(length)
	return dst, length, nil
}
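
// A minimal sketch of chunking a large source batch into DefaultBatchSize-row
// pieces with AppendToFixedSizeFromOffset (illustrative; emit is a hypothetical
// callback standing in for whatever consumes each full batch, and a partially
// filled trailing batch is left for the caller to flush):
//
//	func rechunk(proc *Process, src *batch.Batch, emit func(*batch.Batch)) error {
//		var dst *batch.Batch
//		offset := 0
//		for offset < src.RowCount() {
//			var n int
//			var err error
//			if dst, n, err = proc.AppendToFixedSizeFromOffset(dst, src, offset); err != nil {
//				return err
//			}
//			offset += n
//			if dst.RowCount() == DefaultBatchSize {
//				emit(dst)
//				dst = nil
//			}
//		}
//		return nil
//	}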

func (proc *Process) PutBatch(bat *batch.Batch) {
	if bat == batch.EmptyBatch {
		return
	}
	if atomic.LoadInt64(&bat.Cnt) == 0 {
		panic("put batch with zero cnt")
	}
	if atomic.AddInt64(&bat.Cnt, -1) > 0 {
		return
	}
	for _, vec := range bat.Vecs {
		if vec != nil {
			// Very large vectors should not be put back into the pool; otherwise that
			// memory can never be released.
			// XXX The old logic is kept here, but it is unreasonable to use the number
			// of rows to judge a vector's size; Allocated() may be more suitable.
			if vec.IsConst() || vec.NeedDup() || vec.Allocated() > 8192*64 {
				vec.Free(proc.mp)
				bat.ReplaceVector(vec, nil)
				continue
			}

			if !proc.vp.putVector(vec) {
				vec.Free(proc.mp)
			}
			bat.ReplaceVector(vec, nil)
		}
	}
	for _, agg := range bat.Aggs {
		if agg != nil {
			agg.Free()
		}
	}
	bat.Vecs = nil
	bat.Attrs = nil
	bat.SetRowCount(0)
}
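
// A sketch of the reference counting PutBatch relies on (illustrative; the
// producer is assumed to have set bat.Cnt to the number of consumers sharing
// the batch):
//
//	atomic.StoreInt64(&bat.Cnt, 2) // shared by two consumers
//	proc.PutBatch(bat)             // first release: Cnt drops to 1, nothing is freed
//	proc.PutBatch(bat)             // last release: vectors are pooled or freed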

func (proc *Process) FreeVectors() {
	proc.vp.freeVectors(proc.Mp())
}

func (proc *Process) PutVector(vec *vector.Vector) {
	if !proc.vp.putVector(vec) {
		vec.Free(proc.Mp())
	}
}

func (proc *Process) GetVector(typ types.Type) *vector.Vector {
	if vec := proc.vp.getVector(typ); vec != nil {
		vec.Reset(typ)
		return vec
	}
	return vector.NewVec(typ)
}
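
// A minimal sketch of the get/put reuse cycle backed by the vector pool below
// (illustrative; the element type is an arbitrary example):
//
//	vec := proc.GetVector(types.T_varchar.ToType())
//	// ... fill vec ...
//	proc.PutVector(vec) // back to the pool, or freed if the pool is at its limit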

// freeVectors releases every pooled vector back to the mpool and empties the pool.
func (vp *vectorPool) freeVectors(mp *mpool.MPool) {
	vp.Lock()
	defer vp.Unlock()
	for k, vecs := range vp.vecs {
		for _, vec := range vecs {
			vec.Free(mp)
		}
		delete(vp.vecs, k)
	}
}

// putVector stores vec in the pool, keyed by its type Oid. It reports false when
// the per-type limit has been reached and the vector was not pooled.
func (vp *vectorPool) putVector(vec *vector.Vector) bool {
	vp.Lock()
	defer vp.Unlock()
	key := uint8(vec.GetType().Oid)
	if len(vp.vecs[key]) >= vp.Limit {
		return false
	}
	vp.vecs[key] = append(vp.vecs[key], vec)
	return true
}

// getVector pops a pooled vector of the given type, or returns nil if none is cached.
func (vp *vectorPool) getVector(typ types.Type) *vector.Vector {
	vp.Lock()
	defer vp.Unlock()
	key := uint8(typ.Oid)
	if vecs := vp.vecs[key]; len(vecs) > 0 {
		vec := vecs[len(vecs)-1]
		vp.vecs[key] = vecs[:len(vecs)-1]
		return vec
	}
	return nil
}