github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/mergetop/top.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mergetop
    16  
    17  import (
    18  	"bytes"
    19  	"container/heap"
    20  	"fmt"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/compare"
    23  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    24  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    26  	"github.com/matrixorigin/matrixone/pkg/vm"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    28  )
    29  
// argName is the operator name that String writes at the start of its output.
const argName = "merge_top"
    31  
    32  func (arg *Argument) String(buf *bytes.Buffer) {
    33  	buf.WriteString(argName)
    34  	ap := arg
    35  	buf.WriteString(": mergetop([")
    36  	for i, f := range ap.Fs {
    37  		if i > 0 {
    38  			buf.WriteString(", ")
    39  		}
    40  		buf.WriteString(f.String())
    41  	}
    42  	buf.WriteString(fmt.Sprintf("], %v)", ap.Limit))
    43  }
    44  
    45  func (arg *Argument) Prepare(proc *process.Process) (err error) {
    46  	ap := arg
    47  	ap.ctr = new(container)
    48  	ap.ctr.InitReceiver(proc, true)
    49  	if ap.Limit > 1024 {
    50  		ap.ctr.sels = make([]int64, 0, 1024)
    51  	} else {
    52  		ap.ctr.sels = make([]int64, 0, ap.Limit)
    53  	}
    54  	ap.ctr.poses = make([]int32, 0, len(ap.Fs))
    55  
    56  	ctr := ap.ctr
    57  	ctr.executorsForOrderList = make([]colexec.ExpressionExecutor, len(ap.Fs))
    58  	for i := range ctr.executorsForOrderList {
    59  		ctr.executorsForOrderList[i], err = colexec.NewExpressionExecutor(proc, ap.Fs[i].Expr)
    60  		if err != nil {
    61  			return err
    62  		}
    63  	}
    64  	return nil
    65  }
    66  
    67  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
    68  	if err, isCancel := vm.CancelCheck(proc); isCancel {
    69  		return vm.CancelResult, err
    70  	}
    71  
    72  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
    73  	anal.Start()
    74  	defer anal.Stop()
    75  	ap := arg
    76  	ctr := ap.ctr
    77  	result := vm.NewCallResult()
    78  	if ap.Limit == 0 {
    79  		result.Batch = nil
    80  		result.Status = vm.ExecStop
    81  		return result, nil
    82  	}
    83  
    84  	if end, err := ctr.build(ap, proc, anal, arg.GetIsFirst()); err != nil {
    85  		return result, err
    86  	} else if end {
    87  		result.Status = vm.ExecStop
    88  		return result, nil
    89  	}
    90  
    91  	if ctr.bat == nil {
    92  		result.Batch = nil
    93  		result.Status = vm.ExecStop
    94  		return result, nil
    95  	}
    96  	err := ctr.eval(ap.Limit, proc, anal, arg.GetIsLast(), &result)
    97  	if err == nil {
    98  		result.Status = vm.ExecStop
    99  		return result, nil
   100  	}
   101  	return result, err
   102  }
   103  
   104  func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool) (bool, error) {
   105  	for {
   106  		bat, end, err := ctr.ReceiveFromAllRegs(anal)
   107  		if err != nil {
   108  			return true, nil
   109  		}
   110  		if end {
   111  			return false, nil
   112  		}
   113  
   114  		anal.Input(bat, isFirst)
   115  
   116  		ctr.n = len(bat.Vecs)
   117  		ctr.poses = ctr.poses[:0]
   118  		for i := range ctr.executorsForOrderList {
   119  			if ctr.executorsForOrderList[i].IsColumnExpr() {
   120  				colIndex := ctr.executorsForOrderList[i].(*colexec.ColumnExpressionExecutor).GetColIndex()
   121  				ctr.poses = append(ctr.poses, int32(colIndex))
   122  			} else {
   123  				vec, err := ctr.executorsForOrderList[i].EvalWithoutResultReusing(proc, []*batch.Batch{bat})
   124  				if err != nil {
   125  					return false, err
   126  				}
   127  				ctr.poses = append(ctr.poses, int32(len(bat.Vecs)))
   128  				bat.Vecs = append(bat.Vecs, vec)
   129  				anal.Alloc(int64(vec.Size()))
   130  			}
   131  		}
   132  
   133  		if ctr.bat == nil {
   134  			mp := make(map[int]int, len(ctr.poses))
   135  			for i, pos := range ctr.poses {
   136  				mp[int(pos)] = i
   137  			}
   138  			ctr.bat = batch.NewWithSize(len(bat.Vecs))
   139  			for i, vec := range bat.Vecs {
   140  				ctr.bat.Vecs[i] = proc.GetVector(*vec.GetType())
   141  			}
   142  			ctr.cmps = make([]compare.Compare, len(bat.Vecs))
   143  			for i := range ctr.cmps {
   144  				var desc, nullsLast bool
   145  				if pos, ok := mp[i]; ok {
   146  					desc = ap.Fs[pos].Flag&plan.OrderBySpec_DESC != 0
   147  					if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_FIRST != 0 {
   148  						nullsLast = false
   149  					} else if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_LAST != 0 {
   150  						nullsLast = true
   151  					} else {
   152  						nullsLast = desc
   153  					}
   154  				}
   155  				ctr.cmps[i] = compare.New(*bat.Vecs[i].GetType(), desc, nullsLast)
   156  			}
   157  		}
   158  
   159  		if err := ctr.processBatch(ap.Limit, bat, proc); err != nil {
   160  			bat.Clean(proc.Mp())
   161  			return false, err
   162  		}
   163  		proc.PutBatch(bat)
   164  	}
   165  }
   166  
   167  func (ctr *container) processBatch(limit int64, bat *batch.Batch, proc *process.Process) error {
   168  	var start int64
   169  
   170  	length := int64(bat.RowCount())
   171  	if n := int64(len(ctr.sels)); n < limit {
   172  		start = limit - n
   173  		if start > length {
   174  			start = length
   175  		}
   176  		for i := int64(0); i < start; i++ {
   177  			for j, vec := range ctr.bat.Vecs {
   178  				if err := vec.UnionOne(bat.Vecs[j], i, proc.Mp()); err != nil {
   179  					return err
   180  				}
   181  			}
   182  			ctr.sels = append(ctr.sels, n)
   183  			n++
   184  		}
   185  		ctr.bat.AddRowCount(bat.RowCount())
   186  		if n == limit {
   187  			ctr.sort()
   188  		}
   189  	}
   190  	if start == length {
   191  		return nil
   192  	}
   193  
   194  	// bat is still have items
   195  	for i, cmp := range ctr.cmps {
   196  		cmp.Set(1, bat.Vecs[i])
   197  	}
   198  	for i, j := start, length; i < j; i++ {
   199  		if ctr.compare(1, 0, i, ctr.sels[0]) < 0 {
   200  			for _, cmp := range ctr.cmps {
   201  				if err := cmp.Copy(1, 0, i, ctr.sels[0], proc); err != nil {
   202  					return err
   203  				}
   204  			}
   205  			heap.Fix(ctr, 0)
   206  		}
   207  	}
   208  	return nil
   209  }
   210  
   211  func (ctr *container) eval(limit int64, proc *process.Process, anal process.Analyze, isLast bool, result *vm.CallResult) error {
   212  	if int64(len(ctr.sels)) < limit {
   213  		ctr.sort()
   214  	}
   215  	for i, cmp := range ctr.cmps {
   216  		ctr.bat.Vecs[i] = cmp.Vector()
   217  	}
   218  	sels := make([]int64, len(ctr.sels))
   219  	for i, j := 0, len(ctr.sels); i < j; i++ {
   220  		sels[len(sels)-1-i] = heap.Pop(ctr).(int64)
   221  	}
   222  	if err := ctr.bat.Shuffle(sels, proc.Mp()); err != nil {
   223  		return err
   224  	}
   225  	for i := ctr.n; i < len(ctr.bat.Vecs); i++ {
   226  		ctr.bat.Vecs[i].Free(proc.Mp())
   227  	}
   228  	ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n]
   229  	anal.Output(ctr.bat, isLast)
   230  	result.Batch = ctr.bat
   231  	return nil
   232  }
   233  
   234  // do sort work for heap, and result order will be set in container.sels
   235  func (ctr *container) sort() {
   236  	for i, cmp := range ctr.cmps {
   237  		cmp.Set(0, ctr.bat.Vecs[i])
   238  	}
   239  	heap.Init(ctr)
   240  }