github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/mergeorder/order.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mergeorder
    16  
    17  import (
    18  	"bytes"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/compare"
    21  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  	plan2 "github.com/matrixorigin/matrixone/pkg/pb/plan"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    26  	"github.com/matrixorigin/matrixone/pkg/sql/plan"
    27  	"github.com/matrixorigin/matrixone/pkg/vm"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    29  )
    30  
// argName is the operator name that Argument.String writes at the start of
// its description.
const argName = "merge_order"
    32  
    33  func (ctr *container) mergeAndEvaluateOrderColumn(proc *process.Process, bat *batch.Batch) error {
    34  	ctr.batchList = append(ctr.batchList, bat)
    35  	ctr.orderCols = append(ctr.orderCols, nil)
    36  	// if only one batch, no need to evaluate the order column.
    37  	if len(ctr.batchList) == 1 {
    38  		return nil
    39  	}
    40  
    41  	index := len(ctr.orderCols) - 1
    42  	return ctr.evaluateOrderColumn(proc, index)
    43  }
    44  
    45  func (ctr *container) evaluateOrderColumn(proc *process.Process, index int) error {
    46  	inputs := []*batch.Batch{ctr.batchList[index]}
    47  
    48  	ctr.orderCols[index] = make([]*vector.Vector, len(ctr.executors))
    49  	for i := 0; i < len(ctr.executors); i++ {
    50  		vec, err := ctr.executors[i].EvalWithoutResultReusing(proc, inputs)
    51  		if err != nil {
    52  			return err
    53  		}
    54  		ctr.orderCols[index][i] = vec
    55  	}
    56  	return nil
    57  }
    58  
    59  func (ctr *container) generateCompares(fs []*plan.OrderBySpec) {
    60  	var desc, nullsLast bool
    61  
    62  	ctr.compares = make([]compare.Compare, len(fs))
    63  	for i := range ctr.compares {
    64  		desc = fs[i].Flag&plan2.OrderBySpec_DESC != 0
    65  		if fs[i].Flag&plan2.OrderBySpec_NULLS_FIRST != 0 {
    66  			nullsLast = false
    67  		} else if fs[i].Flag&plan2.OrderBySpec_NULLS_LAST != 0 {
    68  			nullsLast = true
    69  		} else {
    70  			nullsLast = desc
    71  		}
    72  
    73  		exprTyp := fs[i].Expr.Typ
    74  		typ := types.New(types.T(exprTyp.Id), exprTyp.Width, exprTyp.Scale)
    75  		ctr.compares[i] = compare.New(typ, desc, nullsLast)
    76  	}
    77  }
    78  
    79  func (ctr *container) pickAndSend(proc *process.Process, result *vm.CallResult) (sendOver bool, err error) {
    80  	if ctr.buf != nil {
    81  		proc.PutBatch(ctr.buf)
    82  		ctr.buf = nil
    83  	}
    84  	ctr.buf = batch.NewWithSize(ctr.batchList[0].VectorCount())
    85  	mp := proc.Mp()
    86  
    87  	for i := range ctr.buf.Vecs {
    88  		ctr.buf.Vecs[i] = proc.GetVector(*ctr.batchList[0].Vecs[i].GetType())
    89  	}
    90  
    91  	wholeLength := 0
    92  	for {
    93  		choice := ctr.pickFirstRow()
    94  		for j := range ctr.buf.Vecs {
    95  			err = ctr.buf.Vecs[j].UnionOne(ctr.batchList[choice].Vecs[j], ctr.indexList[choice], mp)
    96  			if err != nil {
    97  				return false, err
    98  			}
    99  		}
   100  
   101  		wholeLength++
   102  		ctr.indexList[choice]++
   103  		if ctr.indexList[choice] == int64(ctr.batchList[choice].RowCount()) {
   104  			ctr.removeBatch(proc, choice)
   105  		}
   106  
   107  		if len(ctr.indexList) == 0 {
   108  			sendOver = true
   109  			break
   110  		}
   111  		if ctr.buf.Size() >= maxBatchSizeToSend {
   112  			break
   113  		}
   114  	}
   115  	ctr.buf.SetRowCount(wholeLength)
   116  	result.Batch = ctr.buf
   117  	return sendOver, nil
   118  }
   119  
   120  func (ctr *container) pickFirstRow() (batIndex int) {
   121  	l := len(ctr.indexList)
   122  
   123  	if l > 1 {
   124  		i, j := 0, 1
   125  		for j < l {
   126  			for k := 0; k < len(ctr.compares); k++ {
   127  				ctr.compares[k].Set(0, ctr.orderCols[i][k])
   128  				ctr.compares[k].Set(1, ctr.orderCols[j][k])
   129  				result := ctr.compares[k].Compare(0, 1, ctr.indexList[i], ctr.indexList[j])
   130  				if result < 0 {
   131  					break
   132  				} else if result > 0 {
   133  					i = j
   134  					break
   135  				} else if k == len(ctr.compares)-1 {
   136  					break
   137  				}
   138  			}
   139  			j++
   140  		}
   141  		return i
   142  	}
   143  	return 0
   144  }
   145  
   146  func (ctr *container) removeBatch(proc *process.Process, index int) {
   147  	bat := ctr.batchList[index]
   148  	cols := ctr.orderCols[index]
   149  
   150  	alreadyPut := make(map[*vector.Vector]bool, len(bat.Vecs))
   151  	for i := range bat.Vecs {
   152  		proc.PutVector(bat.Vecs[i])
   153  		alreadyPut[bat.Vecs[i]] = true
   154  	}
   155  	ctr.batchList = append(ctr.batchList[:index], ctr.batchList[index+1:]...)
   156  	ctr.indexList = append(ctr.indexList[:index], ctr.indexList[index+1:]...)
   157  
   158  	for i := range cols {
   159  		if _, ok := alreadyPut[cols[i]]; ok {
   160  			continue
   161  		}
   162  		proc.PutVector(cols[i])
   163  	}
   164  	ctr.orderCols = append(ctr.orderCols[:index], ctr.orderCols[index+1:]...)
   165  }
   166  
   167  func (arg *Argument) String(buf *bytes.Buffer) {
   168  	buf.WriteString(argName)
   169  	ap := arg
   170  	buf.WriteString(": mergeorder([")
   171  	for i, f := range ap.OrderBySpecs {
   172  		if i > 0 {
   173  			buf.WriteString(", ")
   174  		}
   175  		buf.WriteString(f.String())
   176  	}
   177  	buf.WriteString("])")
   178  }
   179  
   180  func (arg *Argument) Prepare(proc *process.Process) (err error) {
   181  	ap := arg
   182  	ap.ctr = new(container)
   183  	ctr := ap.ctr
   184  	ap.ctr.InitReceiver(proc, true)
   185  
   186  	length := 2 * len(proc.Reg.MergeReceivers)
   187  	ctr.batchList = make([]*batch.Batch, 0, length)
   188  	ctr.orderCols = make([][]*vector.Vector, 0, length)
   189  
   190  	ap.ctr.executors = make([]colexec.ExpressionExecutor, len(ap.OrderBySpecs))
   191  	for i := range ap.ctr.executors {
   192  		ap.ctr.executors[i], err = colexec.NewExpressionExecutor(proc, ap.OrderBySpecs[i].Expr)
   193  		if err != nil {
   194  			return err
   195  		}
   196  	}
   197  	return nil
   198  }
   199  
   200  func (arg *Argument) Call(proc *process.Process) (vm.CallResult, error) {
   201  	if err, isCancel := vm.CancelCheck(proc); isCancel {
   202  		return vm.CancelResult, err
   203  	}
   204  
   205  	ap := arg
   206  	ctr := ap.ctr
   207  
   208  	anal := proc.GetAnalyze(arg.GetIdx(), arg.GetParallelIdx(), arg.GetParallelMajor())
   209  	anal.Start()
   210  	defer anal.Stop()
   211  	result := vm.NewCallResult()
   212  
   213  	for {
   214  		switch ctr.status {
   215  		case receiving:
   216  			bat, end, err := ctr.ReceiveFromAllRegs(anal)
   217  			if err != nil {
   218  				return result, err
   219  			}
   220  			if end {
   221  				// if number of block is less than 2, no need to do merge sort.
   222  				ctr.status = normalSending
   223  
   224  				if len(ctr.batchList) > 1 {
   225  					ctr.status = pickUpSending
   226  
   227  					// evaluate the first batch's order column.
   228  					if err = ctr.evaluateOrderColumn(proc, 0); err != nil {
   229  						return result, err
   230  					}
   231  					ctr.generateCompares(ap.OrderBySpecs)
   232  					ctr.indexList = make([]int64, len(ctr.batchList))
   233  				}
   234  				continue
   235  			}
   236  
   237  			if err = ctr.mergeAndEvaluateOrderColumn(proc, bat); err != nil {
   238  				return result, err
   239  			}
   240  
   241  		case normalSending:
   242  			if len(ctr.batchList) == 0 {
   243  				result.Batch = nil
   244  				result.Status = vm.ExecStop
   245  				return result, nil
   246  			}
   247  
   248  			// If only one batch, no need to sort. just send it.
   249  			if len(ctr.batchList) == 1 {
   250  				ctr.buf = ctr.batchList[0]
   251  				ctr.batchList[0] = nil
   252  				result.Batch = ctr.buf
   253  				result.Status = vm.ExecStop
   254  				return result, nil
   255  			}
   256  
   257  		case pickUpSending:
   258  			ok, err := ctr.pickAndSend(proc, &result)
   259  			if ok {
   260  				result.Status = vm.ExecStop
   261  				return result, err
   262  			}
   263  			return result, err
   264  		}
   265  	}
   266  }