github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/mergetop/top.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mergetop
    16  
    17  import (
    18  	"bytes"
    19  	"container/heap"
    20  	"fmt"
    21  	"reflect"
    22  	"time"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/compare"
    26  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    27  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    29  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    31  )
    32  
    33  func String(arg any, buf *bytes.Buffer) {
    34  	ap := arg.(*Argument)
    35  	buf.WriteString("mergetop([")
    36  	for i, f := range ap.Fs {
    37  		if i > 0 {
    38  			buf.WriteString(", ")
    39  		}
    40  		buf.WriteString(f.String())
    41  	}
    42  	buf.WriteString(fmt.Sprintf("], %v)", ap.Limit))
    43  }
    44  
    45  func Prepare(proc *process.Process, arg any) error {
    46  	ap := arg.(*Argument)
    47  	ap.ctr = new(container)
    48  	if ap.Limit > 1024 {
    49  		ap.ctr.sels = make([]int64, 0, 1024)
    50  	} else {
    51  		ap.ctr.sels = make([]int64, 0, ap.Limit)
    52  	}
    53  	ap.ctr.poses = make([]int32, 0, len(ap.Fs))
    54  
    55  	ap.ctr.receiverListener = make([]reflect.SelectCase, len(proc.Reg.MergeReceivers))
    56  	for i, mr := range proc.Reg.MergeReceivers {
    57  		ap.ctr.receiverListener[i] = reflect.SelectCase{
    58  			Dir:  reflect.SelectRecv,
    59  			Chan: reflect.ValueOf(mr.Ch),
    60  		}
    61  	}
    62  	ap.ctr.aliveMergeReceiver = len(proc.Reg.MergeReceivers)
    63  	return nil
    64  }
    65  
    66  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
    67  	anal := proc.GetAnalyze(idx)
    68  	anal.Start()
    69  	defer anal.Stop()
    70  	ap := arg.(*Argument)
    71  	ctr := ap.ctr
    72  
    73  	if ap.Limit == 0 {
    74  		ap.Free(proc, false)
    75  		proc.SetInputBatch(nil)
    76  		return true, nil
    77  	}
    78  
    79  	if err := ctr.build(ap, proc, anal, isFirst); err != nil {
    80  		ap.Free(proc, true)
    81  		return false, err
    82  	}
    83  
    84  	if ctr.bat == nil {
    85  		ap.Free(proc, false)
    86  		proc.SetInputBatch(nil)
    87  		return true, nil
    88  	}
    89  	err := ctr.eval(ap.Limit, proc, anal, isLast)
    90  	ap.Free(proc, err != nil)
    91  	return err == nil, err
    92  }
    93  
    94  func (ctr *container) build(ap *Argument, proc *process.Process, anal process.Analyze, isFirst bool) error {
    95  	for {
    96  		if ctr.aliveMergeReceiver == 0 {
    97  			return nil
    98  		}
    99  
   100  		start := time.Now()
   101  		chosen, value, ok := reflect.Select(ctr.receiverListener)
   102  		if !ok {
   103  			return moerr.NewInternalError(proc.Ctx, "pipeline closed unexpectedly")
   104  		}
   105  		anal.WaitStop(start)
   106  
   107  		pointer := value.UnsafePointer()
   108  		bat := (*batch.Batch)(pointer)
   109  		if bat == nil {
   110  			ctr.receiverListener = append(ctr.receiverListener[:chosen], ctr.receiverListener[chosen+1:]...)
   111  			ctr.aliveMergeReceiver--
   112  			continue
   113  		}
   114  
   115  		if bat.Length() == 0 {
   116  			continue
   117  		}
   118  
   119  		anal.Input(bat, isFirst)
   120  
   121  		ctr.n = len(bat.Vecs)
   122  		ctr.poses = ctr.poses[:0]
   123  		for _, f := range ap.Fs {
   124  			vec, err := colexec.EvalExpr(bat, proc, f.Expr)
   125  			if err != nil {
   126  				return err
   127  			}
   128  			flg := true
   129  			for i := range bat.Vecs {
   130  				if bat.Vecs[i] == vec {
   131  					flg = false
   132  					ctr.poses = append(ctr.poses, int32(i))
   133  					break
   134  				}
   135  			}
   136  			if flg {
   137  				ctr.poses = append(ctr.poses, int32(len(bat.Vecs)))
   138  				bat.Vecs = append(bat.Vecs, vec)
   139  			} else {
   140  				if vec != nil {
   141  					anal.Alloc(int64(vec.Size()))
   142  				}
   143  			}
   144  		}
   145  		if ctr.bat == nil {
   146  			mp := make(map[int]int)
   147  			for i, pos := range ctr.poses {
   148  				mp[int(pos)] = i
   149  			}
   150  			ctr.bat = batch.NewWithSize(len(bat.Vecs))
   151  			for i, vec := range bat.Vecs {
   152  				ctr.bat.Vecs[i] = vector.New(vec.Typ)
   153  			}
   154  			ctr.cmps = make([]compare.Compare, len(bat.Vecs))
   155  			for i := range ctr.cmps {
   156  				var desc, nullsLast bool
   157  				if pos, ok := mp[i]; ok {
   158  					desc = ap.Fs[pos].Flag&plan.OrderBySpec_DESC != 0
   159  					if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_FIRST != 0 {
   160  						nullsLast = false
   161  					} else if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_LAST != 0 {
   162  						nullsLast = true
   163  					} else {
   164  						nullsLast = desc
   165  					}
   166  				}
   167  				ctr.cmps[i] = compare.New(bat.Vecs[i].Typ, desc, nullsLast)
   168  			}
   169  		}
   170  		if err := ctr.processBatch(ap.Limit, bat, proc); err != nil {
   171  			bat.Clean(proc.Mp())
   172  			return err
   173  		}
   174  		bat.Clean(proc.Mp())
   175  	}
   176  }
   177  
   178  func (ctr *container) processBatch(limit int64, bat *batch.Batch, proc *process.Process) error {
   179  	var start int64
   180  
   181  	length := int64(len(bat.Zs))
   182  	if n := int64(len(ctr.sels)); n < limit {
   183  		start = limit - n
   184  		if start > length {
   185  			start = length
   186  		}
   187  		for i := int64(0); i < start; i++ {
   188  			for j, vec := range ctr.bat.Vecs {
   189  				if err := vector.UnionOne(vec, bat.Vecs[j], i, proc.Mp()); err != nil {
   190  					return err
   191  				}
   192  			}
   193  			ctr.sels = append(ctr.sels, n)
   194  			ctr.bat.Zs = append(ctr.bat.Zs, bat.Zs[i])
   195  			n++
   196  		}
   197  		if n == limit {
   198  			ctr.sort()
   199  		}
   200  	}
   201  	if start == length {
   202  		return nil
   203  	}
   204  
   205  	// bat is still have items
   206  	for i, cmp := range ctr.cmps {
   207  		cmp.Set(1, bat.Vecs[i])
   208  	}
   209  	for i, j := start, length; i < j; i++ {
   210  		if ctr.compare(1, 0, i, ctr.sels[0]) < 0 {
   211  			for _, cmp := range ctr.cmps {
   212  				if err := cmp.Copy(1, 0, i, ctr.sels[0], proc); err != nil {
   213  					return err
   214  				}
   215  				ctr.bat.Zs[0] = bat.Zs[i]
   216  			}
   217  			heap.Fix(ctr, 0)
   218  		}
   219  	}
   220  	return nil
   221  }
   222  
   223  func (ctr *container) eval(limit int64, proc *process.Process, anal process.Analyze, isLast bool) error {
   224  	if int64(len(ctr.sels)) < limit {
   225  		ctr.sort()
   226  	}
   227  	for i, cmp := range ctr.cmps {
   228  		ctr.bat.Vecs[i] = cmp.Vector()
   229  	}
   230  	sels := make([]int64, len(ctr.sels))
   231  	for i, j := 0, len(ctr.sels); i < j; i++ {
   232  		sels[len(sels)-1-i] = heap.Pop(ctr).(int64)
   233  	}
   234  	if err := ctr.bat.Shuffle(sels, proc.Mp()); err != nil {
   235  		return err
   236  	}
   237  	for i := ctr.n; i < len(ctr.bat.Vecs); i++ {
   238  		vector.Clean(ctr.bat.Vecs[i], proc.Mp())
   239  	}
   240  	ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n]
   241  	ctr.bat.ExpandNulls()
   242  	anal.Output(ctr.bat, isLast)
   243  	proc.SetInputBatch(ctr.bat)
   244  	ctr.bat = nil
   245  	return nil
   246  }
   247  
   248  // do sort work for heap, and result order will be set in container.sels
   249  func (ctr *container) sort() {
   250  	for i, cmp := range ctr.cmps {
   251  		cmp.Set(0, ctr.bat.Vecs[i])
   252  	}
   253  	heap.Init(ctr)
   254  }