github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/top/top.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package top
    16  
    17  import (
    18  	"bytes"
    19  	"container/heap"
    20  	"fmt"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/compare"
    23  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    24  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    25  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    26  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    27  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    28  )
    29  
    30  func String(arg any, buf *bytes.Buffer) {
    31  	ap := arg.(*Argument)
    32  	buf.WriteString("top([")
    33  	for i, f := range ap.Fs {
    34  		if i > 0 {
    35  			buf.WriteString(", ")
    36  		}
    37  		buf.WriteString(f.String())
    38  	}
    39  	buf.WriteString(fmt.Sprintf("], %v)", ap.Limit))
    40  }
    41  
    42  func Prepare(_ *process.Process, arg any) error {
    43  	ap := arg.(*Argument)
    44  	ap.ctr = new(container)
    45  	if ap.Limit > 1024 {
    46  		ap.ctr.sels = make([]int64, 0, 1024)
    47  	} else {
    48  		ap.ctr.sels = make([]int64, 0, ap.Limit)
    49  	}
    50  	ap.ctr.poses = make([]int32, 0, len(ap.Fs))
    51  	return nil
    52  }
    53  
    54  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
    55  	ap := arg.(*Argument)
    56  	ctr := ap.ctr
    57  	anal := proc.GetAnalyze(idx)
    58  	anal.Start()
    59  	defer anal.Stop()
    60  	for {
    61  		switch ctr.state {
    62  		case Build:
    63  			bat := proc.InputBatch()
    64  			if bat == nil {
    65  				ctr.state = Eval
    66  				continue
    67  			}
    68  			if len(bat.Zs) == 0 {
    69  				return false, nil
    70  			}
    71  			if ap.Limit == 0 {
    72  				bat.Clean(proc.Mp())
    73  				proc.SetInputBatch(nil)
    74  				return true, nil
    75  			}
    76  			err := ctr.build(ap, bat, proc, anal)
    77  			if err != nil {
    78  				ap.Free(proc, true)
    79  			}
    80  			return false, err
    81  
    82  		case Eval:
    83  			if ctr.bat == nil {
    84  				proc.SetInputBatch(nil)
    85  				return true, nil
    86  			}
    87  			err := ctr.eval(ap.Limit, proc)
    88  			ap.Free(proc, err != nil)
    89  			return err == nil, err
    90  		}
    91  	}
    92  }
    93  
    94  func (ctr *container) build(ap *Argument, bat *batch.Batch, proc *process.Process, analyze process.Analyze) error {
    95  	ctr.n = len(bat.Vecs)
    96  	ctr.poses = ctr.poses[:0]
    97  	for _, f := range ap.Fs {
    98  		vec, err := colexec.EvalExpr(bat, proc, f.Expr)
    99  		if err != nil {
   100  			return err
   101  		}
   102  		flg := true
   103  		for i := range bat.Vecs {
   104  			if bat.Vecs[i] == vec {
   105  				flg = false
   106  				ctr.poses = append(ctr.poses, int32(i))
   107  				break
   108  			}
   109  		}
   110  		if flg {
   111  			ctr.poses = append(ctr.poses, int32(len(bat.Vecs)))
   112  			bat.Vecs = append(bat.Vecs, vec)
   113  		} else {
   114  			if vec != nil {
   115  				analyze.Alloc(int64(vec.Size()))
   116  			}
   117  		}
   118  	}
   119  	if ctr.bat == nil {
   120  		mp := make(map[int]int)
   121  		for i, pos := range ctr.poses {
   122  			mp[int(pos)] = i
   123  		}
   124  		ctr.bat = batch.NewWithSize(len(bat.Vecs))
   125  		for i, vec := range bat.Vecs {
   126  			ctr.bat.Vecs[i] = vector.New(vec.Typ)
   127  		}
   128  		ctr.cmps = make([]compare.Compare, len(bat.Vecs))
   129  		for i := range ctr.cmps {
   130  			var desc, nullsLast bool
   131  			if pos, ok := mp[i]; ok {
   132  				desc = ap.Fs[pos].Flag&plan.OrderBySpec_DESC != 0
   133  				if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_FIRST != 0 {
   134  					nullsLast = false
   135  				} else if ap.Fs[pos].Flag&plan.OrderBySpec_NULLS_LAST != 0 {
   136  					nullsLast = true
   137  				} else {
   138  					nullsLast = desc
   139  				}
   140  			}
   141  			ctr.cmps[i] = compare.New(bat.Vecs[i].Typ, desc, nullsLast)
   142  		}
   143  	}
   144  	defer bat.Clean(proc.Mp())
   145  	proc.Reg.InputBatch = &batch.Batch{}
   146  	return ctr.processBatch(ap.Limit, bat, proc)
   147  }
   148  
   149  func (ctr *container) processBatch(limit int64, bat *batch.Batch, proc *process.Process) error {
   150  	var start int64
   151  
   152  	length := int64(len(bat.Zs))
   153  	if n := int64(len(ctr.sels)); n < limit {
   154  		start = limit - n
   155  		if start > length {
   156  			start = length
   157  		}
   158  		for i := int64(0); i < start; i++ {
   159  			for j, vec := range ctr.bat.Vecs {
   160  				if err := vector.UnionOne(vec, bat.Vecs[j], i, proc.Mp()); err != nil {
   161  					return err
   162  				}
   163  			}
   164  			ctr.sels = append(ctr.sels, n)
   165  			ctr.bat.Zs = append(ctr.bat.Zs, bat.Zs[i])
   166  			n++
   167  		}
   168  		if n == limit {
   169  			ctr.sort()
   170  		}
   171  	}
   172  	if start == length {
   173  		return nil
   174  	}
   175  
   176  	// bat is still have items
   177  	for i, cmp := range ctr.cmps {
   178  		cmp.Set(1, bat.Vecs[i])
   179  	}
   180  	for i, j := start, length; i < j; i++ {
   181  		if ctr.compare(1, 0, i, ctr.sels[0]) < 0 {
   182  			for _, cmp := range ctr.cmps {
   183  				if err := cmp.Copy(1, 0, i, ctr.sels[0], proc); err != nil {
   184  					return err
   185  				}
   186  				ctr.bat.Zs[0] = bat.Zs[i]
   187  			}
   188  			heap.Fix(ctr, 0)
   189  		}
   190  	}
   191  	return nil
   192  }
   193  
   194  func (ctr *container) eval(limit int64, proc *process.Process) error {
   195  	if int64(len(ctr.sels)) < limit {
   196  		ctr.sort()
   197  	}
   198  	for i, cmp := range ctr.cmps {
   199  		ctr.bat.Vecs[i] = cmp.Vector()
   200  	}
   201  	sels := make([]int64, len(ctr.sels))
   202  	for i, j := 0, len(ctr.sels); i < j; i++ {
   203  		sels[len(sels)-1-i] = heap.Pop(ctr).(int64)
   204  	}
   205  	if err := ctr.bat.Shuffle(sels, proc.Mp()); err != nil {
   206  		return err
   207  	}
   208  	for i := ctr.n; i < len(ctr.bat.Vecs); i++ {
   209  		vector.Clean(ctr.bat.Vecs[i], proc.Mp())
   210  	}
   211  	ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n]
   212  	ctr.bat.ExpandNulls()
   213  	proc.Reg.InputBatch = ctr.bat
   214  	ctr.bat = nil
   215  	return nil
   216  }
   217  
   218  // do sort work for heap, and result order will be set in container.sels
   219  func (ctr *container) sort() {
   220  	for i, cmp := range ctr.cmps {
   221  		cmp.Set(0, ctr.bat.Vecs[i])
   222  	}
   223  	heap.Init(ctr)
   224  }