github.com/matrixorigin/matrixone@v0.7.0/pkg/sql/colexec/mergeorder/order.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mergeorder
    16  
    17  import (
    18  	"bytes"
    19  	"reflect"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/compare"
    24  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    25  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    26  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    27  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    28  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    29  )
    30  
    31  func String(arg any, buf *bytes.Buffer) {
    32  	ap := arg.(*Argument)
    33  	buf.WriteString("mergeorder([")
    34  	for i, f := range ap.Fs {
    35  		if i > 0 {
    36  			buf.WriteString(", ")
    37  		}
    38  		buf.WriteString(f.String())
    39  	}
    40  	buf.WriteString("])")
    41  }
    42  
    43  func Prepare(proc *process.Process, arg any) error {
    44  	ap := arg.(*Argument)
    45  	ap.ctr = new(container)
    46  	ap.ctr.poses = make([]int32, 0, len(ap.Fs))
    47  
    48  	ap.ctr.receiverListener = make([]reflect.SelectCase, len(proc.Reg.MergeReceivers))
    49  	for i, mr := range proc.Reg.MergeReceivers {
    50  		ap.ctr.receiverListener[i] = reflect.SelectCase{
    51  			Dir:  reflect.SelectRecv,
    52  			Chan: reflect.ValueOf(mr.Ch),
    53  		}
    54  	}
    55  	ap.ctr.aliveMergeReceiver = len(proc.Reg.MergeReceivers)
    56  	ap.ctr.compare0Index = make([]int32, len(ap.Fs))
    57  	ap.ctr.compare1Index = make([]int32, len(ap.Fs))
    58  	return nil
    59  }
    60  
    61  func Call(idx int, proc *process.Process, arg any, isFirst bool, isLast bool) (bool, error) {
    62  	var bat *batch.Batch
    63  	var end bool
    64  	var err error
    65  
    66  	ap := arg.(*Argument)
    67  	ctr := ap.ctr
    68  	anal := proc.GetAnalyze(idx)
    69  	anal.Start()
    70  	defer anal.Stop()
    71  
    72  	// get batch from merge receivers and do merge sort.
    73  	// save the unordered result in ctr.bat.
    74  	// save the ordered index list in ctr.finalSelectList
    75  	for {
    76  		start := time.Now()
    77  		bat, end, err = receiveBatch(proc, ctr)
    78  		if err != nil {
    79  			break
    80  		}
    81  		anal.WaitStop(start)
    82  		if end {
    83  			break
    84  		}
    85  
    86  		if bat == nil || bat.Length() == 0 {
    87  			continue
    88  		}
    89  		anal.Input(bat, isFirst)
    90  		bat.ExpandNulls()
    91  
    92  		if err = mergeSort(proc, bat, ap, ctr, anal); err != nil {
    93  			break
    94  		}
    95  	}
    96  	if err != nil {
    97  		ap.Free(proc, true)
    98  		return false, err
    99  	}
   100  
   101  	// remove and clean unnecessary vector
   102  	// shuffle the ctr.bat
   103  	if ctr.bat != nil {
   104  		for i := ctr.n; i < len(ctr.bat.Vecs); i++ {
   105  			vector.Clean(ctr.bat.Vecs[i], proc.Mp())
   106  		}
   107  		ctr.bat.Vecs = ctr.bat.Vecs[:ctr.n]
   108  		ctr.bat.ExpandNulls()
   109  	}
   110  	if err = ctr.bat.Shuffle(ctr.finalSelectList, proc.Mp()); err != nil {
   111  		ap.Free(proc, true)
   112  		return false, err
   113  	}
   114  
   115  	// output the sort result.
   116  	anal.Output(ctr.bat, isLast)
   117  	proc.SetInputBatch(ctr.bat)
   118  	ctr.bat = nil
   119  
   120  	// free and return
   121  	ap.Free(proc, false)
   122  	return true, nil
   123  }
   124  
   125  // receiveBatch get a batch from receiver, return true if all batches have been got.
   126  func receiveBatch(proc *process.Process, ctr *container) (*batch.Batch, bool, error) {
   127  	if ctr.aliveMergeReceiver == 0 {
   128  		return nil, true, nil
   129  	}
   130  	chosen, value, ok := reflect.Select(ctr.receiverListener)
   131  	if !ok {
   132  		return nil, false, moerr.NewInternalError(proc.Ctx, "pipeline closed unexpectedly")
   133  	}
   134  	pointer := value.UnsafePointer()
   135  	bat := (*batch.Batch)(pointer)
   136  	if bat == nil {
   137  		ctr.receiverListener = append(ctr.receiverListener[:chosen], ctr.receiverListener[chosen+1:]...)
   138  		ctr.aliveMergeReceiver--
   139  	}
   140  	return bat, false, nil
   141  }
   142  
   143  func mergeSort(proc *process.Process, bat2 *batch.Batch,
   144  	ap *Argument, ctr *container, anal process.Analyze) error {
   145  	ctr.n = len(bat2.Vecs)
   146  	ctr.poses = ctr.poses[:0]
   147  
   148  	// evaluate the order column.
   149  	for _, f := range ap.Fs {
   150  		vec, err := colexec.EvalExpr(bat2, proc, f.Expr)
   151  		if err != nil {
   152  			return err
   153  		}
   154  		newColumn := true
   155  		for i := range bat2.Vecs {
   156  			if bat2.Vecs[i] == vec {
   157  				newColumn = false
   158  				ctr.poses = append(ctr.poses, int32(i))
   159  				break
   160  			}
   161  		}
   162  		if newColumn {
   163  			ctr.poses = append(ctr.poses, int32(len(bat2.Vecs)))
   164  			bat2.Vecs = append(bat2.Vecs, vec)
   165  			anal.Alloc(int64(vec.Size()))
   166  		}
   167  	}
   168  	copy(ctr.compare1Index, ctr.poses)
   169  
   170  	// init the compare structure if first time.
   171  	if len(ctr.cmps) == 0 {
   172  		var desc, nullsLast bool
   173  		ctr.cmps = make([]compare.Compare, len(ap.Fs))
   174  		for i := range ctr.cmps {
   175  			desc = ap.Fs[i].Flag&plan.OrderBySpec_DESC != 0
   176  			if ap.Fs[i].Flag&plan.OrderBySpec_NULLS_FIRST != 0 {
   177  				nullsLast = false
   178  			} else if ap.Fs[i].Flag&plan.OrderBySpec_NULLS_LAST != 0 {
   179  				nullsLast = true
   180  			} else {
   181  				nullsLast = desc
   182  			}
   183  			ctr.cmps[i] = compare.New(bat2.Vecs[ctr.poses[i]].Typ, desc, nullsLast)
   184  		}
   185  	}
   186  
   187  	return ctr.mergeSort2(bat2, proc)
   188  }
   189  
   190  func (ctr *container) mergeSort2(bat2 *batch.Batch, proc *process.Process) error {
   191  	if ctr.bat == nil {
   192  		ctr.bat = bat2
   193  		ctr.finalSelectList = generateSelectList(int64(ctr.bat.Length()))
   194  		copy(ctr.compare0Index, ctr.poses)
   195  		return nil
   196  	}
   197  	bat1 := ctr.bat
   198  	// union bat1 and bat2
   199  	// do merge sort, get order index list.
   200  	s1, s2 := int64(0), int64(bat1.Vecs[0].Length()) // startIndexOfBat1, startIndexOfBat2
   201  
   202  	for i := range bat1.Vecs {
   203  		n := bat2.Vecs[i].Length()
   204  		if cap(ctr.unionFlag) >= n {
   205  			ctr.unionFlag = ctr.unionFlag[:n:cap(ctr.unionFlag)]
   206  		} else {
   207  			ctr.unionFlag = makeFlagsOne(n)
   208  		}
   209  		err := vector.UnionBatch(bat1.Vecs[i], bat2.Vecs[i], 0, n, ctr.unionFlag, proc.Mp())
   210  		if err != nil {
   211  			return err
   212  		}
   213  	}
   214  	bat1.Zs = append(bat1.Zs, bat2.Zs...)
   215  
   216  	// set cmp should after union work to avoid memory re-alloc while union.
   217  	for i, cmp := range ctr.cmps {
   218  		cmp.Set(0, bat1.GetVector(ctr.compare0Index[i]))
   219  		cmp.Set(1, bat2.GetVector(ctr.compare1Index[i]))
   220  	}
   221  
   222  	end1, end2 := s2, int64(bat1.Vecs[0].Length())
   223  	sels := make([]int64, 0, end2)
   224  
   225  	for s1 < end1 && s2 < end2 {
   226  		i := s1
   227  		j := s2 - end1
   228  		compareResult := 0
   229  		for k := range ctr.cmps {
   230  			compareResult = ctr.cmps[k].Compare(0, 1, ctr.finalSelectList[i], j)
   231  			if compareResult != 0 {
   232  				break
   233  			}
   234  		}
   235  		if compareResult <= 0 {
   236  			// weight of item1 is less or equal to item2
   237  			sels = append(sels, ctr.finalSelectList[s1])
   238  			s1++
   239  		} else {
   240  			sels = append(sels, s2)
   241  			s2++
   242  		}
   243  	}
   244  	for s1 < end1 {
   245  		sels = append(sels, ctr.finalSelectList[s1])
   246  		s1++
   247  	}
   248  	for s2 < end2 {
   249  		sels = append(sels, s2)
   250  		s2++
   251  	}
   252  	ctr.finalSelectList = sels
   253  	ctr.bat = bat1
   254  	bat2.Clean(proc.Mp())
   255  	return nil
   256  }
   257  
   258  func generateSelectList(j int64) []int64 {
   259  	list := make([]int64, j)
   260  	var i int64
   261  	for i = 0; i < j; i++ {
   262  		list[i] = i
   263  	}
   264  	return list
   265  }
   266  
   267  func makeFlagsOne(n int) []uint8 {
   268  	t := make([]uint8, n)
   269  	for i := range t {
   270  		t[i]++
   271  	}
   272  	return t
   273  }