github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/colexec/merge_util.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package colexec
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    19  	"github.com/matrixorigin/matrixone/pkg/sort"
    20  )
    21  
// MergeInterface is the type-erased view of a merger: each call hands out
// the position of the next row to emit. In this file it is satisfied by
// *Merge[T].
type MergeInterface interface {
	// getNextPos returns three ints. For *Merge[T] they are the batch
	// index, the row index inside that batch, and the number of batches
	// that still have rows left; the first two are -1 once nothing is left.
	getNextPos() (int, int, int)
}
    25  
// heapElem is one entry of the merge heap: a reference to a single row of
// one input batch, together with enough information to locate that row.
type heapElem[T any] struct {
	data     *T   // points at the row's sort-key value inside Merge.cols
	isNull   bool // true when the nulls set of the batch contains this row
	batIndex int  // index of the batch (outer index into Merge.cols)
	rowIndex int  // index of the row inside that batch
}
    32  
// Merge merges several batches by a single key column. Rows are ordered
// by primary key or cluster-by key, so only one vector of every batch is
// needed.
type Merge[T any] struct {
	// size starts as the number of batches and is decremented every time
	// a batch turns out to be empty or has been fully consumed.
	size uint64
	// cols holds the vectors that need to be sorted, converted into plain
	// slices; cols[i] is the key column of batch i.
	cols [][]T
	// pointers records the position reached in every batch:
	// pointers[i] is the row of cols[i] that was most recently pushed
	// onto the heap, and it is set to -1 once cols[i] has no rows left.
	pointers []int

	// nulls[i] is queried with Contains(row) to decide whether the given
	// row of cols[i] is NULL.
	nulls []*nulls.Nulls

	// heaps orders the candidate rows; initHeap and pushNext keep at most
	// one entry per batch in it.
	heaps *mergeHeap[T]
}
    53  
    54  func newMerge[T any](size int, compLess sort.LessFunc[T], cols [][]T, nulls []*nulls.Nulls) (merge *Merge[T]) {
    55  	merge = &Merge[T]{
    56  		size:     uint64(size),
    57  		cols:     cols,
    58  		pointers: make([]int, size),
    59  		nulls:    nulls,
    60  	}
    61  	merge.heaps = newMergeHeap(uint64(size), compLess)
    62  	merge.initHeap()
    63  	return
    64  }
    65  
    66  func (merge *Merge[T]) initHeap() {
    67  	for i := 0; i < int(merge.size); i++ {
    68  		if len(merge.cols[i]) == 0 {
    69  			merge.pointers[i] = -1
    70  			merge.size--
    71  			continue
    72  		}
    73  		merge.heaps.push(&heapElem[T]{
    74  			data:     &merge.cols[i][merge.pointers[i]],
    75  			isNull:   merge.nulls[i].Contains(uint64(merge.pointers[i])),
    76  			batIndex: i,
    77  			rowIndex: merge.pointers[i],
    78  		})
    79  		if merge.pointers[i] >= len(merge.cols[i]) {
    80  			merge.pointers[i] = -1
    81  			merge.size--
    82  		}
    83  	}
    84  }
    85  
    86  func (merge *Merge[T]) getNextPos() (batchIndex, rowIndex, size int) {
    87  	data := merge.pushNext()
    88  	if data == nil {
    89  		// now, merge.size is 0
    90  		return -1, -1, int(merge.size)
    91  	}
    92  	return data.batIndex, data.rowIndex, int(merge.size)
    93  }
    94  
    95  func (merge *Merge[T]) pushNext() *heapElem[T] {
    96  	if merge.size == 0 {
    97  		return nil
    98  	}
    99  	data := merge.heaps.pop()
   100  	batchIndex := data.batIndex
   101  	merge.pointers[batchIndex]++
   102  	if merge.pointers[batchIndex] >= len(merge.cols[batchIndex]) {
   103  		merge.pointers[batchIndex] = -1
   104  		merge.size--
   105  	}
   106  	if merge.pointers[batchIndex] != -1 {
   107  		merge.heaps.push(&heapElem[T]{
   108  			data:     &merge.cols[batchIndex][merge.pointers[batchIndex]],
   109  			isNull:   merge.nulls[batchIndex].Contains(uint64(merge.pointers[batchIndex])),
   110  			batIndex: batchIndex,
   111  			rowIndex: merge.pointers[batchIndex],
   112  		})
   113  	}
   114  	return data
   115  }
   116  
// mergeHeap is an array-backed binary heap of heapElem pointers. It follows
// a NULL-first rule: compLess ranks an entry flagged isNull ahead of the
// entry it is compared with, and only non-NULL pairs are ordered by cmpLess.
type mergeHeap[T any] struct {
	cmpLess sort.LessFunc[T] // ordering applied to two non-NULL values
	datas   []*heapElem[T]   // 1-based storage: slot 0 is unused, live entries are datas[1..size]
	size    uint64           // number of live entries
}
   123  
   124  func newMergeHeap[T any](cap_size uint64, cmp sort.LessFunc[T]) *mergeHeap[T] {
   125  	return &mergeHeap[T]{
   126  		cmpLess: cmp,
   127  		datas:   make([]*heapElem[T], cap_size+1),
   128  		size:    0,
   129  	}
   130  }
   131  
   132  func (heap *mergeHeap[T]) push(data *heapElem[T]) {
   133  	heap.datas[heap.size+1] = data
   134  	heap.size++
   135  	heap.up(int(heap.size))
   136  }
   137  
   138  func (heap *mergeHeap[T]) pop() (data *heapElem[T]) {
   139  	if heap.size < 1 {
   140  		return nil
   141  	}
   142  	data = heap.datas[1]
   143  	heap.datas[1], heap.datas[heap.size] = heap.datas[heap.size], heap.datas[1]
   144  	heap.size--
   145  	heap.down(1)
   146  	return
   147  }
   148  
   149  func (heap *mergeHeap[T]) compLess(i, j int) bool {
   150  	if heap.datas[i].isNull {
   151  		return true
   152  	}
   153  	if heap.datas[j].isNull {
   154  		return false
   155  	}
   156  	return heap.cmpLess(*heap.datas[i].data, *heap.datas[j].data)
   157  }
   158  
   159  func (heap *mergeHeap[T]) down(i int) {
   160  	t := i
   161  	if i*2 <= int(heap.size) && heap.compLess(i*2, t) {
   162  		t = i * 2
   163  	}
   164  	if i*2+1 <= int(heap.size) && heap.compLess(i*2+1, t) {
   165  		t = i*2 + 1
   166  	}
   167  	if t != i {
   168  		heap.datas[t], heap.datas[i] = heap.datas[i], heap.datas[t]
   169  		heap.down(t)
   170  	}
   171  }
   172  
   173  func (heap *mergeHeap[T]) up(i int) {
   174  	t := i
   175  	if i/2 >= 1 && heap.compLess(t, i/2) {
   176  		t = i / 2
   177  	}
   178  	if t != i {
   179  		heap.datas[t], heap.datas[i] = heap.datas[i], heap.datas[t]
   180  		heap.up(t)
   181  	}
   182  }