github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/ordered_synchronizer_tmpl.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // {{/*
    12  // +build execgen_template
    13  //
    14  // This file is the execgen template for ordered_synchronizer.eg.go. It's
    15  // formatted in a special way, so it's both valid Go and a valid text/template
    16  // input. This permits editing this file with editor support.
    17  //
    18  // */}}
    19  
    20  package colexec
    21  
    22  import (
    23  	"container/heap"
    24  	"context"
    25  	"fmt"
    26  
    27  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    28  	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    35  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    36  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    37  )
    38  
// Keep a reference to the execgen package so that its import is never
// reported as unused.
var _ = execgen.UNSAFEGET
    41  
    42  // {{/*
    43  // Declarations to make the template compile properly.
    44  
// _GOTYPESLICE is a template placeholder. It is declared here only so that
// this template file compiles as regular Go.
type _GOTYPESLICE interface{}

// _CANONICAL_TYPE_FAMILY is a template placeholder. It is declared here only
// so that this template file compiles as regular Go.
const _CANONICAL_TYPE_FAMILY = types.UnknownFamily

// _TYPE_WIDTH is a template placeholder. It is declared here only so that
// this template file compiles as regular Go.
const _TYPE_WIDTH = 0
    53  
    54  // */}}
    55  
// OrderedSynchronizer receives rows from multiple inputs and produces a single
// stream of rows, ordered according to a set of columns. The rows in each input
// stream are assumed to be ordered according to the same set of columns.
//
// The type also implements heap.Interface (Len, Less, Swap, Push, Pop) over
// its heap field; those methods are only meant to be used internally.
type OrderedSynchronizer struct {
	// allocator creates the output batch and wraps the writes into it.
	allocator *colmem.Allocator
	// inputs are the operators whose batches are merged.
	inputs []colexecbase.Operator
	// ordering lists the columns (and their directions) that define the order
	// of the merged stream.
	ordering sqlbase.ColumnOrdering
	// typs are the column types of the output batch; input batches are read
	// column by column using the same types.
	typs []*types.T
	// canonicalTypeFamilies is derived from typs with
	// typeconv.ToCanonicalTypeFamilies; entry i is used to select the typed
	// copy code for column i.
	canonicalTypeFamilies []types.Family

	// inputBatches stores the current batch for each input.
	inputBatches []coldata.Batch
	// inputIndices stores the current index into each input batch.
	inputIndices []int
	// heap is a min heap which stores indices into inputBatches. The "current
	// value" of ith input batch is the tuple at inputIndices[i] position of
	// inputBatches[i] batch. If an input is fully exhausted, it will be removed
	// from heap.
	heap []int
	// comparators stores one comparator per ordering column.
	comparators []vecComparator
	// output is the batch that Next fills and returns on every call.
	output coldata.Batch
	// outNulls[i] is the nulls bitmap of the ith vector of output.
	outNulls []*coldata.Nulls
	// In order to reduce the number of interface conversions, we will get access
	// to the underlying slice for the output vectors and will use them directly.
	// {{range .}}
	// {{range .WidthOverloads}}
	out_TYPECols []_GOTYPESLICE
	// {{end}}
	// {{end}}
	// outColsMap contains the positions of the corresponding vectors in the
	// slice for the same types. For example, if we have an output batch with
	// types = [Int64, Int64, Bool, Bytes, Bool, Int64], then outColsMap will be
	//                      [0, 1, 0, 0, 1, 2]
	//                       ^  ^  ^  ^  ^  ^
	//                       |  |  |  |  |  |
	//                       |  |  |  |  |  3rd among all Int64's
	//                       |  |  |  |  2nd among all Bool's
	//                       |  |  |  1st among all Bytes's
	//                       |  |  1st among all Bool's
	//                       |  2nd among all Int64's
	//                       1st among all Int64's
	outColsMap []int
}
   100  
   101  var _ colexecbase.Operator = &OrderedSynchronizer{}
   102  
   103  // ChildCount implements the execinfrapb.OpNode interface.
   104  func (o *OrderedSynchronizer) ChildCount(verbose bool) int {
   105  	return len(o.inputs)
   106  }
   107  
   108  // Child implements the execinfrapb.OpNode interface.
   109  func (o *OrderedSynchronizer) Child(nth int, verbose bool) execinfra.OpNode {
   110  	return o.inputs[nth]
   111  }
   112  
   113  // NewOrderedSynchronizer creates a new OrderedSynchronizer.
   114  func NewOrderedSynchronizer(
   115  	allocator *colmem.Allocator,
   116  	inputs []colexecbase.Operator,
   117  	typs []*types.T,
   118  	ordering sqlbase.ColumnOrdering,
   119  ) (*OrderedSynchronizer, error) {
   120  	return &OrderedSynchronizer{
   121  		allocator:             allocator,
   122  		inputs:                inputs,
   123  		ordering:              ordering,
   124  		typs:                  typs,
   125  		canonicalTypeFamilies: typeconv.ToCanonicalTypeFamilies(typs),
   126  	}, nil
   127  }
   128  
// Next is part of the Operator interface. It fills the output batch with up to
// coldata.BatchSize() tuples, each time taking the current tuple of the input
// at the top of the heap, and returns that batch. Once every input has been
// exhausted the returned batch has length zero. It relies on state allocated
// in Init (output batch, inputIndices, comparators).
func (o *OrderedSynchronizer) Next(ctx context.Context) coldata.Batch {
	// On the first call, fetch the first batch from every input and seed the
	// heap with the inputs that produced a non-empty batch.
	if o.inputBatches == nil {
		o.inputBatches = make([]coldata.Batch, len(o.inputs))
		o.heap = make([]int, 0, len(o.inputs))
		for i := range o.inputs {
			o.inputBatches[i] = o.inputs[i].Next(ctx)
			o.updateComparators(i)
			if o.inputBatches[i].Length() > 0 {
				o.heap = append(o.heap, i)
			}
		}
		heap.Init(o)
	}
	o.output.ResetInternalBatch()
	outputIdx := 0
	o.allocator.PerformOperation(o.output.ColVecs(), func() {
		for outputIdx < coldata.BatchSize() {
			if o.Len() == 0 {
				// All inputs exhausted.
				break
			}

			minBatch := o.heap[0]
			// Copy the min row into the output.
			batch := o.inputBatches[minBatch]
			srcRowIdx := o.inputIndices[minBatch]
			// inputIndices holds a position within the batch; when a selection
			// vector is present, translate it into the actual row index.
			if sel := batch.Selection(); sel != nil {
				srcRowIdx = sel[srcRowIdx]
			}
			for i := range o.typs {
				vec := batch.ColVec(i)
				// A null source value is only marked in the output nulls; no
				// value is copied for it.
				if vec.Nulls().MaybeHasNulls() && vec.Nulls().NullAt(srcRowIdx) {
					o.outNulls[i].SetNull(outputIdx)
				} else {
					switch o.canonicalTypeFamilies[i] {
					// {{range .}}
					case _CANONICAL_TYPE_FAMILY:
						switch o.typs[i].Width() {
						// {{range .WidthOverloads}}
						case _TYPE_WIDTH:
							srcCol := vec._TYPE()
							outCol := o.out_TYPECols[o.outColsMap[i]]
							v := execgen.UNSAFEGET(srcCol, srcRowIdx)
							execgen.SET(outCol, outputIdx, v)
							// {{end}}
						}
						// {{end}}
					default:
						colexecerror.InternalError(fmt.Sprintf("unhandled type %s", o.typs[i].String()))
					}
				}
			}

			// Advance the input batch, fetching a new batch if necessary.
			if o.inputIndices[minBatch]+1 < o.inputBatches[minBatch].Length() {
				o.inputIndices[minBatch]++
			} else {
				o.inputBatches[minBatch] = o.inputs[minBatch].Next(ctx)
				o.inputIndices[minBatch] = 0
				o.updateComparators(minBatch)
			}
			// The input at the top of the heap either has no more tuples and is
			// dropped, or has a new current tuple and must be re-positioned.
			if o.inputBatches[minBatch].Length() == 0 {
				heap.Remove(o, 0)
			} else {
				heap.Fix(o, 0)
			}

			outputIdx++
		}
	})

	o.output.SetLength(outputIdx)
	return o.output
}
   204  
// Init is part of the Operator interface. It allocates the output batch and
// the per-input indices, records direct references to the nulls and typed
// columns of every output vector, initializes all inputs, and creates one
// comparator per ordering column.
func (o *OrderedSynchronizer) Init() {
	o.inputIndices = make([]int, len(o.inputs))
	o.output = o.allocator.NewMemBatch(o.typs)
	o.outNulls = make([]*coldata.Nulls, len(o.typs))
	o.outColsMap = make([]int, len(o.typs))
	// For every output vector, remember its nulls and append its typed column
	// to the slice for its type; outColsMap[i] records the position within
	// that slice.
	for i, outVec := range o.output.ColVecs() {
		o.outNulls[i] = outVec.Nulls()
		switch typeconv.TypeFamilyToCanonicalTypeFamily(o.typs[i].Family()) {
		// {{range .}}
		case _CANONICAL_TYPE_FAMILY:
			switch o.typs[i].Width() {
			// {{range .WidthOverloads}}
			case _TYPE_WIDTH:
				o.outColsMap[i] = len(o.out_TYPECols)
				o.out_TYPECols = append(o.out_TYPECols, outVec._TYPE())
				// {{end}}
			}
		// {{end}}
		default:
			colexecerror.InternalError(fmt.Sprintf("unhandled type %s", o.typs[i]))
		}
	}
	for i := range o.inputs {
		o.inputs[i].Init()
	}
	// Each comparator is created for the type of its ordering column and for
	// len(o.inputs) inputs.
	o.comparators = make([]vecComparator, len(o.ordering))
	for i := range o.ordering {
		typ := o.typs[o.ordering[i].ColIdx]
		o.comparators[i] = GetVecComparator(typ, len(o.inputs))
	}
}
   237  
   238  func (o *OrderedSynchronizer) compareRow(batchIdx1 int, batchIdx2 int) int {
   239  	batch1 := o.inputBatches[batchIdx1]
   240  	batch2 := o.inputBatches[batchIdx2]
   241  	valIdx1 := o.inputIndices[batchIdx1]
   242  	valIdx2 := o.inputIndices[batchIdx2]
   243  	if sel := batch1.Selection(); sel != nil {
   244  		valIdx1 = sel[valIdx1]
   245  	}
   246  	if sel := batch2.Selection(); sel != nil {
   247  		valIdx2 = sel[valIdx2]
   248  	}
   249  	for i := range o.ordering {
   250  		info := o.ordering[i]
   251  		res := o.comparators[i].compare(batchIdx1, batchIdx2, valIdx1, valIdx2)
   252  		if res != 0 {
   253  			switch d := info.Direction; d {
   254  			case encoding.Ascending:
   255  				return res
   256  			case encoding.Descending:
   257  				return -res
   258  			default:
   259  				colexecerror.InternalError(fmt.Sprintf("unexpected direction value %d", d))
   260  			}
   261  		}
   262  	}
   263  	return 0
   264  }
   265  
   266  // updateComparators should be run whenever a new batch is fetched. It updates
   267  // all the relevant vectors in o.comparators.
   268  func (o *OrderedSynchronizer) updateComparators(batchIdx int) {
   269  	batch := o.inputBatches[batchIdx]
   270  	if batch.Length() == 0 {
   271  		return
   272  	}
   273  	for i := range o.ordering {
   274  		vec := batch.ColVec(o.ordering[i].ColIdx)
   275  		o.comparators[i].setVec(batchIdx, vec)
   276  	}
   277  }
   278  
   279  // Len is part of heap.Interface and is only meant to be used internally.
   280  func (o *OrderedSynchronizer) Len() int {
   281  	return len(o.heap)
   282  }
   283  
   284  // Less is part of heap.Interface and is only meant to be used internally.
   285  func (o *OrderedSynchronizer) Less(i, j int) bool {
   286  	return o.compareRow(o.heap[i], o.heap[j]) < 0
   287  }
   288  
   289  // Swap is part of heap.Interface and is only meant to be used internally.
   290  func (o *OrderedSynchronizer) Swap(i, j int) {
   291  	o.heap[i], o.heap[j] = o.heap[j], o.heap[i]
   292  }
   293  
   294  // Push is part of heap.Interface and is only meant to be used internally.
   295  func (o *OrderedSynchronizer) Push(x interface{}) {
   296  	o.heap = append(o.heap, x.(int))
   297  }
   298  
   299  // Pop is part of heap.Interface and is only meant to be used internally.
   300  func (o *OrderedSynchronizer) Pop() interface{} {
   301  	x := o.heap[len(o.heap)-1]
   302  	o.heap = o.heap[:len(o.heap)-1]
   303  	return x
   304  }