github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // NewSorter returns a new sort operator, which sorts its input on the columns
    28  // given in orderingCols. The inputTypes must correspond 1-1 with the columns
    29  // in the input operator.
    30  func NewSorter(
    31  	allocator *colmem.Allocator,
    32  	input colexecbase.Operator,
    33  	inputTypes []*types.T,
    34  	orderingCols []execinfrapb.Ordering_Column,
    35  ) (colexecbase.Operator, error) {
    36  	return newSorter(allocator, newAllSpooler(allocator, input, inputTypes), inputTypes, orderingCols)
    37  }
    38  
    39  func newSorter(
    40  	allocator *colmem.Allocator,
    41  	input spooler,
    42  	inputTypes []*types.T,
    43  	orderingCols []execinfrapb.Ordering_Column,
    44  ) (resettableOperator, error) {
    45  	partitioners := make([]partitioner, len(orderingCols)-1)
    46  
    47  	var err error
    48  	for i, ord := range orderingCols {
    49  		if !isSorterSupported(inputTypes[ord.ColIdx], ord.Direction) {
    50  			return nil, errors.Errorf("sorter for type: %s and direction: %s not supported", inputTypes[ord.ColIdx], ord.Direction)
    51  		}
    52  		if i < len(orderingCols)-1 {
    53  			partitioners[i], err = newPartitioner(inputTypes[ord.ColIdx])
    54  			if err != nil {
    55  				return nil, err
    56  			}
    57  		}
    58  	}
    59  
    60  	return &sortOp{
    61  		allocator:    allocator,
    62  		input:        input,
    63  		inputTypes:   inputTypes,
    64  		sorters:      make([]colSorter, len(orderingCols)),
    65  		partitioners: partitioners,
    66  		orderingCols: orderingCols,
    67  		state:        sortSpooling,
    68  	}, nil
    69  }
    70  
// spooler is a column vector operator that spools the data from its input and
// then exposes the buffered tuples to the sorter.
type spooler interface {
	execinfra.OpNode

	// init initializes this spooler and will be called once at the setup time.
	init()
	// spool performs the actual spooling.
	spool(context.Context)
	// getValues returns the i-th Vec of the already spooled data.
	getValues(i int) coldata.Vec
	// getNumTuples returns the number of spooled tuples.
	getNumTuples() int
	// getPartitionsCol returns a partitions column vector in which every true
	// value indicates the start of a different partition (i.e. "chunk") within
	// the spooled tuples. It should return nil if all the tuples belong to the
	// same partition.
	getPartitionsCol() []bool
	// getWindowedBatch returns a batch that is a "window" into all Vecs of the
	// already spooled data, with tuples in range [startIdx, endIdx). This batch
	// is not allowed to be modified and is only safe to use until the next call
	// to this method.
	// TODO(yuzefovich): one idea we might want to implement at some point is
	// adding a wrapper on top of a coldata.Batch that is coldata.ImmutableBatch
	// that returns coldata.ImmutableVecs to enforce immutability.
	getWindowedBatch(startIdx, endIdx int) coldata.Batch
}
    97  
// allSpooler is the spooler that spools all tuples from the input. It is used
// by the general sorter over the whole input.
type allSpooler struct {
	OneInputNode
	NonExplainable

	// allocator is used to create bufferedTuples and windowedBatch and to wrap
	// every append performed while spooling.
	allocator *colmem.Allocator
	// inputTypes contains the types of all of the columns from the input.
	inputTypes []*types.T
	// bufferedTuples stores all the values from the input after spooling. Each
	// Vec in this batch is the entire column from the input.
	bufferedTuples *appendOnlyBufferedBatch
	// spooled indicates whether spool() has already been called. It is cleared
	// again by reset().
	spooled bool
	// windowedBatch is the batch returned by getWindowedBatch. Its columns are
	// replaced with windows into bufferedTuples on every call.
	windowedBatch coldata.Batch
}

// Compile-time checks that allSpooler implements the expected interfaces.
var _ spooler = &allSpooler{}
var _ resetter = &allSpooler{}
   117  
   118  func newAllSpooler(
   119  	allocator *colmem.Allocator, input colexecbase.Operator, inputTypes []*types.T,
   120  ) spooler {
   121  	return &allSpooler{
   122  		OneInputNode: NewOneInputNode(input),
   123  		allocator:    allocator,
   124  		inputTypes:   inputTypes,
   125  	}
   126  }
   127  
   128  func (p *allSpooler) init() {
   129  	p.input.Init()
   130  	p.bufferedTuples = newAppendOnlyBufferedBatch(
   131  		p.allocator, p.inputTypes, 0, /* initialSize */
   132  	)
   133  	p.windowedBatch = p.allocator.NewMemBatchWithSize(p.inputTypes, 0 /* size */)
   134  }
   135  
   136  func (p *allSpooler) spool(ctx context.Context) {
   137  	if p.spooled {
   138  		colexecerror.InternalError("spool() is called for the second time")
   139  	}
   140  	p.spooled = true
   141  	for batch := p.input.Next(ctx); batch.Length() != 0; batch = p.input.Next(ctx) {
   142  		p.allocator.PerformOperation(p.bufferedTuples.ColVecs(), func() {
   143  			p.bufferedTuples.append(batch, 0 /* startIdx */, batch.Length())
   144  		})
   145  	}
   146  }
   147  
// getValues returns the i-th column of the buffered tuples. It raises an
// internal error if called before spool().
func (p *allSpooler) getValues(i int) coldata.Vec {
	if !p.spooled {
		colexecerror.InternalError("getValues() is called before spool()")
	}
	return p.bufferedTuples.ColVec(i)
}
   154  
// getNumTuples returns the number of tuples currently held in bufferedTuples.
// Unlike getValues and getPartitionsCol, it does not check whether spool() has
// been called.
func (p *allSpooler) getNumTuples() int {
	return p.bufferedTuples.Length()
}
   158  
// getPartitionsCol always returns nil, which per the spooler contract means
// that all spooled tuples belong to a single partition. It raises an internal
// error if called before spool().
func (p *allSpooler) getPartitionsCol() []bool {
	if !p.spooled {
		colexecerror.InternalError("getPartitionsCol() is called before spool()")
	}
	return nil
}
   165  
   166  func (p *allSpooler) getWindowedBatch(startIdx, endIdx int) coldata.Batch {
   167  	// We don't need to worry about selection vectors here because if these were
   168  	// present on the original input batches, they have been removed when we were
   169  	// buffering up tuples.
   170  	for i := range p.inputTypes {
   171  		window := p.bufferedTuples.ColVec(i).Window(startIdx, endIdx)
   172  		p.windowedBatch.ReplaceCol(window, i)
   173  	}
   174  	p.windowedBatch.SetSelection(false)
   175  	p.windowedBatch.SetLength(endIdx - startIdx)
   176  	return p.windowedBatch
   177  }
   178  
   179  func (p *allSpooler) reset(ctx context.Context) {
   180  	if r, ok := p.input.(resetter); ok {
   181  		r.reset(ctx)
   182  	}
   183  	p.spooled = false
   184  	p.bufferedTuples.SetLength(0)
   185  	p.bufferedTuples.ResetInternalBatch()
   186  }
   187  
// sortOp is the sort operator: it spools its input, sorts the spooled tuples
// on orderingCols and then emits them in sorted order one batch at a time.
type sortOp struct {
	// allocator is used to allocate the output batch.
	allocator *colmem.Allocator
	// input is the spooler that buffers the tuples to be sorted.
	input spooler

	// inputTypes contains the types of all of the columns from input.
	inputTypes []*types.T
	// orderingCols is the ordered list of column orderings that the sorter should
	// sort on.
	orderingCols []execinfrapb.Ordering_Column
	// sorters contains one colSorter per sort column. The instantiation of
	// sorters occurs within the sort method rather than during construction
	// of the sortOp so that we can correctly choose a sorter based on
	// whether the input has nulls or not.
	sorters []colSorter
	// partitioners contains one partitioner per sort column except for the last,
	// which doesn't need to be partitioned.
	partitioners []partitioner

	// order maintains the order of tuples in the batch, after sorting. The value
	// at index i in order is the ordinal value of the tuple in the input that
	// belongs at index i. For example, if the input column to sort was
	// [c,b,a,d], the order vector after sorting would be [2,1,0,3].
	order []int
	// emitted is the number of tuples emitted so far.
	emitted int
	// state is the current state of the sort.
	state sortState

	// output is the batch returned from Next. It is allocated on first use and
	// reset before every subsequent use.
	output coldata.Batch

	// exported is the number of tuples that have already been returned by
	// ExportBuffered.
	exported int
}

// Compile-time checks that sortOp implements the expected interfaces.
var _ bufferingInMemoryOperator = &sortOp{}
var _ resetter = &sortOp{}
   223  
// colSorter is a single-column sorter, specialized on a particular type.
type colSorter interface {
	// init prepares this sorter, given a particular Vec and an order vector,
	// which must be the same size as the input Vec and will be permuted with
	// the same swaps as the column.
	init(col coldata.Vec, order []int)
	// sort globally sorts this sorter's column.
	sort(ctx context.Context)
	// sortPartitions sorts this sorter's column once for every partition in the
	// partition slice. The caller in this file passes the indices at which the
	// partitions start.
	sortPartitions(ctx context.Context, partitions []int)
}
   236  
// Init initializes the operator by initializing its input spooler.
func (p *sortOp) Init() {
	p.input.init()
}
   240  
// sortState represents the state of the sort operator. The operator moves
// through the states in the order in which they are declared below and returns
// to sortSpooling when it is reset.
type sortState int

const (
	// sortSpooling is the initial state of the operator, where it spools its
	// input.
	sortSpooling sortState = iota
	// sortSorting is the second state of the operator, where it actually sorts
	// all the spooled data.
	sortSorting
	// sortEmitting is the third state of the operator, indicating that each call
	// to Next will return another batch of the sorted data.
	sortEmitting
)
   255  
   256  func (p *sortOp) Next(ctx context.Context) coldata.Batch {
   257  	switch p.state {
   258  	case sortSpooling:
   259  		p.input.spool(ctx)
   260  		p.state = sortSorting
   261  		fallthrough
   262  	case sortSorting:
   263  		p.sort(ctx)
   264  		p.state = sortEmitting
   265  		fallthrough
   266  	case sortEmitting:
   267  		newEmitted := p.emitted + coldata.BatchSize()
   268  		if newEmitted > p.input.getNumTuples() {
   269  			newEmitted = p.input.getNumTuples()
   270  		}
   271  		if newEmitted == p.emitted {
   272  			return coldata.ZeroBatch
   273  		}
   274  
   275  		p.resetOutput()
   276  		for j := 0; j < len(p.inputTypes); j++ {
   277  			// At this point, we have already fully sorted the input. It is ok to do
   278  			// this Copy outside of the allocator - the work has been done, but
   279  			// theoretically it is possible to hit the limit here (mainly with
   280  			// variable-sized types like Bytes). Nonetheless, for performance reasons
   281  			// it would be sad to fallback to disk at this point.
   282  			p.output.ColVec(j).Copy(
   283  				coldata.CopySliceArgs{
   284  					SliceArgs: coldata.SliceArgs{
   285  						Sel:         p.order,
   286  						Src:         p.input.getValues(j),
   287  						SrcStartIdx: p.emitted,
   288  						SrcEndIdx:   newEmitted,
   289  					},
   290  				},
   291  			)
   292  		}
   293  		p.output.SetLength(newEmitted - p.emitted)
   294  		p.emitted = newEmitted
   295  		return p.output
   296  	}
   297  	colexecerror.InternalError(fmt.Sprintf("invalid sort state %v", p.state))
   298  	// This code is unreachable, but the compiler cannot infer that.
   299  	return nil
   300  }
   301  
// sort sorts the spooled tuples, so it must be called after spool() has been
// performed. The tuples themselves are not moved: the result of the sort is
// the permutation stored in p.order, which Next later uses as a selection
// vector when copying tuples into the output batch.
func (p *sortOp) sort(ctx context.Context) {
	spooledTuples := p.input.getNumTuples()
	if spooledTuples == 0 {
		// There is nothing to sort.
		return
	}
	// Allocate p.order if it hasn't been allocated yet or the underlying memory
	// is insufficient; otherwise the existing slice is reused.
	if p.order == nil || cap(p.order) < spooledTuples {
		p.order = make([]int, spooledTuples)
	}
	p.order = p.order[:spooledTuples]

	// Initialize the order vector to the ordinal positions within the input set.
	for i := 0; i < len(p.order); i++ {
		p.order[i] = i
	}

	// Instantiate one sorter per ordering column. This is done here rather than
	// at construction time so that the sorter can be chosen based on whether
	// the column might contain nulls. All sorters share the same order vector.
	for i := range p.orderingCols {
		inputVec := p.input.getValues(int(p.orderingCols[i].ColIdx))
		p.sorters[i] = newSingleSorter(p.inputTypes[p.orderingCols[i].ColIdx], p.orderingCols[i].Direction, inputVec.MaybeHasNulls())
		p.sorters[i].init(inputVec, p.order)
	}

	// Now, sort each column in turn.
	sorters := p.sorters
	partitionsCol := p.input.getPartitionsCol()
	omitNextPartitioning := false
	offset := 0
	if partitionsCol == nil {
		// All spooled tuples belong to the same partition, so the first column
		// doesn't need special treatment - we just globally sort it.
		p.sorters[0].sort(ctx)
		if len(p.sorters) == 1 {
			// We're done sorting. Transition to emitting.
			return
		}
		sorters = sorters[1:]
		partitionsCol = make([]bool, spooledTuples)
	} else {
		// There are at least two partitions already, so the first column needs the
		// same special treatment as all others. The general sequence is as
		// follows: global sort -> partition -> sort partitions -> partition ->
		// -> sort partitions -> partition -> sort partitions -> ..., but in this
		// case, global sort doesn't make sense and partitioning has already been
		// done, so we want to skip the first partitioning step and sort partitions
		// right away. Also, in order to account for not performed global sort, we
		// introduce an offset of 1 for partitioners.
		omitNextPartitioning = true
		offset = 1
	}

	// The rest of the columns need p sorts, one per partition in the previous
	// column. For example, in a two column sort:
	//
	// 1  b
	// 2  b
	// 1  a
	// 2  a
	//
	// We'll first sort the first column:
	//
	// 1  b
	// 1  a
	// 2  b
	// 2  a
	//
	// Then, for each group in the sorted, first column, we sort the second column:
	//
	// 1 a
	// 1 b
	// 2 a
	// 2 b

	// partitions is reused across iterations: it is truncated and refilled for
	// every column.
	partitions := make([]int, 0, 16)
	for i, sorter := range sorters {
		if !omitNextPartitioning {
			// We partition the previous column by running an ordered distinct operation
			// on it, ORing the results together with each subsequent column. This
			// produces a distinct vector (a boolean vector that has true in each
			// position that is different from the last position).
			p.partitioners[i-offset].partitionWithOrder(p.input.getValues(int(p.orderingCols[i-offset].ColIdx)), p.order,
				partitionsCol, spooledTuples)
		} else {
			// The spooler has already provided the partitioning for this first
			// iteration; all following iterations partition as usual.
			omitNextPartitioning = false
		}
		// Convert the distinct vector into a selection vector - a vector of indices
		// that were true in the distinct vector.
		partitions = boolVecToSel64(partitionsCol, partitions[:0])
		// For each partition (set of tuples that are identical in all of the sort
		// columns we've seen so far), sort based on the new column.
		sorter.sortPartitions(ctx, partitions)
	}
}
   398  
   399  func (p *sortOp) resetOutput() {
   400  	if p.output == nil {
   401  		p.output = p.allocator.NewMemBatch(p.inputTypes)
   402  	} else {
   403  		p.output.ResetInternalBatch()
   404  	}
   405  }
   406  
   407  func (p *sortOp) reset(ctx context.Context) {
   408  	if r, ok := p.input.(resetter); ok {
   409  		r.reset(ctx)
   410  	}
   411  	p.emitted = 0
   412  	p.exported = 0
   413  	p.state = sortSpooling
   414  }
   415  
// ChildCount always returns 1: the only child of the sort operator is its
// input spooler. The verbose argument is ignored.
func (p *sortOp) ChildCount(verbose bool) int {
	return 1
}
   419  
   420  func (p *sortOp) Child(nth int, verbose bool) execinfra.OpNode {
   421  	if nth == 0 {
   422  		return p.input
   423  	}
   424  	colexecerror.InternalError(fmt.Sprintf("invalid index %d", nth))
   425  	// This code is unreachable, but the compiler cannot infer that.
   426  	return nil
   427  }
   428  
   429  func (p *sortOp) ExportBuffered(colexecbase.Operator) coldata.Batch {
   430  	if p.exported == p.input.getNumTuples() {
   431  		return coldata.ZeroBatch
   432  	}
   433  	newExported := p.exported + coldata.BatchSize()
   434  	if newExported > p.input.getNumTuples() {
   435  		newExported = p.input.getNumTuples()
   436  	}
   437  	b := p.input.getWindowedBatch(p.exported, newExported)
   438  	p.exported = newExported
   439  	return b
   440  }