github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/unordered_distinct.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    20  )
    21  
    22  // NewUnorderedDistinct creates an unordered distinct on the given distinct
    23  // columns.
    24  // numHashBuckets determines the number of buckets that the hash table is
    25  // created with.
    26  func NewUnorderedDistinct(
    27  	allocator *colmem.Allocator,
    28  	input colexecbase.Operator,
    29  	distinctCols []uint32,
    30  	typs []*types.T,
    31  	numHashBuckets uint64,
    32  ) colexecbase.Operator {
    33  	ht := newHashTable(
    34  		allocator,
    35  		numHashBuckets,
    36  		typs,
    37  		distinctCols,
    38  		true, /* allowNullEquality */
    39  		hashTableDistinctBuildMode,
    40  		hashTableDefaultProbeMode,
    41  	)
    42  
    43  	return &unorderedDistinct{
    44  		OneInputNode: NewOneInputNode(input),
    45  		allocator:    allocator,
    46  		ht:           ht,
    47  		output:       allocator.NewMemBatch(typs),
    48  	}
    49  }
    50  
    51  // unorderedDistinct performs a DISTINCT operation using a hashTable. Once the
    52  // building of the hashTable is completed, this operator iterates over all of
    53  // the tuples to check whether the tuple is the "head" of a linked list that
    54  // contain all of the tuples that are equal on distinct columns. Only the
    55  // "head" is included into the big selection vector. Once the big selection
    56  // vector is populated, the operator proceeds to returning the batches
    57  // according to a chunk of the selection vector.
    58  type unorderedDistinct struct {
    59  	OneInputNode
    60  
    61  	allocator     *colmem.Allocator
    62  	ht            *hashTable
    63  	buildFinished bool
    64  
    65  	distinctCount int
    66  
    67  	output           coldata.Batch
    68  	outputBatchStart int
    69  }
    70  
    71  var _ colexecbase.Operator = &unorderedDistinct{}
    72  
// Init initializes the operator by initializing its input. The hash table is
// not built here; that happens on the first call to Next.
func (op *unorderedDistinct) Init() {
	op.input.Init()
}
    76  
    77  func (op *unorderedDistinct) Next(ctx context.Context) coldata.Batch {
    78  	op.output.ResetInternalBatch()
    79  	// First, build the hash table and populate the selection vector that
    80  	// includes only distinct tuples.
    81  	if !op.buildFinished {
    82  		op.buildFinished = true
    83  		op.ht.build(ctx, op.input)
    84  
    85  		// We're using the hashTable in distinct mode, so it buffers only distinct
    86  		// tuples, as a result, we will be simply returning all buffered tuples.
    87  		op.distinctCount = op.ht.vals.Length()
    88  	}
    89  
    90  	// Create and return the next batch of input to a maximum size of
    91  	// coldata.BatchSize().
    92  	nSelected := 0
    93  	batchEnd := op.outputBatchStart + coldata.BatchSize()
    94  	if batchEnd > op.distinctCount {
    95  		batchEnd = op.distinctCount
    96  	}
    97  	nSelected = batchEnd - op.outputBatchStart
    98  
    99  	op.allocator.PerformOperation(op.output.ColVecs(), func() {
   100  		for colIdx, fromCol := range op.ht.vals.ColVecs() {
   101  			toCol := op.output.ColVec(colIdx)
   102  			toCol.Copy(
   103  				coldata.CopySliceArgs{
   104  					SliceArgs: coldata.SliceArgs{
   105  						Src:         fromCol,
   106  						SrcStartIdx: op.outputBatchStart,
   107  						SrcEndIdx:   batchEnd,
   108  					},
   109  				},
   110  			)
   111  		}
   112  	})
   113  
   114  	op.outputBatchStart = batchEnd
   115  	op.output.SetLength(nSelected)
   116  	return op.output
   117  }
   118  
   119  // reset resets the unorderedDistinct.
   120  func (op *unorderedDistinct) reset(ctx context.Context) {
   121  	if r, ok := op.input.(resetter); ok {
   122  		r.reset(ctx)
   123  	}
   124  	op.ht.vals.ResetInternalBatch()
   125  	op.ht.vals.SetLength(0)
   126  	op.buildFinished = false
   127  	op.ht.reset(ctx)
   128  	op.distinctCount = 0
   129  	op.outputBatchStart = 0
   130  }