github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/unordered_distinct.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/col/coldata" 17 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 18 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 19 "github.com/cockroachdb/cockroach/pkg/sql/types" 20 ) 21 22 // NewUnorderedDistinct creates an unordered distinct on the given distinct 23 // columns. 24 // numHashBuckets determines the number of buckets that the hash table is 25 // created with. 26 func NewUnorderedDistinct( 27 allocator *colmem.Allocator, 28 input colexecbase.Operator, 29 distinctCols []uint32, 30 typs []*types.T, 31 numHashBuckets uint64, 32 ) colexecbase.Operator { 33 ht := newHashTable( 34 allocator, 35 numHashBuckets, 36 typs, 37 distinctCols, 38 true, /* allowNullEquality */ 39 hashTableDistinctBuildMode, 40 hashTableDefaultProbeMode, 41 ) 42 43 return &unorderedDistinct{ 44 OneInputNode: NewOneInputNode(input), 45 allocator: allocator, 46 ht: ht, 47 output: allocator.NewMemBatch(typs), 48 } 49 } 50 51 // unorderedDistinct performs a DISTINCT operation using a hashTable. Once the 52 // building of the hashTable is completed, this operator iterates over all of 53 // the tuples to check whether the tuple is the "head" of a linked list that 54 // contain all of the tuples that are equal on distinct columns. Only the 55 // "head" is included into the big selection vector. Once the big selection 56 // vector is populated, the operator proceeds to returning the batches 57 // according to a chunk of the selection vector. 58 type unorderedDistinct struct { 59 OneInputNode 60 61 allocator *colmem.Allocator 62 ht *hashTable 63 buildFinished bool 64 65 distinctCount int 66 67 output coldata.Batch 68 outputBatchStart int 69 } 70 71 var _ colexecbase.Operator = &unorderedDistinct{} 72 73 func (op *unorderedDistinct) Init() { 74 op.input.Init() 75 } 76 77 func (op *unorderedDistinct) Next(ctx context.Context) coldata.Batch { 78 op.output.ResetInternalBatch() 79 // First, build the hash table and populate the selection vector that 80 // includes only distinct tuples. 81 if !op.buildFinished { 82 op.buildFinished = true 83 op.ht.build(ctx, op.input) 84 85 // We're using the hashTable in distinct mode, so it buffers only distinct 86 // tuples, as a result, we will be simply returning all buffered tuples. 87 op.distinctCount = op.ht.vals.Length() 88 } 89 90 // Create and return the next batch of input to a maximum size of 91 // coldata.BatchSize(). 92 nSelected := 0 93 batchEnd := op.outputBatchStart + coldata.BatchSize() 94 if batchEnd > op.distinctCount { 95 batchEnd = op.distinctCount 96 } 97 nSelected = batchEnd - op.outputBatchStart 98 99 op.allocator.PerformOperation(op.output.ColVecs(), func() { 100 for colIdx, fromCol := range op.ht.vals.ColVecs() { 101 toCol := op.output.ColVec(colIdx) 102 toCol.Copy( 103 coldata.CopySliceArgs{ 104 SliceArgs: coldata.SliceArgs{ 105 Src: fromCol, 106 SrcStartIdx: op.outputBatchStart, 107 SrcEndIdx: batchEnd, 108 }, 109 }, 110 ) 111 } 112 }) 113 114 op.outputBatchStart = batchEnd 115 op.output.SetLength(nSelected) 116 return op.output 117 } 118 119 // reset resets the unorderedDistinct. 120 func (op *unorderedDistinct) reset(ctx context.Context) { 121 if r, ok := op.input.(resetter); ok { 122 r.reset(ctx) 123 } 124 op.ht.vals.ResetInternalBatch() 125 op.ht.vals.SetLength(0) 126 op.buildFinished = false 127 op.ht.reset(ctx) 128 op.distinctCount = 0 129 op.outputBatchStart = 0 130 }