github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/hash_aggregator_tmpl.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // {{/*
    12  // +build execgen_template
    13  //
    14  // This file is the execgen template for hash_aggregator.eg.go. It's formatted
    15  // in a special way, so it's both valid Go and a valid text/template input. This
    16  // permits editing this file with editor support.
    17  //
    18  // */}}
    19  
    20  package colexec
    21  
    22  import (
    23  	"fmt"
    24  
    25  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    29  )
    30  
    31  // Blank-identifier reference that removes the unused warning (presumably for the execgen import).
    32  var _ = execgen.UNSAFEGET
    33  
    34  // {{/*
    35  
    36  // Declarations to make the template compile properly. They sit inside a
        // text/template comment, so they never reach the template output.
    37  
    38  // _CANONICAL_TYPE_FAMILY is the template variable. It labels the outer case
        // of the type switch in match.
    39  const _CANONICAL_TYPE_FAMILY = types.UnknownFamily
    40  
    41  // _TYPE_WIDTH is the template variable. It labels the inner case of the type
        // switch in match (the switch on keyTypes[keyIdx].Width()).
    42  const _TYPE_WIDTH = 0
    43  
    44  // _ASSIGN_NE is the template function for assigning the result of comparing
    45  // the second input to the third input into the first input. The matchLoop
        // template passes it, in order: the destination boolean, the two values
        // being compared, a blank placeholder, and the two columns the values were
        // read from. This stand-in only calls colexecerror.InternalError.
    46  func _ASSIGN_NE(_, _, _, _, _, _ interface{}) int {
    47  	colexecerror.InternalError("")
    48  }
    49  
    50  // */}}
    51  
    52  // {{/*
        // _POPULATE_SELS is the Go-visible shell around the "populateSels" template
        // defined below. Its signature line and closing brace sit inside template
        // comments; only the text between the define and end actions is template
        // content. That content refers to op and batchSelection, which are not
        // parameters here, so both must be in scope wherever the template is
        // expanded.
    53  func _POPULATE_SELS(b coldata.Batch, hashBuffer []uint64, _BATCH_HAS_SELECTION bool) { // */}}
    54  	// {{define "populateSels" -}}
    55  	for selIdx, hashCode := range hashBuffer {
    56  		selsSlot := -1
        		// Linear scan over the hash codes that already own a sels slot.
    57  		for slot, hash := range op.scratch.hashCodeForSelsSlot {
    58  			if hash == hashCode {
    59  				// We have already seen a tuple with the same hashCode
    60  				// previously, so we will append into the same sels slot.
    61  				selsSlot = slot
    62  				break
    63  			}
    64  		}
    65  		if selsSlot < 0 {
    66  			// This is the first tuple in hashBuffer with this hashCode, so we
    67  			// will add this tuple to the next available sels slot.
    68  			selsSlot = len(op.scratch.hashCodeForSelsSlot)
    69  			op.scratch.hashCodeForSelsSlot = append(op.scratch.hashCodeForSelsSlot, hashCode)
    70  		}
    71  		// {{if .BatchHasSelection}}
        		// selIdx is a position in hashBuffer; record the index that the
        		// batch's selection vector holds at that position.
    72  		op.scratch.sels[selsSlot] = append(op.scratch.sels[selsSlot], batchSelection[selIdx])
    73  		// {{else}}
        		// Without a selection vector the position in hashBuffer is recorded
        		// directly.
    74  		op.scratch.sels[selsSlot] = append(op.scratch.sels[selsSlot], selIdx)
    75  		// {{end}}
    76  	}
    77  	// {{end}}
    78  	// {{/*
    79  } // */}}
    80  
    81  // populateSels populates intermediate selection vectors (stored in
    82  // op.scratch.sels) for each hash code present in b. hashBuffer must contain
    83  // the hash codes for all of the tuples in b.
        //
        // Tuples with equal hash codes share one slot of op.scratch.sels. Slots are
        // handed out in order of first appearance in hashBuffer, and
        // op.scratch.hashCodeForSelsSlot records which hash code owns each slot.
        // When b has a selection vector the recorded indices are read from it;
        // otherwise they are positions in hashBuffer.
    84  func (op *hashAggregator) populateSels(b coldata.Batch, hashBuffer []uint64) {
    85  	// Note: we don't need to reset any of the slices in op.scratch.sels since
    86  	// they all are of zero length here (see the comment for op.scratch.sels
    87  	// for context).
        	// The slot-to-hash-code mapping is truncated to zero length (its backing
        	// array is kept) so that slots are assigned afresh for this batch.
    88  	op.scratch.hashCodeForSelsSlot = op.scratch.hashCodeForSelsSlot[:0]
        	// The name batchSelection matters: the populateSels template body refers
        	// to it directly in its with-selection variant.
    89  	if batchSelection := b.Selection(); batchSelection != nil {
    90  		_POPULATE_SELS(b, hashBuffer, true)
    91  	} else {
    92  		_POPULATE_SELS(b, hashBuffer, false)
    93  	}
    94  }
    95  
    96  // {{/*
        // _MATCH_LOOP is the Go-visible shell around the "matchLoop" template defined
        // below. For a single key column the template reads the lhs value at
        // aggKeyIdx once and compares it against the rhs value at every row listed
        // in sel, setting diff[selIdx] to true when the two differ. Entries of diff
        // that are already true are left true.
        //
        // The two trailing boolean arguments choose which NULL checks are
        // generated:
        // - lhs and rhs may have NULLs: two NULLs are not considered different,
        //   exactly one NULL is a difference;
        // - only lhs may have NULLs: a NULL lhs marks every row as different;
        // - lhs has no NULLs: no NULL check is generated, whatever the rhs flag is.
        // NOTE(review): in the last case an rhs row that is NULL is compared by its
        // stored value - confirm that this is intended.
        //
        // The body uses lhsCol and rhsCol, which are not parameters here; they are
        // declared at the expansion site in match.
    97  func _MATCH_LOOP(
    98  	sel []int,
    99  	lhs coldata.Vec,
   100  	rhs coldata.Vec,
   101  	aggKeyIdx int,
   102  	lhsNull bool,
   103  	diff []bool,
   104  	_LHS_MAYBE_HAS_NULLS bool,
   105  	_RHS_MAYBE_HAS_NULLS bool,
   106  ) { // */}}
   107  	// {{define "matchLoop" -}}
   108  
        	// The lhs value is the same for every row, so it is read once up front.
   109  	lhsVal := execgen.UNSAFEGET(lhsCol, aggKeyIdx)
   110  
   111  	for selIdx, rowIdx := range sel {
   112  		// {{if .LhsMaybeHasNulls}}
   113  		// {{if .RhsMaybeHasNulls}}
   114  		rhsNull := rhs.Nulls().NullAt(rowIdx)
   115  		if lhsNull && rhsNull {
   116  			// Both values are NULLs, and we do not consider them different.
   117  			continue
   118  		} else if lhsNull || rhsNull {
        			// Exactly one side is NULL, so the values differ.
   119  			diff[selIdx] = true
   120  			continue
   121  		}
   122  		// {{else}}
   123  		if lhsNull {
        			// Only the lhs side is checked for NULL in this variant.
   124  			diff[selIdx] = true
   125  			continue
   126  		}
   127  		// {{end}}
   128  		// {{end}}
   129  
   130  		rhsVal := execgen.UNSAFEGET(rhsCol, rowIdx)
   131  
   132  		var cmp bool
   133  		_ASSIGN_NE(cmp, lhsVal, rhsVal, _, lhsCol, rhsCol)
        		// Keep a difference that was already recorded for this row.
   134  		diff[selIdx] = diff[selIdx] || cmp
   135  	}
   136  
   137  	// {{end}}
   138  	// {{/*
   139  } // */}}
   140  
   141  // match takes a selection vector and compares it against the values of the key
   142  // of its aggregation function. It returns a selection vector representing the
   143  // unmatched tuples and a boolean to indicate whether or not there are any
   144  // matching tuples. It directly writes the result of matched tuples into the
   145  // selection vector of 'b' and sets the length of the batch to the number of
   146  // matching tuples. match also takes a diff boolean slice for internal use.
   147  // This slice needs to be allocated to be at least as big as sel and set to
   148  // all false. diff will be reset to all false when match returns. This is to
   149  // avoid additional slice allocation.
   150  // - firstDefiniteMatch indicates whether we know that tuple with index sel[0]
   151  //   matches the key of the aggregation function and whether we can short
   152  //   circuit probing that tuple.
        // - keyCols lists the columns of b that form the key. The i-th of them is
        //   compared against column i of keyMapping at row v.keyIdx, using
        //   keyCanonicalTypeFamilies[i] and keyTypes[i].Width() to pick the typed
        //   comparison. An unhandled type family raises an internal error.
        // - The length of b is only updated when at least one tuple matched.
   153  // NOTE: the return vector will reuse the memory allocated for the selection
   154  //       vector.
   155  func (v hashAggFuncs) match(
   156  	sel []int,
   157  	b coldata.Batch,
   158  	keyCols []uint32,
   159  	keyTypes []*types.T,
   160  	keyCanonicalTypeFamilies []types.Family,
   161  	keyMapping coldata.Batch,
   162  	diff []bool,
   163  	firstDefiniteMatch bool,
   164  ) (bool, []int) {
   165  	// We want to directly write to the selection vector to avoid extra
   166  	// allocation.
   167  	b.SetSelection(true)
   168  	matched := b.Selection()[:0]
   169  
   170  	aggKeyIdx := v.keyIdx
   171  
   172  	if firstDefiniteMatch {
        		// sel[0] is known to match: record it and exclude it from probing.
        		// diff is shortened by one entry as well.
   173  		matched = append(matched, sel[0])
   174  		sel = sel[1:]
   175  		diff = diff[:len(diff)-1]
   176  	}
   177  
   178  	if len(sel) > 0 {
        		// Every key column gets one pass over sel; a row ends up marked in
        		// diff if it differs from the key in any column.
   179  		for keyIdx, colIdx := range keyCols {
   180  			lhs := keyMapping.ColVec(keyIdx)
   181  			lhsHasNull := lhs.MaybeHasNulls()
   182  
   183  			rhs := b.ColVec(int(colIdx))
   184  			rhsHasNull := rhs.MaybeHasNulls()
   185  
   186  			switch keyCanonicalTypeFamilies[keyIdx] {
   187  			// {{range .}}
   188  			case _CANONICAL_TYPE_FAMILY:
   189  				switch keyTypes[keyIdx].Width() {
   190  				// {{range .WidthOverloads}}
   191  				case _TYPE_WIDTH:
   192  					lhsCol := lhs.TemplateType()
   193  					rhsCol := rhs.TemplateType()
   194  					if lhsHasNull {
   195  						lhsNull := lhs.Nulls().NullAt(v.keyIdx)
   196  						if rhsHasNull {
   197  							_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, true)
   198  						} else {
   199  							_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, true, false)
   200  						}
   201  					} else {
        						// lhsNull is not declared on this path; the expansions
        						// whose first flag is false never reference it.
   202  						if rhsHasNull {
   203  							_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, true)
   204  						} else {
   205  							_MATCH_LOOP(sel, lhs, rhs, aggKeyIdx, lhsNull, diff, false, false)
   206  						}
   207  					}
   208  					// {{end}}
   209  				}
   210  				// {{end}}
   211  			default:
   212  				colexecerror.InternalError(fmt.Sprintf("unhandled type %s", keyTypes[keyIdx]))
   213  			}
   214  		}
   215  	}
   216  
        	// Partition sel in place: rows marked in diff are compacted to the front
        	// of sel's own memory, all other rows are appended to matched.
   217  	remaining := sel[:0]
   218  	for selIdx, tupleIdx := range sel {
   219  		if diff[selIdx] {
   220  			remaining = append(remaining, tupleIdx)
   221  		} else {
   222  			matched = append(matched, tupleIdx)
   223  		}
   224  	}
   225  
   226  	anyMatched := false
   227  	if len(matched) > 0 {
   228  		b.SetLength(len(matched))
   229  		anyMatched = true
   230  	}
   231  
   232  	// Reset diff slice back to all false.
        	// Each copy starts at offset n so that every entry is cleared even when
        	// diff is longer than zeroBoolColumn.
   233  	for n := 0; n < len(diff); n += copy(diff[n:], zeroBoolColumn) {
   234  	}
   235  
   236  	return anyMatched, remaining
   237  }