github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/hashtable_tmpl.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // {{/*
    12  // +build execgen_template
    13  //
    14  // This file is the execgen template for hashtable.eg.go. It's formatted in a
    15  // special way, so it's both valid Go and a valid text/template input. This
    16  // permits editing this file with editor support.
    17  //
    18  // */}}
    19  
    20  package colexec
    21  
    22  import (
    23  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    27  )
    28  
    29  // Reference execgen once so that its import is not reported as unused.
    30  var _ = execgen.UNSAFEGET
    31  
    32  // {{/*
    33  
    34  // _LEFT_CANONICAL_TYPE_FAMILY is the template variable that serves as the
        // case label of the outer switch on the probe vector's canonical type family.
    35  const _LEFT_CANONICAL_TYPE_FAMILY = types.UnknownFamily
    36  
    37  // _LEFT_TYPE_WIDTH is the template variable that serves as the case label of
        // the switch on the probe vector's type width.
    38  const _LEFT_TYPE_WIDTH = 0
    39  
    40  // _RIGHT_CANONICAL_TYPE_FAMILY is the template variable that serves as the
        // case label of the nested (second-level) switch on a canonical type family.
    41  const _RIGHT_CANONICAL_TYPE_FAMILY = types.UnknownFamily
    42  
    43  // _RIGHT_TYPE_WIDTH is the template variable that serves as the case label of
        // the nested (second-level) switch on a type width.
    44  const _RIGHT_TYPE_WIDTH = 0
    45  
    46  // _ASSIGN_NE is the template equality function for assigning the first input
    47  // to the result of the second input != the third input. In this file the
        // remaining inputs are a blank placeholder followed by the probe and the
        // build key columns. The stub body only calls colexecerror.InternalError.
    48  func _ASSIGN_NE(_, _, _, _, _, _ interface{}) int {
    49  	colexecerror.InternalError("")
    50  }
    51  
    52  // _L_UNSAFEGET is the template function that will be replaced by
    53  // "execgen.UNSAFEGET" which uses _L_TYP. In this file it is applied to
        // probeKeys and probeIdx to obtain probeVal. The stub body only calls
        // colexecerror.InternalError.
    54  func _L_UNSAFEGET(_, _ interface{}) interface{} {
    55  	colexecerror.InternalError("")
    56  }
    57  
    58  // _R_UNSAFEGET is the template function that will be replaced by
    59  // "execgen.UNSAFEGET" which uses _R_TYP. In this file it is applied to
        // buildKeys and buildIdx to obtain buildVal. The stub body only calls
        // colexecerror.InternalError.
    60  func _R_UNSAFEGET(_, _ interface{}) interface{} {
    61  	colexecerror.InternalError("")
    62  }
    63  
    64  // This is a code snippet that is the main body of checkCol* functions. It
    65  // takes in the following template "meta" variables that enable/disable certain
    66  // code paths:
    67  // _PROBE_HAS_NULLS - a boolean as .ProbeHasNulls that determines whether the
    68  // probe vector might have NULL values.
    69  // _BUILD_HAS_NULLS - a boolean as .BuildHasNulls that determines whether the
    70  // build vector might have NULL values.
    71  // _ALLOW_NULL_EQUALITY - a boolean as .AllowNullEquality that determines
    72  // whether NULL values should be treated as equal.
    73  // _SELECT_DISTINCT - a boolean as .SelectDistinct that determines whether a
    74  // probe tuple should be marked as "distinct" if its groupID is zero (meaning
    75  // that there is no tuple in the hash table with the same hash code).
    76  // _USE_PROBE_SEL - a boolean as .UseProbeSel that determines whether there is
    77  // a selection vector on the probe vector.
    78  // _PROBING_AGAINST_ITSELF - a boolean as .ProbingAgainstItself that tells us
    79  // whether the probe and the build vectors are the same object. Having this
    80  // knob allows us to not generate code for the combination of _USE_PROBE_SEL is
    81  // false when _USE_BUILD_SEL is true (if we were to add _USE_BUILD_SEL) because
    82  // such code would never be used.
    83  // _DELETING_PROBE_MODE - a boolean as .DeletingProbeMode that indicates
    84  // whether the hash table is used with hashTableDeletingProbeMode probing mode.
    85  // When it is true, the hashTable uses 'visited' slice to mark previously
    86  // matched tuples as "deleted" so they won't get matched again.
        //
        // The snippet declares only its own scratch variables; it expects ht,
        // nToCheck, probeVec, buildVec, probeSel, probeKeys and buildKeys to be in
        // scope wherever the template is instantiated.
        // NOTE(review): the early bounds check indexes toCheck with nToCheck-1. At
        // the call sites visible in this file nToCheck is a uint64, so a zero value
        // would wrap around and the access would panic - presumably callers never
        // pass zero; confirm.
    87  func _CHECK_COL_BODY(
    88  	_PROBE_HAS_NULLS bool,
    89  	_BUILD_HAS_NULLS bool,
    90  	_ALLOW_NULL_EQUALITY bool,
    91  	_SELECT_DISTINCT bool,
    92  	_USE_PROBE_SEL bool,
    93  	_PROBING_AGAINST_ITSELF bool,
    94  	_DELETING_PROBE_MODE bool,
    95  ) { // */}}
    96  	// {{define "checkColBody" -}}
    97  	var (
    98  		probeIdx, buildIdx       int
    99  		probeIsNull, buildIsNull bool
   100  	)
   101  	// Early bounds check.
   102  	_ = ht.probeScratch.toCheck[nToCheck-1]
   103  	for i := uint64(0); i < nToCheck; i++ {
   104  		// keyID of 0 is reserved to represent the end of the next chain.
   105  		toCheck := ht.probeScratch.toCheck[i]
   106  		keyID := ht.probeScratch.groupID[toCheck]
   107  		if keyID != 0 {
   108  			// the build table key (calculated using keys[keyID - 1] = key) is
   109  			// compared to the corresponding probe table to determine if a match is
   110  			// found.
   111  			// {{if .DeletingProbeMode}}
   112  			if ht.visited[keyID] {
   113  				// This build tuple has already been matched, so we treat
   114  				// it as different from the probe tuple.
   115  				ht.probeScratch.differs[toCheck] = true
   116  				continue
   117  			}
   118  			// {{end}}
   119  
   120  			// {{if .UseProbeSel}}
   121  			probeIdx = probeSel[toCheck]
   122  			// {{else}}
   123  			probeIdx = int(toCheck)
   124  			// {{end}}
   125  			// {{if .ProbeHasNulls}}
   126  			probeIsNull = probeVec.Nulls().NullAt(probeIdx)
   127  			// {{end}}
   128  			// {{/*
   129  			//     Usually, the build vector is already stored in the hash table,
   130  			//     so there is no selection vector. However, there is a use case
   131  			//     when we want to apply the selection vector to keyID when the
   132  			//     hash table is used by unordered distinct to remove the
   133  			//     duplicates within the vector itself - the vector is being
   134  			//     probed "against itself". In such case .UseProbeSel also
   135  			//     means .UseBuildSel if we were to introduce it.
   136  			// */}}
   137  			// {{if and (.UseProbeSel) (.ProbingAgainstItself)}}
   138  			// The vector is probed against itself, so buildVec has the same
   139  			// selection vector as probeVec.
   140  			buildIdx = probeSel[keyID-1]
   141  			// {{else}}
   142  			buildIdx = int(keyID - 1)
   143  			// {{end}}
   144  			// {{if .BuildHasNulls}}
   145  			buildIsNull = buildVec.Nulls().NullAt(buildIdx)
   146  			// {{end}}
   147  			// {{if .AllowNullEquality}}
   148  			if probeIsNull && buildIsNull {
   149  				// Both values are NULLs, and since we're allowing null equality, we
   150  				// proceed to the next value to check.
   151  				continue
   152  			} else if probeIsNull {
   153  				// Only probing value is NULL, so it is different from the build value
   154  				// (which is non-NULL). We mark it as "different" and proceed to the
   155  				// next value to check. This behavior is special in case of allowing
   156  				// null equality because we don't want to reset the groupID of the
   157  				// current probing tuple.
   158  				ht.probeScratch.differs[toCheck] = true
   159  				continue
   160  			}
   161  			// {{end}}
        			// A NULL probe value resets the tuple's groupID to 0 (the value
        			// reserved for the end of the chain), whereas a NULL build value
        			// only marks the tuple as differing.
   162  			if probeIsNull {
   163  				ht.probeScratch.groupID[toCheck] = 0
   164  			} else if buildIsNull {
   165  				ht.probeScratch.differs[toCheck] = true
   166  			} else {
   167  				probeVal := _L_UNSAFEGET(probeKeys, probeIdx)
   168  				buildVal := _R_UNSAFEGET(buildKeys, buildIdx)
   169  				var unique bool
   170  				_ASSIGN_NE(unique, probeVal, buildVal, _, probeKeys, buildKeys)
        				// OR-ing keeps a difference that was already recorded for this
        				// tuple (presumably by a previously checked key column).
   171  				ht.probeScratch.differs[toCheck] = ht.probeScratch.differs[toCheck] || unique
   172  			}
   173  		}
   174  		// {{if .SelectDistinct}}
   175  		if keyID == 0 {
   176  			ht.probeScratch.distinct[toCheck] = true
   177  		}
   178  		// {{end}}
   179  	}
   180  	// {{end}}
   181  	// {{/*
   182  }
   183  
        // _CHECK_COL_WITH_NULLS is the template snippet ("checkColWithNulls") that
        // picks the variant of the "checkColBody" template to run. The choice is made
        // at run time from probeVec.MaybeHasNulls() and buildVec.MaybeHasNulls() and,
        // only when both vectors may have NULLs, from ht.allowNullEquality. The
        // select-distinct knob is always false here; the three meta variables of this
        // snippet are forwarded unchanged.
   184  func _CHECK_COL_WITH_NULLS(
   185  	_USE_PROBE_SEL bool, _PROBING_AGAINST_ITSELF bool, _DELETING_PROBE_MODE bool,
   186  ) { // */}}
   187  	// {{define "checkColWithNulls" -}}
   188  	// {{$probingAgainstItself := .ProbingAgainstItself}}
   189  	// {{$deletingProbeMode := .DeletingProbeMode}}
   190  	if probeVec.MaybeHasNulls() {
   191  		if buildVec.MaybeHasNulls() {
   192  			if ht.allowNullEquality {
   193  				// {{/*
   194  				// The allowNullEquality flag only matters if both vectors have nulls.
   195  				// This lets us avoid writing all 2^3 conditional branches.
   196  				// */}}
   197  				_CHECK_COL_BODY(true, true, true, false, _USE_PROBE_SEL, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   198  			} else {
   199  				_CHECK_COL_BODY(true, true, false, false, _USE_PROBE_SEL, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   200  			}
   201  		} else {
   202  			_CHECK_COL_BODY(true, false, false, false, _USE_PROBE_SEL, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   203  		}
   204  	} else {
   205  		if buildVec.MaybeHasNulls() {
   206  			_CHECK_COL_BODY(false, true, false, false, _USE_PROBE_SEL, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   207  		} else {
   208  			_CHECK_COL_BODY(false, false, false, false, _USE_PROBE_SEL, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   209  		}
   210  	}
   211  	// {{end}}
   212  	// {{/*
   213  }
   214  
        // _CHECK_COL_FUNCTION_TEMPLATE is the template ("checkColFunctionTemplate")
        // for the body of the checkCol* methods. It switches on the canonical type
        // family and the width of probeVec and then of buildVec, obtains the typed key
        // columns as probeKeys and buildKeys, and instantiates "checkColWithNulls"
        // with the use-probe-sel knob set according to whether probeSel is nil. It
        // expects ht, probeVec, buildVec, probeSel and nToCheck to be in scope.
   215  func _CHECK_COL_FUNCTION_TEMPLATE(_PROBING_AGAINST_ITSELF bool, _DELETING_PROBE_MODE bool) { // */}}
   216  	// {{define "checkColFunctionTemplate" -}}
   217  	// {{$probingAgainstItself := .ProbingAgainstItself}}
   218  	// {{$deletingProbeMode := .DeletingProbeMode}}
   219  	// {{with .Global}}
   220  	// In order to inline the templated code of overloads, we need to have a
   221  	// `_overloadHelper` local variable of type `overloadHelper`.
   222  	_overloadHelper := ht.overloadHelper
   223  	switch probeVec.CanonicalTypeFamily() {
   224  	// {{range .LeftFamilies}}
   225  	case _LEFT_CANONICAL_TYPE_FAMILY:
   226  		switch probeVec.Type().Width() {
   227  		// {{range .LeftWidths}}
   228  		case _LEFT_TYPE_WIDTH:
   229  			switch buildVec.CanonicalTypeFamily() {
   230  			// {{range .RightFamilies}}
   231  			case _RIGHT_CANONICAL_TYPE_FAMILY:
   232  				switch buildVec.Type().Width() {
   233  				// {{range .RightWidths}}
   234  				case _RIGHT_TYPE_WIDTH:
   235  					probeKeys := probeVec._ProbeType()
   236  					buildKeys := buildVec._BuildType()
   237  					if probeSel != nil {
   238  						_CHECK_COL_WITH_NULLS(true, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   239  					} else {
   240  						_CHECK_COL_WITH_NULLS(false, _PROBING_AGAINST_ITSELF, _DELETING_PROBE_MODE)
   241  					}
   242  					// {{end}}
   243  				}
   244  				// {{end}}
   245  			}
   246  			// {{end}}
   247  		}
   248  		// {{end}}
   249  	}
   250  	// {{end}}
   251  	// {{end}}
   252  	// {{/*
   253  } // */}}
   254  
   255  // {{if and (not .HashTableMode.IsDistinctBuild) (not .HashTableMode.IsDeletingProbe)}}
   256  
   257  // checkCol determines if the current key column in the groupID buckets matches
   258  // the specified equality column key. If there is no match, then the key is
   259  // added to differs. If the bucket has reached the end, the key is rejected. If
   260  // the hashTable disallows null equality, then if any element in the key is
   261  // null, there is no match.
        //
        // Only the first nToCheck entries of ht.probeScratch.toCheck are examined.
        // When probeSel is non-nil, it is used to translate those entries into
        // indices into probeVec. keyColIdx is not referenced by the template text of
        // this method.
   262  func (ht *hashTable) checkCol(
   263  	probeVec, buildVec coldata.Vec, keyColIdx int, nToCheck uint64, probeSel []int,
   264  ) {
   265  	// {{with .Overloads}}
   266  	_CHECK_COL_FUNCTION_TEMPLATE(false, false)
   267  	// {{end}}
   268  }
   269  
   270  // {{end}}
   271  
   272  // {{if .HashTableMode.IsDistinctBuild}}
   273  
   274  // checkColAgainstItself is similar to checkCol, but it probes the vector
   275  // against itself: vec serves as both the probe and the build vector, and sel,
        // when non-nil, is applied to the indices on both sides.
   276  func (ht *hashTable) checkColAgainstItself(vec coldata.Vec, nToCheck uint64, sel []int) {
   277  	// {{/*
   278  	// In order to reuse the same template function as checkCol uses, we use
   279  	// the same variable names.
   280  	// */}}
   281  	probeVec, buildVec, probeSel := vec, vec, sel
   282  	// {{with .Overloads}}
   283  	_CHECK_COL_FUNCTION_TEMPLATE(true, false)
   284  	// {{end}}
   285  }
   286  
   287  // {{end}}
   288  
   289  // {{if .HashTableMode.IsDeletingProbe}}
   290  
   291  // checkColDeleting determines if the current key column in the groupID buckets
   292  // matches the specified equality column key. If there is no match *or* the key
   293  // has been already used, then the key is added to differs. If the bucket has
   294  // reached the end, the key is rejected. If the hashTable disallows null
   295  // equality, then if any element in the key is null, there is no match.
        //
        // A key counts as "already used" when its entry in ht.visited is set. Only the
        // first nToCheck entries of ht.probeScratch.toCheck are examined, and a
        // non-nil probeSel is used to translate them into indices into probeVec.
        // keyColIdx is not referenced by the template text of this method.
   296  func (ht *hashTable) checkColDeleting(
   297  	probeVec, buildVec coldata.Vec, keyColIdx int, nToCheck uint64, probeSel []int,
   298  ) {
   299  	// {{with .Overloads}}
   300  	_CHECK_COL_FUNCTION_TEMPLATE(false, true)
   301  	// {{end}}
   302  }
   303  
   304  // {{end}}
   305  
   306  // {{/*
        // _CHECK_COL_FOR_DISTINCT_WITH_NULLS is the template snippet
        // ("checkColForDistinctWithNulls") that instantiates "checkColBody" with null
        // equality allowed and the select-distinct knob enabled. The variant is chosen
        // at run time from probeVec.MaybeHasNulls() and buildVec.MaybeHasNulls(); the
        // probing-against-itself and deleting-probe-mode knobs are always false.
   307  func _CHECK_COL_FOR_DISTINCT_WITH_NULLS(_USE_PROBE_SEL bool) { // */}}
   308  	// {{define "checkColForDistinctWithNulls" -}}
   309  	if probeVec.MaybeHasNulls() {
   310  		if buildVec.MaybeHasNulls() {
   311  			_CHECK_COL_BODY(true, true, true, true, _USE_PROBE_SEL, false, false)
   312  		} else {
   313  			_CHECK_COL_BODY(true, false, true, true, _USE_PROBE_SEL, false, false)
   314  		}
   315  	} else {
   316  		if buildVec.MaybeHasNulls() {
   317  			_CHECK_COL_BODY(false, true, true, true, _USE_PROBE_SEL, false, false)
   318  		} else {
   319  			_CHECK_COL_BODY(false, false, true, true, _USE_PROBE_SEL, false, false)
   320  		}
   321  	}
   322  
   323  	// {{end}}
   324  	// {{/*
   325  } // */}}
   326  
   327  // {{if .HashTableMode.IsDistinctBuild}}
   328  // {{with .Overloads}}
   329  
        // checkColForDistinctTuples compares one probe key column against the
        // corresponding build key column for the first nToCheck entries of
        // ht.probeScratch.toCheck. NULL values are treated as equal to each other, a
        // mismatch is recorded in differs, and a probe tuple whose groupID is zero is
        // additionally marked as distinct. probeVec and buildVec are expected to be
        // of the same type. A non-nil probeSel is used to translate the entries of
        // toCheck into indices into probeVec.
   330  func (ht *hashTable) checkColForDistinctTuples(
   331  	probeVec, buildVec coldata.Vec, nToCheck uint64, probeSel []int,
   332  ) {
   333  	switch probeVec.CanonicalTypeFamily() {
   334  	// {{range .LeftFamilies}}
   335  	// {{$leftFamily := .LeftCanonicalFamilyStr}}
   336  	case _LEFT_CANONICAL_TYPE_FAMILY:
   337  		switch probeVec.Type().Width() {
   338  		// {{range .LeftWidths}}
   339  		// {{$leftWidth := .Width}}
   340  		case _LEFT_TYPE_WIDTH:
   341  			switch probeVec.CanonicalTypeFamily() {
   342  			// {{range .RightFamilies}}
   343  			// {{$rightFamily := .RightCanonicalFamilyStr}}
   344  			case _RIGHT_CANONICAL_TYPE_FAMILY:
   345  				switch probeVec.Type().Width() {
   346  				// {{range .RightWidths}}
   347  				// {{$rightWidth := .Width}}
   348  				// {{if and (eq $leftFamily $rightFamily) (eq $leftWidth $rightWidth)}}
   349  				// {{/* We're being this tricky with code generation because we
   350  				//      know that both probeVec and buildVec are of the same
   351  				//      type, so we need to iterate over one level of type
   352  				//      family - width. But, the checkCol function above needs
   353  				//      two layers, so in order to keep these templated
   354  				//      functions in a single file we make sure that we
   355  				//      generate the code only if the "second" level is the
   356  				//      same as the "first" one */}}
   357  				case _RIGHT_TYPE_WIDTH:
   358  					probeKeys := probeVec._ProbeType()
   359  					buildKeys := buildVec._ProbeType()
   360  					if probeSel != nil {
   361  						_CHECK_COL_FOR_DISTINCT_WITH_NULLS(true)
   362  					} else {
   363  						_CHECK_COL_FOR_DISTINCT_WITH_NULLS(false)
   364  					}
   365  					// {{end}}
   366  					// {{end}}
   367  				}
   368  				// {{end}}
   369  			}
   370  			// {{end}}
   371  		}
   372  		// {{end}}
   373  	}
   374  }
   375  
   376  // {{end}}
   377  // {{end}}
   378  
   379  // {{/*
        // _CHECK_BODY is the template snippet ("checkBody") shared by check and
        // checkProbeForDistinct. It walks the first nToCheck entries of
        // ht.probeScratch.toCheck, records matches in headID, and moves the entries
        // that still need probing to the front of toCheck, counting them in nDiffers.
        // It expects ht, nToCheck and nDiffers to be in scope.
        // _SELECT_SAME_TUPLES - a boolean as .SelectSameTuples; when true, matching
        // build keys are additionally linked together through ht.same and marked in
        // ht.visited, and the tuple keeps being probed.
        // _DELETING_PROBE_MODE - a boolean as .DeletingProbeMode; when true,
        // ht.visited is used to mark matched build keys as "deleted" and the other
        // matching logic is not generated.
   380  func _CHECK_BODY(_SELECT_SAME_TUPLES bool, _DELETING_PROBE_MODE bool) { // */}}
   381  	// {{define "checkBody" -}}
   382  	for _, toCheck := range ht.probeScratch.toCheck[:nToCheck] {
   383  		if !ht.probeScratch.differs[toCheck] {
   384  			// If the current key matches with the probe key, we want to update headID
   385  			// with the current key if it has not been set yet.
   386  			keyID := ht.probeScratch.groupID[toCheck]
   387  			// {{if .DeletingProbeMode}}
   388  			// We need to check whether this key hasn't been "deleted" (we
   389  			// reuse 'visited' array for tracking which tuples are deleted).
   390  			// TODO(yuzefovich): rather than reusing 'visited' array to have
   391  			// "deleted" marks we could be actually removing tuples' keyIDs
   392  			// from the hash chains. This will require changing our use of
   393  			// singly linked list 'next' to doubly linked list.
   394  			if !ht.visited[keyID] {
   395  				// It hasn't been deleted, so we match it with 'toCheck'
   396  				// probing tuple and "delete" the key.
   397  				ht.probeScratch.headID[toCheck] = keyID
   398  				ht.visited[keyID] = true
   399  			} else {
   400  				// It has been deleted, so we need to continue probing on the
   401  				// next chain if it's not the end of the chain already.
   402  				if keyID != 0 {
   403  					ht.probeScratch.toCheck[nDiffers] = toCheck
   404  					nDiffers++
   405  				}
   406  			}
   407  			continue
   408  			// {{else}}
   409  			if ht.probeScratch.headID[toCheck] == 0 {
   410  				ht.probeScratch.headID[toCheck] = keyID
   411  			}
   412  			// {{if .SelectSameTuples}}
   413  			firstID := ht.probeScratch.headID[toCheck]
   414  			if !ht.visited[keyID] {
   415  				// We can then add this keyID into the same array at the end of the
   416  				// corresponding linked list and mark this ID as visited. Since there
   417  				// can be multiple keys that match this probe key, we want to mark
   418  				// differs at this position to be true. This way, the prober will
   419  				// continue probing for this key until it reaches the end of the next
   420  				// chain.
   421  				ht.probeScratch.differs[toCheck] = true
   422  				ht.visited[keyID] = true
   423  				if firstID != keyID {
   424  					ht.same[keyID] = ht.same[firstID]
   425  					ht.same[firstID] = keyID
   426  				}
   427  			}
   428  			// {{end}}
   429  			// {{end}}
   430  		}
   431  		if ht.probeScratch.differs[toCheck] {
   432  			// Continue probing in this next chain for the probe key.
   433  			ht.probeScratch.differs[toCheck] = false
   434  			ht.probeScratch.toCheck[nDiffers] = toCheck
   435  			nDiffers++
   436  		}
   437  	}
   438  	// {{end}}
   439  	// {{/*
   440  } // */}}
   441  
   442  // {{if .HashTableMode.IsDistinctBuild}}
   443  
   444  // checkBuildForDistinct finds all tuples in probeVecs that are not present in
   445  // buffered tuples stored in ht.vals. It stores the probeVecs' distinct tuples'
   446  // keyIDs in headID buffer.
        // It returns the number of entries, moved to the front of
        // ht.probeScratch.toCheck, that differ from the current key in their bucket
        // and therefore need further probing. A nil probeSel raises an internal error.
   447  // NOTE: It assumes that probeVecs does not contain any duplicates itself.
   448  // NOTE: It assumes that probeSel has already been populated and it is not nil.
   449  func (ht *hashTable) checkBuildForDistinct(
   450  	probeVecs []coldata.Vec, nToCheck uint64, probeSel []int,
   451  ) uint64 {
   452  	if probeSel == nil {
   453  		colexecerror.InternalError("invalid selection vector")
   454  	}
   455  	copy(ht.probeScratch.distinct, zeroBoolColumn)
   456  
   457  	ht.checkColsForDistinctTuples(probeVecs, nToCheck, probeSel)
   458  	nDiffers := uint64(0)
   459  	for i := uint64(0); i < nToCheck; i++ {
   460  		if ht.probeScratch.distinct[ht.probeScratch.toCheck[i]] {
        			// The distinct mark is consumed here: it is cleared and the tuple's
        			// own keyID is recorded in headID.
   461  			ht.probeScratch.distinct[ht.probeScratch.toCheck[i]] = false
   462  			// Calculated using the convention: keyID = keys.indexOf(key) + 1.
   463  			ht.probeScratch.headID[ht.probeScratch.toCheck[i]] = ht.probeScratch.toCheck[i] + 1
   464  		} else if ht.probeScratch.differs[ht.probeScratch.toCheck[i]] {
   465  			// Continue probing in this next chain for the probe key.
   466  			ht.probeScratch.differs[ht.probeScratch.toCheck[i]] = false
   467  			ht.probeScratch.toCheck[nDiffers] = ht.probeScratch.toCheck[i]
   468  			nDiffers++
   469  		}
   470  	}
   471  	return nDiffers
   472  }
   473  
   474  // {{end}}
   475  
   476  // {{/*
   477  //     Note that both probing modes (when hash table is built in full mode)
   478  //     are handled by the same check() function, so we will generate it only
   479  //     once.
   480  // */}}
   481  // {{if .HashTableMode.IsDeletingProbe}}
   482  
   483  // check performs an equality check between the current key in the groupID bucket
   484  // and the probe key at that index. If there is a match, the hashTable's same
   485  // array is updated to lazily populate the linked list of identical build
   486  // table keys. The visited flag for corresponding build table key is also set. A
   487  // key is removed from toCheck if it has already been visited in a previous
   488  // probe, or the bucket has reached the end (key not found in build table). The
   489  // new length of toCheck is returned by this function.
        //
        // The columns are first compared via checkCols against ht.vals; the handling
        // of the comparison results then depends on ht.probeMode, and an unsupported
        // probe mode raises an internal error.
   490  func (ht *hashTable) check(
   491  	probeVecs []coldata.Vec, buildKeyCols []uint32, nToCheck uint64, probeSel []int,
   492  ) uint64 {
   493  	ht.checkCols(probeVecs, ht.vals.ColVecs(), buildKeyCols, nToCheck, probeSel)
   494  	nDiffers := uint64(0)
   495  	switch ht.probeMode {
   496  	case hashTableDefaultProbeMode:
   497  		_CHECK_BODY(true, false)
   498  	case hashTableDeletingProbeMode:
   499  		_CHECK_BODY(true, true)
   500  	default:
   501  		colexecerror.InternalError("unsupported hash table probe mode")
   502  	}
   503  	return nDiffers
   504  }
   505  
   506  // {{end}}
   507  
   508  // {{if .HashTableMode.IsDistinctBuild}}
   509  
   510  // checkProbeForDistinct performs a column by column check for duplicated tuples
   511  // in the probe table. Each of the vecs that corresponds to an entry of
        // ht.keyCols is checked against itself, and the number of entries left at the
        // front of toCheck that need further probing is returned.
   512  func (ht *hashTable) checkProbeForDistinct(vecs []coldata.Vec, nToCheck uint64, sel []int) uint64 {
   513  	for i := range ht.keyCols {
   514  		ht.checkColAgainstItself(vecs[i], nToCheck, sel)
   515  	}
   516  	nDiffers := uint64(0)
   517  	_CHECK_BODY(false, false)
   518  	return nDiffers
   519  }
   520  
   521  // {{end}}
   522  
   523  // {{/*
        // _UPDATE_SEL_BODY is the template snippet ("updateSelBody") that makes up
        // the body of updateSel. It expects ht, b, sel and distinctCount to be in
        // scope. For each of the first b.Length() positions whose headID is nonzero
        // and whose key index (headID minus one) has not been seen yet, it writes the
        // next entry of sel and compacts hashBuffer in step with it. headID and
        // differs are reset for every position.
        // _USE_SEL - a boolean as .UseSel; when true, the entry written to sel is
        // looked up in sel at the key index, otherwise the key index itself is used.
   524  func _UPDATE_SEL_BODY(_USE_SEL bool) { // */}}
   525  	// {{define "updateSelBody" -}}
   526  	// Reuse the buffer allocated for distinct.
   527  	visited := ht.probeScratch.distinct
   528  	copy(visited, zeroBoolColumn)
   529  	for i := 0; i < b.Length(); i++ {
   530  		if ht.probeScratch.headID[i] != 0 {
   531  			if hasVisited := visited[ht.probeScratch.headID[i]-1]; !hasVisited {
   532  				// {{if .UseSel}}
   533  				sel[distinctCount] = sel[ht.probeScratch.headID[i]-1]
   534  				// {{else}}
   535  				sel[distinctCount] = int(ht.probeScratch.headID[i] - 1)
   536  				// {{end}}
   537  				visited[ht.probeScratch.headID[i]-1] = true
   538  				// Compacting and deduplicating hash buffer.
   539  				ht.probeScratch.hashBuffer[distinctCount] = ht.probeScratch.hashBuffer[i]
   540  				distinctCount++
   541  			}
   542  		}
   543  		ht.probeScratch.headID[i] = 0
   544  		ht.probeScratch.differs[i] = false
   545  	}
   546  	// {{end}}
   547  	// {{/*
   548  } // */}}
   549  
   550  // {{if .HashTableMode.IsDistinctBuild}}
   551  
   552  // updateSel updates the selection vector in the given batch using the headID
   553  // buffer. For each nonzero keyID in headID, it will be translated to the actual
   554  // key index using the convention keyID = keys.indexOf(key) + 1. If the input
   555  // batch's selection vector is nil, the key index will be directly used to
   556  // populate the selection vector. Otherwise, the selection vector's value at the
   557  // key index will be used. The duplicated keyIDs will be discarded. The
   558  // hashBuffer will also compact and discard hash values of duplicated keys.
        // Finally, the length of the batch is set to the number of distinct keys.
   559  func (ht *hashTable) updateSel(b coldata.Batch) {
   560  	distinctCount := 0
   561  	if sel := b.Selection(); sel != nil {
   562  		_UPDATE_SEL_BODY(true)
   563  	} else {
        		// The batch has no selection vector yet, so one is enabled on the
        		// batch and then filled in.
   564  		b.SetSelection(true)
   565  		sel = b.Selection()
   566  		_UPDATE_SEL_BODY(false)
   567  	}
   568  	b.SetLength(distinctCount)
   569  }
   570  
   571  // distinctCheck determines if the current key in the groupID bucket matches the
   572  // equality column key. If there is a match, then the key is removed from
   573  // toCheck. If the bucket has reached the end, the key is rejected. The toCheck
   574  // list is reconstructed to only hold the indices of the eqCol keys that have
   575  // not been found. The new length of toCheck is returned by this function.
        //
        // The probe keys are taken from ht.probeScratch.keys and are compared against
        // the columns of ht.vals selected by ht.keyCols.
   576  func (ht *hashTable) distinctCheck(nToCheck uint64, probeSel []int) uint64 {
   577  	probeVecs := ht.probeScratch.keys
   578  	buildVecs := ht.vals.ColVecs()
   579  	buildKeyCols := ht.keyCols
   580  	ht.checkCols(probeVecs, buildVecs, buildKeyCols, nToCheck, probeSel)
   581  	// Select the indices that differ and put them into toCheck.
   582  	nDiffers := uint64(0)
   583  	for i := uint64(0); i < nToCheck; i++ {
   584  		if ht.probeScratch.differs[ht.probeScratch.toCheck[i]] {
        			// The differs flag is cleared as the entry is moved to the front.
   585  			ht.probeScratch.differs[ht.probeScratch.toCheck[i]] = false
   586  			ht.probeScratch.toCheck[nDiffers] = ht.probeScratch.toCheck[i]
   587  			nDiffers++
   588  		}
   589  	}
   590  	return nDiffers
   591  }
   592  
   593  // {{end}}