github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/distinct_tmpl.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // {{/*
    12  // +build execgen_template
    13  //
    14  // This file is the execgen template for distinct.eg.go. It's formatted in a
    15  // special way, so it's both valid Go and a valid text/template input. This
    16  // permits editing this file with editor support.
    17  //
    18  // */}}
    19  
    20  package colexec
    21  
    22  import (
    23  	"context"
    24  
    25  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    26  	"github.com/cockroachdb/cockroach/pkg/col/typeconv"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/colexec/execgen"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    31  	"github.com/cockroachdb/errors"
    32  )
    33  
    34  // OrderedDistinctColsToOperators plans an ordered distinct over distinctCols
    35  // of input by stacking one single-column distinct operator per entry of
    36  // distinctCols, each typed by looking up its column index in typs. All of the
    37  // stacked operators share one boolean column. The top of the stack and that
    38  // shared column are returned; an unsupported column type yields an error.
    39  func OrderedDistinctColsToOperators(
    40  	input colexecbase.Operator, distinctCols []uint32, typs []*types.T,
    41  ) (colexecbase.Operator, []bool, error) {
    42  	sharedOutput := make([]bool, coldata.BatchSize())
    43  	// The bottom of the stack carries a function that zeroes the shared boolean
    44  	// column; it is meant to run on every iteration.
    45  	var chain colexecbase.Operator = fnOp{
    46  		OneInputNode: NewOneInputNode(input),
    47  		fn:           func() { copy(sharedOutput, zeroBoolColumn) },
    48  	}
    49  	for _, colIdx := range distinctCols {
    50  		next, err := newSingleDistinct(chain, int(colIdx), sharedOutput, typs[colIdx])
    51  		if err != nil {
    52  			return nil, nil, err
    53  		}
    54  		chain = next
    55  	}
    56  	// The stack is handed out wrapped as a resettableOperator, so its top has
    57  	// to be one.
    58  	resettable, isResettable := chain.(resettableOperator)
    59  	if !isResettable {
    60  		colexecerror.InternalError("unexpectedly an ordered distinct is not a resetter")
    61  	}
    62  	distinctChain := &distinctChainOps{resettableOperator: resettable}
    63  	return distinctChain, sharedOutput, nil
    64  }
    65  
    66  // distinctChainOps is the resettable head of an ordered distinct chain.
    67  type distinctChainOps struct{ resettableOperator }
    68  
    69  // distinctChainOps must itself satisfy resettableOperator.
    70  var _ resettableOperator = (*distinctChainOps)(nil)
    71  
    72  // NewOrderedDistinct returns an ordered distinct operator over the columns
    73  // distinctCols of input; typs holds the types of the input columns.
    74  func NewOrderedDistinct(
    75  	input colexecbase.Operator, distinctCols []uint32, typs []*types.T,
    76  ) (colexecbase.Operator, error) {
    77  	chain, distinctFlags, err := OrderedDistinctColsToOperators(input, distinctCols, typs)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  	// Hand the chain's boolean column to a boolVecToSelOp sitting on top of it.
    82  	chainNode := NewOneInputNode(chain)
    83  	selOp := &boolVecToSelOp{OneInputNode: chainNode, outputCol: distinctFlags}
    84  	return selOp, nil
    85  }
    86  
    87  // Reference the execgen package so that its import is never reported as unused.
    88  var _ = execgen.UNSAFEGET
    89  
    90  // {{/*
    91  
    92  // Declarations to make the template compile properly. They exist only so
        // that this file is valid Go for editors and tooling.
    93  
    94  // _GOTYPE is the template variable. In this file it stands in for the Go type
        // of a single value of the column being processed (see its use for lastVal).
    95  type _GOTYPE interface{}
    96  
    97  // _GOTYPESLICE is the template variable.
        // NOTE(review): no other reference to it is visible in this template.
    98  type _GOTYPESLICE interface{}
    99  
   100  // _ASSIGN_NE is the template equality function for assigning the first input
   101  // to the result of the second input != the third input. It is a placeholder
        // that is presumably substituted by the code generator, so its body only has
        // to type-check and is never meant to run.
   102  func _ASSIGN_NE(_ bool, _, _, _, _, _ _GOTYPE) bool {
   103  	colexecerror.InternalError("")
        	// InternalError is an ordinary call, not a terminating statement, so an
        	// explicit return is needed for a function with a result to compile.
        	return false
   104  }
   105  
   106  // _CANONICAL_TYPE_FAMILY is the template variable. It is used as the case
        // label of the outer switches on the canonical type family in this file.
   107  const _CANONICAL_TYPE_FAMILY = types.UnknownFamily
   108  
   109  // _TYPE_WIDTH is the template variable. It is used as the case label of the
        // inner switches on the type width in this file.
   110  const _TYPE_WIDTH = 0
   111  
   112  // */}}
   113  
        // newSingleDistinct returns a distinct operator for the column at
        // distinctColIdx of input. The concrete operator is chosen by the canonical
        // type family and the width of t, and it is configured to write into
        // outputCol. An error is returned when no case matches t.
   114  func newSingleDistinct(
   115  	input colexecbase.Operator, distinctColIdx int, outputCol []bool, t *types.T,
   116  ) (colexecbase.Operator, error) {
   117  	switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
   118  	// {{range .}}
   119  	case _CANONICAL_TYPE_FAMILY:
   120  		switch t.Width() {
   121  		// {{range .WidthOverloads}}
   122  		case _TYPE_WIDTH:
   123  			return &distinct_TYPEOp{
   124  				OneInputNode:   NewOneInputNode(input),
   125  				distinctColIdx: distinctColIdx,
   126  				outputCol:      outputCol,
   127  			}, nil
   128  			// {{end}}
   129  		}
   130  		// {{end}}
   131  	}
        	// Reached when either the family switch or the width switch had no match.
   132  	return nil, errors.Errorf("unsupported distinct type %s", t)
   133  }
   134  
   135  // partitioner is a simple implementation of sorted distinct that's useful for
   136  // other operators that need to partition an arbitrarily-sized Vec.
        // The implementations in this file only ever set entries of outputCol to
        // true and never clear them, so callers presumably pass in a zeroed outputCol.
   137  type partitioner interface {
   138  	// partition partitions the input colVec of size n, writing true to the
   139  	// outputCol for every value that differs from the previous one.
        	// The implementations in this file always mark the first entry as true.
   140  	partition(colVec coldata.Vec, outputCol []bool, n int)
   141  
   142  	// partitionWithOrder is like partition, except it performs the partitioning
   143  	// on the input Vec as if it were ordered via the input order vector, which is
   144  	// a selection vector. The output is written in absolute order, however. For
   145  	// example, with an input vector [a,b,b] and an order vector [1,2,0], which
   146  	// implies a reordered input vector [b,b,a], the resultant outputCol would be
   147  	// [true, false, true], indicating a distinct value at the 0th and 2nd
   148  	// elements.
   149  	partitionWithOrder(colVec coldata.Vec, order []int, outputCol []bool, n int)
   150  }
   151  
   152  // newPartitioner returns a new partitioner on type t. The implementation is
        // chosen by the canonical type family and the width of t; an error is
        // returned when no case matches t.
   153  func newPartitioner(t *types.T) (partitioner, error) {
   154  	switch typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) {
   155  	// {{range .}}
   156  	case _CANONICAL_TYPE_FAMILY:
   157  		switch t.Width() {
   158  		// {{range .WidthOverloads}}
   159  		case _TYPE_WIDTH:
   160  			return partitioner_TYPE{}, nil
   161  			// {{end}}
   162  		}
   163  		// {{end}}
   164  	}
        	// Reached when either the family switch or the width switch had no match.
   165  	return nil, errors.Errorf("unsupported partition type %s", t)
   166  }
   167  
   168  // {{range .}}
   169  // {{range .WidthOverloads}}
   170  
   171  // distinct_TYPEOp runs a distinct on the column in distinctColIdx, writing
   172  // true to the resultant bool column for every value that differs from the
   173  // previous one.
   174  type distinct_TYPEOp struct {
   175  	OneInputNode
   176  
   177  	// distinctColIdx is the index of the column to distinct upon.
   178  	distinctColIdx int
   179  
   180  	// outputCol is the boolean output column. It is shared by all of the
   181  	// other distinct operators in a distinct operator set.
   182  	outputCol []bool
   183  
   184  	// foundFirstRow is set to true at runtime when we've seen the first row.
   185  	// Distinct always outputs the first row that it sees.
   186  	foundFirstRow bool
   187  
   188  	// lastVal is the last value seen by the operator, so that the distincting
   189  	// still works across batch boundaries. lastValNull records whether the last
        	// row seen was null.
   190  	lastVal     _GOTYPE
   191  	lastValNull bool
   192  }
   193  
        // Compile-time check that the operator can be reset.
   194  var _ resettableOperator = &distinct_TYPEOp{}
   195  
   196  // Init initializes the input operator. The distinct operator itself has no
   197  // state that needs to be set up here.
   198  func (p *distinct_TYPEOp) Init() { p.input.Init() }
   199  
   200  // reset clears the first-row and null flags, then resets a resettable input.
   201  func (p *distinct_TYPEOp) reset(ctx context.Context) {
   202  	p.foundFirstRow, p.lastValNull = false, false
   203  	if in, ok := p.input.(resetter); ok {
   204  		in.reset(ctx)
   205  	}
   206  }
   207  
        // Next fetches the next batch from the input and marks, in the shared output
        // column, every row whose value in the distinct column differs from the value
        // of the row before it. The comparison carries over from the previous batch
        // through lastVal and lastValNull. The batch is returned as received; a
        // zero-length batch is passed through without touching any state.
   208  func (p *distinct_TYPEOp) Next(ctx context.Context) coldata.Batch {
   209  	batch := p.input.Next(ctx)
   210  	if batch.Length() == 0 {
   211  		return batch
   212  	}
   213  	outputCol := p.outputCol
   214  	vec := batch.ColVec(p.distinctColIdx)
        	// nulls stays nil unless the vector may contain nulls; a nil value selects
        	// the loops below that skip all null handling.
   215  	var nulls *coldata.Nulls
   216  	if vec.MaybeHasNulls() {
   217  		nulls = vec.Nulls()
   218  	}
   219  	col := vec.TemplateType()
   220  
   221  	// Work on local copies of the cross-batch state; they are stored back once
        	// the whole batch has been processed.
   222  	lastVal := p.lastVal
   223  	lastValNull := p.lastValNull
   224  	sel := batch.Selection()
        	// firstIdx is the position of the first row of the batch, taking the
        	// selection vector into account when there is one.
   225  	firstIdx := 0
   226  	if sel != nil {
   227  		firstIdx = sel[0]
   228  	}
        	// We always output the first row.
   229  	if !p.foundFirstRow {
   230  		outputCol[firstIdx] = true
   231  		p.foundFirstRow = true
   232  	} else if nulls == nil && lastValNull {
   233  		// The last value of the previous batch was null, so the first value of this
   234  		// non-null batch is distinct.
        		// The null-free loops below never look at lastValNull, which is why this
        		// transition is handled here.
   235  		outputCol[firstIdx] = true
   236  		lastValNull = false
   237  	}
   238  
   239  	n := batch.Length()
   240  	if sel != nil {
   241  		// Bounds check elimination.
   242  		sel = sel[:n]
   243  		if nulls != nil {
   244  			for _, checkIdx := range sel {
        				// The flag is written at the same position the value is read from.
   245  				outputIdx := checkIdx
   246  				_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
   247  			}
   248  		} else {
   249  			for _, checkIdx := range sel {
   250  				outputIdx := checkIdx
   251  				_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
   252  			}
   253  		}
   254  	} else {
   255  		// Bounds check elimination.
   256  		col = execgen.SLICE(col, 0, n)
   257  		outputCol = outputCol[:n]
   258  		_ = outputCol[n-1]
   259  		if nulls != nil {
   260  			for execgen.RANGE(checkIdx, col, 0, n) {
   261  				outputIdx := checkIdx
   262  				_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
   263  			}
   264  		} else {
   265  			for execgen.RANGE(checkIdx, col, 0, n) {
   266  				outputIdx := checkIdx
   267  				_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
   268  			}
   269  		}
   270  	}
   271  
        	// Remember the last row of this batch for the first row of the next one.
   272  	p.lastVal = lastVal
   273  	p.lastValNull = lastValNull
   274  
   275  	return batch
   276  }
   277  
   278  // partitioner_TYPE partitions an arbitrary-length colVec by running a distinct
   279  // operation over it. It writes the same format to outputCol that sorted
   280  // distinct does: true for every row that differs from the previous row in the
   281  // input column. It has no fields, so it keeps no state between calls.
   282  type partitioner_TYPE struct{}
   283  
        // partitionWithOrder implements the partitioner interface. Rows are visited
        // in the sequence given by order: the i-th entry of order names the row of
        // colVec that is compared against the previously visited row, and a
        // difference is recorded at position i of outputCol. Position 0 is always
        // marked true, and entries that are already true are left as they are.
        // NOTE(review): outputCol is truncated to n while the loops range over all of
        // order, so order is presumably expected to hold exactly n entries and n to
        // be at least 1 - confirm against the callers.
   284  func (p partitioner_TYPE) partitionWithOrder(
   285  	colVec coldata.Vec, order []int, outputCol []bool, n int,
   286  ) {
        	// The comparison state starts out as the zero value and non-null.
   287  	var lastVal _GOTYPE
   288  	var lastValNull bool
        	// nulls stays nil unless the vector may contain nulls.
   289  	var nulls *coldata.Nulls
   290  	if colVec.MaybeHasNulls() {
   291  		nulls = colVec.Nulls()
   292  	}
   293  
   294  	col := colVec.TemplateType()
   295  	col = execgen.SLICE(col, 0, n)
   296  	outputCol = outputCol[:n]
        	// The first visited row always starts a new partition.
   297  	outputCol[0] = true
   298  	if nulls != nil {
   299  		for outputIdx, checkIdx := range order {
   300  			_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
   301  		}
   302  	} else {
   303  		for outputIdx, checkIdx := range order {
   304  			_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
   305  		}
   306  	}
   307  }
   308  
        // partition implements the partitioner interface. It walks the first n rows
        // of colVec in order and records, at the same position of outputCol, whether
        // a row differs from the one before it. Position 0 is always marked true, and
        // entries that are already true are left as they are.
        // NOTE(review): position 0 is written unconditionally, so n is presumably
        // expected to be at least 1 - confirm against the callers.
   309  func (p partitioner_TYPE) partition(colVec coldata.Vec, outputCol []bool, n int) {
        	// The comparison state starts out as the zero value and non-null; nulls
        	// stays nil unless the vector may contain nulls.
   310  	var (
   311  		lastVal     _GOTYPE
   312  		lastValNull bool
   313  		nulls       *coldata.Nulls
   314  	)
   315  	if colVec.MaybeHasNulls() {
   316  		nulls = colVec.Nulls()
   317  	}
   318  
   319  	col := colVec.TemplateType()
   320  	col = execgen.SLICE(col, 0, n)
   321  	outputCol = outputCol[:n]
        	// The first row always starts a new partition.
   322  	outputCol[0] = true
   323  	if nulls != nil {
   324  		for execgen.RANGE(checkIdx, col, 0, n) {
   325  			outputIdx := checkIdx
   326  			_CHECK_DISTINCT_WITH_NULLS(checkIdx, outputIdx, lastVal, nulls, lastValNull, col, outputCol)
   327  		}
   328  	} else {
   329  		for execgen.RANGE(checkIdx, col, 0, n) {
   330  			outputIdx := checkIdx
   331  			_CHECK_DISTINCT(checkIdx, outputIdx, lastVal, col, outputCol)
   332  		}
   333  	}
   334  }
   335  
   336  // {{end}}
   337  // {{end}}
   338  
   339  // {{/*
   340  // _CHECK_DISTINCT retrieves the value at index checkIdx of col, compares it
   341  // to the passed in lastVal, and sets outputCol[outputIdx] to true if the
   342  // compared values were distinct. It presumes that the current batch has no null
   343  // values.
        // An entry of outputCol that is already true is left true, and lastVal is
        // overwritten with the retrieved value afterwards. The body below refers to
        // checkIdx, outputIdx, lastVal, col and outputCol by name, so call sites
        // presumably have to use exactly those variable names.
   344  func _CHECK_DISTINCT(
   345  	checkIdx int, outputIdx int, lastVal _GOTYPE, col []_GOTYPE, outputCol []bool,
   346  ) { // */}}
   347  
   348  	// {{define "checkDistinct" -}}
   349  	// {{with .Global}}
   350  	v := execgen.UNSAFEGET(col, checkIdx)
   351  	var unique bool
   352  	_ASSIGN_NE(unique, v, lastVal, _, col, _)
   353  	outputCol[outputIdx] = outputCol[outputIdx] || unique
   354  	execgen.COPYVAL(lastVal, v)
   355  	// {{end}}
   356  	// {{end}}
   357  
   358  	// {{/*
   359  } // */}}
   360  
   361  // {{/*
   362  // _CHECK_DISTINCT_WITH_NULLS behaves the same as _CHECK_DISTINCT, but it also
   363  // considers whether the previous and current values are null. It assumes that
   364  // `nulls` is non-nil.
        // A change between null and non-null always marks outputCol[outputIdx] as
        // true, two consecutive nulls never do, and two non-null values are compared.
        // lastVal is only updated for non-null rows, while lastValNull is updated for
        // every row. The body below refers to its inputs by name, so call sites
        // presumably have to use exactly those variable names.
   365  func _CHECK_DISTINCT_WITH_NULLS(
   366  	checkIdx int,
   367  	outputIdx int,
   368  	lastVal _GOTYPE,
   369  	nulls *coldata.Nulls,
   370  	lastValNull bool,
   371  	col []_GOTYPE,
   372  	outputCol []bool,
   373  ) { // */}}
   374  
   375  	// {{define "checkDistinctWithNulls" -}}
   376  	// {{with .Global}}
   377  	null := nulls.NullAt(checkIdx)
   378  	if null {
   379  		if !lastValNull {
   380  			// The current value is null while the previous was not.
   381  			outputCol[outputIdx] = true
   382  		}
   383  	} else {
   384  		v := execgen.UNSAFEGET(col, checkIdx)
   385  		if lastValNull {
   386  			// The previous value was null while the current is not.
   387  			outputCol[outputIdx] = true
   388  		} else {
   389  			// Neither value is null, so we must compare.
   390  			var unique bool
   391  			_ASSIGN_NE(unique, v, lastVal, _, col, _)
   392  			outputCol[outputIdx] = outputCol[outputIdx] || unique
   393  		}
   394  		execgen.COPYVAL(lastVal, v)
   395  	}
   396  	lastValNull = null
   397  	// {{end}}
   398  	// {{end}}
   399  
   400  	// {{/*
   401  } // */}}