github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/distinct.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/util"
    24  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    25  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    26  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    27  	"github.com/cockroachdb/cockroach/pkg/util/stringarena"
    28  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    29  	"github.com/cockroachdb/errors"
    30  	"github.com/opentracing/opentracing-go"
    31  )
    32  
// distinct is the physical processor implementation of the DISTINCT relational operator.
//
// It reads rows from input and emits a row only the first time its distinct
// columns are seen. When some distinct columns are ordered in the input, the
// set of seen keys is discarded every time those ordered columns change.
type distinct struct {
	execinfra.ProcessorBase

	// input is the row source that Next pulls rows from.
	input            execinfra.RowSource
	// types holds input.OutputTypes(); it is indexed by column position when
	// comparing and fingerprinting datums.
	types            []*types.T
	// haveLastGroupKey is false until the first row has been copied into
	// lastGroupKey.
	haveLastGroupKey bool
	// lastGroupKey is a copy of the row that started the current group; only
	// the orderedCols positions are compared against incoming rows.
	lastGroupKey     sqlbase.EncDatumRow
	// arena allocates the strings used as keys of seen; it is created on top
	// of memAcc.
	arena            stringarena.Arena
	// seen is the set of row encodings observed in the current group.
	seen             map[string]struct{}
	// orderedCols are the column indexes from the spec's OrderedColumns; they
	// form the group key.
	orderedCols      []uint32
	// distinctCols is the set of column indexes that participate in the row
	// encoding; all other columns are ignored.
	distinctCols     util.FastIntSet
	// memAcc is a bound account created from the processor's memory monitor
	// and closed in close().
	memAcc           mon.BoundAccount
	// datumAlloc is handed to the datum Compare and Fingerprint calls.
	datumAlloc       sqlbase.DatumAlloc
	// scratch is a reusable buffer that encode appends into.
	scratch          []byte
	// nullsAreDistinct, when set, makes a row with a NULL in a distinct column
	// never match any other row.
	nullsAreDistinct bool
	// nullCount is a counter appended to the encoding of rows containing a
	// NULL (when nullsAreDistinct is set) so that each gets a unique key.
	nullCount        uint32
	// errorOnDup, when non-empty, is the error message raised as soon as a
	// duplicate row is found instead of silently dropping the row.
	errorOnDup       string
}
    52  
// sortedDistinct is a specialized distinct that can be used when all of the
// distinct columns are also ordered. It embeds distinct to reuse its state
// and helpers, and overrides Start and Next.
type sortedDistinct struct {
	distinct
}
    58  
    59  var _ execinfra.Processor = &distinct{}
    60  var _ execinfra.RowSource = &distinct{}
    61  var _ execinfra.OpNode = &distinct{}
    62  
    63  const distinctProcName = "distinct"
    64  
    65  var _ execinfra.Processor = &sortedDistinct{}
    66  var _ execinfra.RowSource = &sortedDistinct{}
    67  var _ execinfra.OpNode = &sortedDistinct{}
    68  
    69  const sortedDistinctProcName = "sorted distinct"
    70  
    71  // newDistinct instantiates a new Distinct processor.
    72  func newDistinct(
    73  	flowCtx *execinfra.FlowCtx,
    74  	processorID int32,
    75  	spec *execinfrapb.DistinctSpec,
    76  	input execinfra.RowSource,
    77  	post *execinfrapb.PostProcessSpec,
    78  	output execinfra.RowReceiver,
    79  ) (execinfra.RowSourcedProcessor, error) {
    80  	if len(spec.DistinctColumns) == 0 {
    81  		return nil, errors.AssertionFailedf("0 distinct columns specified for distinct processor")
    82  	}
    83  
    84  	var distinctCols, orderedCols util.FastIntSet
    85  	allSorted := true
    86  
    87  	for _, col := range spec.OrderedColumns {
    88  		orderedCols.Add(int(col))
    89  	}
    90  	for _, col := range spec.DistinctColumns {
    91  		if !orderedCols.Contains(int(col)) {
    92  			allSorted = false
    93  		}
    94  		distinctCols.Add(int(col))
    95  	}
    96  	if !orderedCols.SubsetOf(distinctCols) {
    97  		return nil, errors.AssertionFailedf("ordered cols must be a subset of distinct cols")
    98  	}
    99  
   100  	ctx := flowCtx.EvalCtx.Ctx()
   101  	memMonitor := execinfra.NewMonitor(ctx, flowCtx.EvalCtx.Mon, "distinct-mem")
   102  	d := &distinct{
   103  		input:            input,
   104  		orderedCols:      spec.OrderedColumns,
   105  		distinctCols:     distinctCols,
   106  		memAcc:           memMonitor.MakeBoundAccount(),
   107  		types:            input.OutputTypes(),
   108  		nullsAreDistinct: spec.NullsAreDistinct,
   109  		errorOnDup:       spec.ErrorOnDup,
   110  	}
   111  
   112  	var returnProcessor execinfra.RowSourcedProcessor = d
   113  	if allSorted {
   114  		// We can use the faster sortedDistinct processor.
   115  		// TODO(asubiotto): We should have a distinctBase, rather than making a copy
   116  		// of a distinct processor.
   117  		sd := &sortedDistinct{
   118  			distinct: distinct{
   119  				input:            input,
   120  				orderedCols:      spec.OrderedColumns,
   121  				distinctCols:     distinctCols,
   122  				memAcc:           memMonitor.MakeBoundAccount(),
   123  				types:            input.OutputTypes(),
   124  				nullsAreDistinct: spec.NullsAreDistinct,
   125  				errorOnDup:       spec.ErrorOnDup,
   126  			},
   127  		}
   128  		// Set d to the new distinct copy for further initialization.
   129  		d = &sd.distinct
   130  		returnProcessor = sd
   131  	}
   132  
   133  	if err := d.Init(
   134  		d, post, d.types, flowCtx, processorID, output, memMonitor, /* memMonitor */
   135  		execinfra.ProcStateOpts{
   136  			InputsToDrain: []execinfra.RowSource{d.input},
   137  			TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata {
   138  				d.close()
   139  				return nil
   140  			},
   141  		}); err != nil {
   142  		return nil, err
   143  	}
   144  	d.lastGroupKey = d.Out.RowAlloc.AllocRow(len(d.types))
   145  	d.haveLastGroupKey = false
   146  	// If we set up the arena when d is created, the pointer to the memAcc
   147  	// will be changed because the sortedDistinct case makes a copy of d.
   148  	// So we have to set up the account here.
   149  	d.arena = stringarena.Make(&d.memAcc)
   150  
   151  	if sp := opentracing.SpanFromContext(ctx); sp != nil && tracing.IsRecording(sp) {
   152  		d.input = newInputStatCollector(d.input)
   153  		d.FinishTrace = d.outputStatsToTrace
   154  	}
   155  
   156  	return returnProcessor, nil
   157  }
   158  
   159  // Start is part of the RowSource interface.
   160  func (d *distinct) Start(ctx context.Context) context.Context {
   161  	d.input.Start(ctx)
   162  	return d.StartInternal(ctx, distinctProcName)
   163  }
   164  
   165  // Start is part of the RowSource interface.
   166  func (d *sortedDistinct) Start(ctx context.Context) context.Context {
   167  	d.input.Start(ctx)
   168  	return d.StartInternal(ctx, sortedDistinctProcName)
   169  }
   170  
   171  func (d *distinct) matchLastGroupKey(row sqlbase.EncDatumRow) (bool, error) {
   172  	if !d.haveLastGroupKey {
   173  		return false, nil
   174  	}
   175  	for _, colIdx := range d.orderedCols {
   176  		res, err := d.lastGroupKey[colIdx].Compare(
   177  			d.types[colIdx], &d.datumAlloc, d.EvalCtx, &row[colIdx],
   178  		)
   179  		if res != 0 || err != nil {
   180  			return false, err
   181  		}
   182  
   183  		// If null values are treated as distinct from one another, then a grouping
   184  		// column with a NULL value means that the row should never match any other
   185  		// row.
   186  		if d.nullsAreDistinct && d.lastGroupKey[colIdx].IsNull() {
   187  			return false, nil
   188  		}
   189  	}
   190  	return true, nil
   191  }
   192  
   193  // encode appends the encoding of non-ordered columns, which we use as a key in
   194  // our 'seen' set.
   195  func (d *distinct) encode(appendTo []byte, row sqlbase.EncDatumRow) ([]byte, error) {
   196  	var err error
   197  	foundNull := false
   198  	for i, datum := range row {
   199  		// Ignore columns that are not in the distinctCols, as if we are
   200  		// post-processing to strip out column Y, we cannot include it as
   201  		// (X1, Y1) and (X1, Y2) will appear as distinct rows, but if we are
   202  		// stripping out Y, we do not want (X1) and (X1) to be in the results.
   203  		if !d.distinctCols.Contains(i) {
   204  			continue
   205  		}
   206  
   207  		appendTo, err = datum.Fingerprint(d.types[i], &d.datumAlloc, appendTo)
   208  		if err != nil {
   209  			return nil, err
   210  		}
   211  
   212  		// If null values are treated as distinct from one another, then append
   213  		// a unique identifier to the end of the encoding, so that the row will
   214  		// always be in its own distinct group.
   215  		if d.nullsAreDistinct && datum.IsNull() {
   216  			foundNull = true
   217  		}
   218  	}
   219  
   220  	if foundNull {
   221  		appendTo = encoding.EncodeUint32Ascending(appendTo, d.nullCount)
   222  		d.nullCount++
   223  	}
   224  
   225  	return appendTo, nil
   226  }
   227  
   228  func (d *distinct) close() {
   229  	if d.InternalClose() {
   230  		d.memAcc.Close(d.Ctx)
   231  		d.MemMonitor.Stop(d.Ctx)
   232  	}
   233  }
   234  
   235  // Next is part of the RowSource interface.
   236  func (d *distinct) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
   237  	for d.State == execinfra.StateRunning {
   238  		row, meta := d.input.Next()
   239  		if meta != nil {
   240  			if meta.Err != nil {
   241  				d.MoveToDraining(nil /* err */)
   242  			}
   243  			return nil, meta
   244  		}
   245  		if row == nil {
   246  			d.MoveToDraining(nil /* err */)
   247  			break
   248  		}
   249  
   250  		// If we are processing DISTINCT(x, y) and the input stream is ordered
   251  		// by x, we define x to be our group key. Our seen set at any given time
   252  		// is only the set of all rows with the same group key. The encoding of
   253  		// the row is the key we use in our 'seen' set.
   254  		encoding, err := d.encode(d.scratch, row)
   255  		if err != nil {
   256  			d.MoveToDraining(err)
   257  			break
   258  		}
   259  		d.scratch = encoding[:0]
   260  
   261  		// The 'seen' set is reset whenever we find consecutive rows differing on the
   262  		// group key thus avoiding the need to store encodings of all rows.
   263  		matched, err := d.matchLastGroupKey(row)
   264  		if err != nil {
   265  			d.MoveToDraining(err)
   266  			break
   267  		}
   268  
   269  		if !matched {
   270  			// Since the sorted distinct columns have changed, we know that all the
   271  			// distinct keys in the 'seen' set will never be seen again. This allows
   272  			// us to keep the current arena block and overwrite strings previously
   273  			// allocated on it, which implies that UnsafeReset() is safe to call here.
   274  			copy(d.lastGroupKey, row)
   275  			d.haveLastGroupKey = true
   276  			if err := d.arena.UnsafeReset(d.Ctx); err != nil {
   277  				d.MoveToDraining(err)
   278  				break
   279  			}
   280  			d.seen = make(map[string]struct{})
   281  		}
   282  
   283  		// Check whether row is distinct.
   284  		if _, ok := d.seen[string(encoding)]; ok {
   285  			if d.errorOnDup != "" {
   286  				// Row is a duplicate input to an Upsert operation, so raise
   287  				// an error.
   288  				//
   289  				// TODO(knz): errorOnDup could be passed via log.Safe() if
   290  				// there was a guarantee that it does not contain PII. Or
   291  				// better yet, the caller would construct an `error` object to
   292  				// return here instead of a string.
   293  				// See: https://github.com/cockroachdb/cockroach/issues/48166
   294  				err = pgerror.Newf(pgcode.CardinalityViolation, "%s", d.errorOnDup)
   295  				d.MoveToDraining(err)
   296  				break
   297  			}
   298  			continue
   299  		}
   300  		s, err := d.arena.AllocBytes(d.Ctx, encoding)
   301  		if err != nil {
   302  			d.MoveToDraining(err)
   303  			break
   304  		}
   305  		d.seen[s] = struct{}{}
   306  
   307  		if outRow := d.ProcessRowHelper(row); outRow != nil {
   308  			return outRow, nil
   309  		}
   310  	}
   311  	return nil, d.DrainHelper()
   312  }
   313  
   314  // Next is part of the RowSource interface.
   315  //
   316  // sortedDistinct is simpler than distinct. All it has to do is keep track
   317  // of the last row it saw, emitting if the new row is different.
   318  func (d *sortedDistinct) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
   319  	for d.State == execinfra.StateRunning {
   320  		row, meta := d.input.Next()
   321  		if meta != nil {
   322  			if meta.Err != nil {
   323  				d.MoveToDraining(nil /* err */)
   324  			}
   325  			return nil, meta
   326  		}
   327  		if row == nil {
   328  			d.MoveToDraining(nil /* err */)
   329  			break
   330  		}
   331  		matched, err := d.matchLastGroupKey(row)
   332  		if err != nil {
   333  			d.MoveToDraining(err)
   334  			break
   335  		}
   336  		if matched {
   337  			if d.errorOnDup != "" {
   338  				// Row is a duplicate input to an Upsert operation, so raise an error.
   339  				// TODO(knz): errorOnDup could be passed via log.Safe() if
   340  				// there was a guarantee that it does not contain PII.
   341  				err = pgerror.Newf(pgcode.CardinalityViolation, "%s", d.errorOnDup)
   342  				d.MoveToDraining(err)
   343  				break
   344  			}
   345  			continue
   346  		}
   347  
   348  		d.haveLastGroupKey = true
   349  		copy(d.lastGroupKey, row)
   350  
   351  		if outRow := d.ProcessRowHelper(row); outRow != nil {
   352  			return outRow, nil
   353  		}
   354  	}
   355  	return nil, d.DrainHelper()
   356  }
   357  
   358  // ConsumerClosed is part of the RowSource interface.
   359  func (d *distinct) ConsumerClosed() {
   360  	// The consumer is done, Next() will not be called again.
   361  	d.close()
   362  }
   363  
   364  var _ execinfrapb.DistSQLSpanStats = &DistinctStats{}
   365  
   366  const distinctTagPrefix = "distinct."
   367  
   368  // Stats implements the SpanStats interface.
   369  func (ds *DistinctStats) Stats() map[string]string {
   370  	inputStatsMap := ds.InputStats.Stats(distinctTagPrefix)
   371  	inputStatsMap[distinctTagPrefix+MaxMemoryTagSuffix] = humanizeutil.IBytes(ds.MaxAllocatedMem)
   372  	return inputStatsMap
   373  }
   374  
   375  // StatsForQueryPlan implements the DistSQLSpanStats interface.
   376  func (ds *DistinctStats) StatsForQueryPlan() []string {
   377  	stats := ds.InputStats.StatsForQueryPlan("")
   378  
   379  	if ds.MaxAllocatedMem != 0 {
   380  		stats = append(stats,
   381  			fmt.Sprintf("%s: %s", MaxMemoryQueryPlanSuffix, humanizeutil.IBytes(ds.MaxAllocatedMem)))
   382  	}
   383  
   384  	return stats
   385  }
   386  
   387  // outputStatsToTrace outputs the collected distinct stats to the trace. Will
   388  // fail silently if the Distinct processor is not collecting stats.
   389  func (d *distinct) outputStatsToTrace() {
   390  	is, ok := getInputStats(d.FlowCtx, d.input)
   391  	if !ok {
   392  		return
   393  	}
   394  	if sp := opentracing.SpanFromContext(d.Ctx); sp != nil {
   395  		tracing.SetSpanStats(
   396  			sp, &DistinctStats{InputStats: is, MaxAllocatedMem: d.MemMonitor.MaximumBytes()},
   397  		)
   398  	}
   399  }
   400  
   401  // ChildCount is part of the execinfra.OpNode interface.
   402  func (d *distinct) ChildCount(verbose bool) int {
   403  	if _, ok := d.input.(execinfra.OpNode); ok {
   404  		return 1
   405  	}
   406  	return 0
   407  }
   408  
   409  // Child is part of the execinfra.OpNode interface.
   410  func (d *distinct) Child(nth int, verbose bool) execinfra.OpNode {
   411  	if nth == 0 {
   412  		if n, ok := d.input.(execinfra.OpNode); ok {
   413  			return n
   414  		}
   415  		panic("input to distinct is not an execinfra.OpNode")
   416  	}
   417  	panic(fmt.Sprintf("invalid index %d", nth))
   418  }