// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/distinct.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/stringarena"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
	"github.com/opentracing/opentracing-go"
)

// distinct is the physical processor implementation of the DISTINCT relational operator.
type distinct struct {
	execinfra.ProcessorBase

	// input is the row source that Next reads rows and metadata from.
	input execinfra.RowSource

	// types holds the input's output column types; they are used when
	// comparing and fingerprinting datums.
	types []*types.T

	// haveLastGroupKey is false until the first row has been copied into
	// lastGroupKey.
	haveLastGroupKey bool

	// lastGroupKey is a copy of the row that opened the current group. New rows
	// are compared against it on orderedCols.
	lastGroupKey sqlbase.EncDatumRow

	// arena allocates the strings stored as keys in seen. It is built on top
	// of memAcc.
	arena stringarena.Arena

	// seen is the set of row encodings already emitted for the current group.
	// It is replaced with a fresh map whenever the group key changes.
	seen map[string]struct{}

	// orderedCols lists the column indices that form the group key (the
	// columns the input is ordered on, per the spec).
	orderedCols []uint32

	// distinctCols is the set of column indices that participate in the
	// distinctness check; all other columns are ignored when encoding a row.
	distinctCols util.FastIntSet

	// memAcc is the bound account created from the processor's memory monitor.
	// It is closed in close().
	memAcc mon.BoundAccount

	// datumAlloc is handed to Compare and Fingerprint calls.
	datumAlloc sqlbase.DatumAlloc

	// scratch is a reusable buffer that receives each row's encoding.
	scratch []byte

	// nullsAreDistinct, when set, makes a row containing a NULL in a distinct
	// column never match any other row.
	nullsAreDistinct bool

	// nullCount is a counter appended to the encoding of rows containing a
	// NULL when nullsAreDistinct is set, so that each such encoding is unique.
	nullCount uint32

	// errorOnDup, when non-empty, is the message of the CardinalityViolation
	// error raised on a duplicate row instead of silently skipping it.
	errorOnDup string
}

// sortedDistinct is a specialized distinct that can be used when all of the
// distinct columns are also ordered.
55 type sortedDistinct struct { 56 distinct 57 } 58 59 var _ execinfra.Processor = &distinct{} 60 var _ execinfra.RowSource = &distinct{} 61 var _ execinfra.OpNode = &distinct{} 62 63 const distinctProcName = "distinct" 64 65 var _ execinfra.Processor = &sortedDistinct{} 66 var _ execinfra.RowSource = &sortedDistinct{} 67 var _ execinfra.OpNode = &sortedDistinct{} 68 69 const sortedDistinctProcName = "sorted distinct" 70 71 // newDistinct instantiates a new Distinct processor. 72 func newDistinct( 73 flowCtx *execinfra.FlowCtx, 74 processorID int32, 75 spec *execinfrapb.DistinctSpec, 76 input execinfra.RowSource, 77 post *execinfrapb.PostProcessSpec, 78 output execinfra.RowReceiver, 79 ) (execinfra.RowSourcedProcessor, error) { 80 if len(spec.DistinctColumns) == 0 { 81 return nil, errors.AssertionFailedf("0 distinct columns specified for distinct processor") 82 } 83 84 var distinctCols, orderedCols util.FastIntSet 85 allSorted := true 86 87 for _, col := range spec.OrderedColumns { 88 orderedCols.Add(int(col)) 89 } 90 for _, col := range spec.DistinctColumns { 91 if !orderedCols.Contains(int(col)) { 92 allSorted = false 93 } 94 distinctCols.Add(int(col)) 95 } 96 if !orderedCols.SubsetOf(distinctCols) { 97 return nil, errors.AssertionFailedf("ordered cols must be a subset of distinct cols") 98 } 99 100 ctx := flowCtx.EvalCtx.Ctx() 101 memMonitor := execinfra.NewMonitor(ctx, flowCtx.EvalCtx.Mon, "distinct-mem") 102 d := &distinct{ 103 input: input, 104 orderedCols: spec.OrderedColumns, 105 distinctCols: distinctCols, 106 memAcc: memMonitor.MakeBoundAccount(), 107 types: input.OutputTypes(), 108 nullsAreDistinct: spec.NullsAreDistinct, 109 errorOnDup: spec.ErrorOnDup, 110 } 111 112 var returnProcessor execinfra.RowSourcedProcessor = d 113 if allSorted { 114 // We can use the faster sortedDistinct processor. 115 // TODO(asubiotto): We should have a distinctBase, rather than making a copy 116 // of a distinct processor. 
117 sd := &sortedDistinct{ 118 distinct: distinct{ 119 input: input, 120 orderedCols: spec.OrderedColumns, 121 distinctCols: distinctCols, 122 memAcc: memMonitor.MakeBoundAccount(), 123 types: input.OutputTypes(), 124 nullsAreDistinct: spec.NullsAreDistinct, 125 errorOnDup: spec.ErrorOnDup, 126 }, 127 } 128 // Set d to the new distinct copy for further initialization. 129 d = &sd.distinct 130 returnProcessor = sd 131 } 132 133 if err := d.Init( 134 d, post, d.types, flowCtx, processorID, output, memMonitor, /* memMonitor */ 135 execinfra.ProcStateOpts{ 136 InputsToDrain: []execinfra.RowSource{d.input}, 137 TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata { 138 d.close() 139 return nil 140 }, 141 }); err != nil { 142 return nil, err 143 } 144 d.lastGroupKey = d.Out.RowAlloc.AllocRow(len(d.types)) 145 d.haveLastGroupKey = false 146 // If we set up the arena when d is created, the pointer to the memAcc 147 // will be changed because the sortedDistinct case makes a copy of d. 148 // So we have to set up the account here. 149 d.arena = stringarena.Make(&d.memAcc) 150 151 if sp := opentracing.SpanFromContext(ctx); sp != nil && tracing.IsRecording(sp) { 152 d.input = newInputStatCollector(d.input) 153 d.FinishTrace = d.outputStatsToTrace 154 } 155 156 return returnProcessor, nil 157 } 158 159 // Start is part of the RowSource interface. 160 func (d *distinct) Start(ctx context.Context) context.Context { 161 d.input.Start(ctx) 162 return d.StartInternal(ctx, distinctProcName) 163 } 164 165 // Start is part of the RowSource interface. 
166 func (d *sortedDistinct) Start(ctx context.Context) context.Context { 167 d.input.Start(ctx) 168 return d.StartInternal(ctx, sortedDistinctProcName) 169 } 170 171 func (d *distinct) matchLastGroupKey(row sqlbase.EncDatumRow) (bool, error) { 172 if !d.haveLastGroupKey { 173 return false, nil 174 } 175 for _, colIdx := range d.orderedCols { 176 res, err := d.lastGroupKey[colIdx].Compare( 177 d.types[colIdx], &d.datumAlloc, d.EvalCtx, &row[colIdx], 178 ) 179 if res != 0 || err != nil { 180 return false, err 181 } 182 183 // If null values are treated as distinct from one another, then a grouping 184 // column with a NULL value means that the row should never match any other 185 // row. 186 if d.nullsAreDistinct && d.lastGroupKey[colIdx].IsNull() { 187 return false, nil 188 } 189 } 190 return true, nil 191 } 192 193 // encode appends the encoding of non-ordered columns, which we use as a key in 194 // our 'seen' set. 195 func (d *distinct) encode(appendTo []byte, row sqlbase.EncDatumRow) ([]byte, error) { 196 var err error 197 foundNull := false 198 for i, datum := range row { 199 // Ignore columns that are not in the distinctCols, as if we are 200 // post-processing to strip out column Y, we cannot include it as 201 // (X1, Y1) and (X1, Y2) will appear as distinct rows, but if we are 202 // stripping out Y, we do not want (X1) and (X1) to be in the results. 203 if !d.distinctCols.Contains(i) { 204 continue 205 } 206 207 appendTo, err = datum.Fingerprint(d.types[i], &d.datumAlloc, appendTo) 208 if err != nil { 209 return nil, err 210 } 211 212 // If null values are treated as distinct from one another, then append 213 // a unique identifier to the end of the encoding, so that the row will 214 // always be in its own distinct group. 
215 if d.nullsAreDistinct && datum.IsNull() { 216 foundNull = true 217 } 218 } 219 220 if foundNull { 221 appendTo = encoding.EncodeUint32Ascending(appendTo, d.nullCount) 222 d.nullCount++ 223 } 224 225 return appendTo, nil 226 } 227 228 func (d *distinct) close() { 229 if d.InternalClose() { 230 d.memAcc.Close(d.Ctx) 231 d.MemMonitor.Stop(d.Ctx) 232 } 233 } 234 235 // Next is part of the RowSource interface. 236 func (d *distinct) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 237 for d.State == execinfra.StateRunning { 238 row, meta := d.input.Next() 239 if meta != nil { 240 if meta.Err != nil { 241 d.MoveToDraining(nil /* err */) 242 } 243 return nil, meta 244 } 245 if row == nil { 246 d.MoveToDraining(nil /* err */) 247 break 248 } 249 250 // If we are processing DISTINCT(x, y) and the input stream is ordered 251 // by x, we define x to be our group key. Our seen set at any given time 252 // is only the set of all rows with the same group key. The encoding of 253 // the row is the key we use in our 'seen' set. 254 encoding, err := d.encode(d.scratch, row) 255 if err != nil { 256 d.MoveToDraining(err) 257 break 258 } 259 d.scratch = encoding[:0] 260 261 // The 'seen' set is reset whenever we find consecutive rows differing on the 262 // group key thus avoiding the need to store encodings of all rows. 263 matched, err := d.matchLastGroupKey(row) 264 if err != nil { 265 d.MoveToDraining(err) 266 break 267 } 268 269 if !matched { 270 // Since the sorted distinct columns have changed, we know that all the 271 // distinct keys in the 'seen' set will never be seen again. This allows 272 // us to keep the current arena block and overwrite strings previously 273 // allocated on it, which implies that UnsafeReset() is safe to call here. 
274 copy(d.lastGroupKey, row) 275 d.haveLastGroupKey = true 276 if err := d.arena.UnsafeReset(d.Ctx); err != nil { 277 d.MoveToDraining(err) 278 break 279 } 280 d.seen = make(map[string]struct{}) 281 } 282 283 // Check whether row is distinct. 284 if _, ok := d.seen[string(encoding)]; ok { 285 if d.errorOnDup != "" { 286 // Row is a duplicate input to an Upsert operation, so raise 287 // an error. 288 // 289 // TODO(knz): errorOnDup could be passed via log.Safe() if 290 // there was a guarantee that it does not contain PII. Or 291 // better yet, the caller would construct an `error` object to 292 // return here instead of a string. 293 // See: https://github.com/cockroachdb/cockroach/issues/48166 294 err = pgerror.Newf(pgcode.CardinalityViolation, "%s", d.errorOnDup) 295 d.MoveToDraining(err) 296 break 297 } 298 continue 299 } 300 s, err := d.arena.AllocBytes(d.Ctx, encoding) 301 if err != nil { 302 d.MoveToDraining(err) 303 break 304 } 305 d.seen[s] = struct{}{} 306 307 if outRow := d.ProcessRowHelper(row); outRow != nil { 308 return outRow, nil 309 } 310 } 311 return nil, d.DrainHelper() 312 } 313 314 // Next is part of the RowSource interface. 315 // 316 // sortedDistinct is simpler than distinct. All it has to do is keep track 317 // of the last row it saw, emitting if the new row is different. 318 func (d *sortedDistinct) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 319 for d.State == execinfra.StateRunning { 320 row, meta := d.input.Next() 321 if meta != nil { 322 if meta.Err != nil { 323 d.MoveToDraining(nil /* err */) 324 } 325 return nil, meta 326 } 327 if row == nil { 328 d.MoveToDraining(nil /* err */) 329 break 330 } 331 matched, err := d.matchLastGroupKey(row) 332 if err != nil { 333 d.MoveToDraining(err) 334 break 335 } 336 if matched { 337 if d.errorOnDup != "" { 338 // Row is a duplicate input to an Upsert operation, so raise an error. 
339 // TODO(knz): errorOnDup could be passed via log.Safe() if 340 // there was a guarantee that it does not contain PII. 341 err = pgerror.Newf(pgcode.CardinalityViolation, "%s", d.errorOnDup) 342 d.MoveToDraining(err) 343 break 344 } 345 continue 346 } 347 348 d.haveLastGroupKey = true 349 copy(d.lastGroupKey, row) 350 351 if outRow := d.ProcessRowHelper(row); outRow != nil { 352 return outRow, nil 353 } 354 } 355 return nil, d.DrainHelper() 356 } 357 358 // ConsumerClosed is part of the RowSource interface. 359 func (d *distinct) ConsumerClosed() { 360 // The consumer is done, Next() will not be called again. 361 d.close() 362 } 363 364 var _ execinfrapb.DistSQLSpanStats = &DistinctStats{} 365 366 const distinctTagPrefix = "distinct." 367 368 // Stats implements the SpanStats interface. 369 func (ds *DistinctStats) Stats() map[string]string { 370 inputStatsMap := ds.InputStats.Stats(distinctTagPrefix) 371 inputStatsMap[distinctTagPrefix+MaxMemoryTagSuffix] = humanizeutil.IBytes(ds.MaxAllocatedMem) 372 return inputStatsMap 373 } 374 375 // StatsForQueryPlan implements the DistSQLSpanStats interface. 376 func (ds *DistinctStats) StatsForQueryPlan() []string { 377 stats := ds.InputStats.StatsForQueryPlan("") 378 379 if ds.MaxAllocatedMem != 0 { 380 stats = append(stats, 381 fmt.Sprintf("%s: %s", MaxMemoryQueryPlanSuffix, humanizeutil.IBytes(ds.MaxAllocatedMem))) 382 } 383 384 return stats 385 } 386 387 // outputStatsToTrace outputs the collected distinct stats to the trace. Will 388 // fail silently if the Distinct processor is not collecting stats. 389 func (d *distinct) outputStatsToTrace() { 390 is, ok := getInputStats(d.FlowCtx, d.input) 391 if !ok { 392 return 393 } 394 if sp := opentracing.SpanFromContext(d.Ctx); sp != nil { 395 tracing.SetSpanStats( 396 sp, &DistinctStats{InputStats: is, MaxAllocatedMem: d.MemMonitor.MaximumBytes()}, 397 ) 398 } 399 } 400 401 // ChildCount is part of the execinfra.OpNode interface. 
402 func (d *distinct) ChildCount(verbose bool) int { 403 if _, ok := d.input.(execinfra.OpNode); ok { 404 return 1 405 } 406 return 0 407 } 408 409 // Child is part of the execinfra.OpNode interface. 410 func (d *distinct) Child(nth int, verbose bool) execinfra.OpNode { 411 if nth == 0 { 412 if n, ok := d.input.(execinfra.OpNode); ok { 413 return n 414 } 415 panic("input to distinct is not an execinfra.OpNode") 416 } 417 panic(fmt.Sprintf("invalid index %d", nth)) 418 }