github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sorter.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 18 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 20 "github.com/cockroachdb/cockroach/pkg/sql/rowcontainer" 21 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 22 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 23 "github.com/cockroachdb/cockroach/pkg/util/mon" 24 "github.com/cockroachdb/cockroach/pkg/util/tracing" 25 "github.com/cockroachdb/errors" 26 "github.com/opentracing/opentracing-go" 27 ) 28 29 // sorter sorts the input rows according to the specified ordering. 30 type sorterBase struct { 31 execinfra.ProcessorBase 32 33 input execinfra.RowSource 34 ordering sqlbase.ColumnOrdering 35 matchLen uint32 36 37 rows rowcontainer.SortableRowContainer 38 i rowcontainer.RowIterator 39 40 // Only set if the ability to spill to disk is enabled. 41 diskMonitor *mon.BytesMonitor 42 } 43 44 func (s *sorterBase) init( 45 self execinfra.RowSource, 46 flowCtx *execinfra.FlowCtx, 47 processorID int32, 48 processorName string, 49 input execinfra.RowSource, 50 post *execinfrapb.PostProcessSpec, 51 output execinfra.RowReceiver, 52 ordering sqlbase.ColumnOrdering, 53 matchLen uint32, 54 opts execinfra.ProcStateOpts, 55 ) error { 56 ctx := flowCtx.EvalCtx.Ctx() 57 if sp := opentracing.SpanFromContext(ctx); sp != nil && tracing.IsRecording(sp) { 58 input = newInputStatCollector(input) 59 s.FinishTrace = s.outputStatsToTrace 60 } 61 62 // Limit the memory use by creating a child monitor with a hard limit. 63 // The processor will overflow to disk if this limit is not enough. 64 memMonitor := execinfra.NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, fmt.Sprintf("%s-limited", processorName)) 65 if err := s.ProcessorBase.Init( 66 self, post, input.OutputTypes(), flowCtx, processorID, output, memMonitor, opts, 67 ); err != nil { 68 memMonitor.Stop(ctx) 69 return err 70 } 71 72 s.diskMonitor = execinfra.NewMonitor(ctx, flowCtx.Cfg.DiskMonitor, fmt.Sprintf("%s-disk", processorName)) 73 rc := rowcontainer.DiskBackedRowContainer{} 74 rc.Init( 75 ordering, 76 input.OutputTypes(), 77 s.EvalCtx, 78 flowCtx.Cfg.TempStorage, 79 memMonitor, 80 s.diskMonitor, 81 0, /* rowCapacity */ 82 ) 83 s.rows = &rc 84 85 s.input = input 86 s.ordering = ordering 87 s.matchLen = matchLen 88 return nil 89 } 90 91 // Next is part of the RowSource interface. It is extracted into sorterBase 92 // because this implementation of next is shared between the sortAllProcessor 93 // and the sortTopKProcessor. 94 func (s *sorterBase) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 95 for s.State == execinfra.StateRunning { 96 if ok, err := s.i.Valid(); err != nil || !ok { 97 s.MoveToDraining(err) 98 break 99 } 100 101 row, err := s.i.Row() 102 if err != nil { 103 s.MoveToDraining(err) 104 break 105 } 106 s.i.Next() 107 108 if outRow := s.ProcessRowHelper(row); outRow != nil { 109 return outRow, nil 110 } 111 } 112 return nil, s.DrainHelper() 113 } 114 115 func (s *sorterBase) close() { 116 // We are done sorting rows, close the iterator we have open. 117 if s.InternalClose() { 118 if s.i != nil { 119 s.i.Close() 120 } 121 ctx := s.Ctx 122 s.rows.Close(ctx) 123 s.MemMonitor.Stop(ctx) 124 if s.diskMonitor != nil { 125 s.diskMonitor.Stop(ctx) 126 } 127 } 128 } 129 130 var _ execinfrapb.DistSQLSpanStats = &SorterStats{} 131 132 const sorterTagPrefix = "sorter." 133 134 // Stats implements the SpanStats interface. 135 func (ss *SorterStats) Stats() map[string]string { 136 statsMap := ss.InputStats.Stats(sorterTagPrefix) 137 statsMap[sorterTagPrefix+MaxMemoryTagSuffix] = humanizeutil.IBytes(ss.MaxAllocatedMem) 138 statsMap[sorterTagPrefix+MaxDiskTagSuffix] = humanizeutil.IBytes(ss.MaxAllocatedDisk) 139 return statsMap 140 } 141 142 // StatsForQueryPlan implements the DistSQLSpanStats interface. 143 func (ss *SorterStats) StatsForQueryPlan() []string { 144 stats := ss.InputStats.StatsForQueryPlan("" /* prefix */) 145 146 if ss.MaxAllocatedMem != 0 { 147 stats = append(stats, 148 fmt.Sprintf("%s: %s", MaxMemoryQueryPlanSuffix, humanizeutil.IBytes(ss.MaxAllocatedMem))) 149 } 150 151 if ss.MaxAllocatedDisk != 0 { 152 stats = append(stats, 153 fmt.Sprintf("%s: %s", MaxDiskQueryPlanSuffix, humanizeutil.IBytes(ss.MaxAllocatedDisk))) 154 } 155 156 return stats 157 } 158 159 // outputStatsToTrace outputs the collected sorter stats to the trace. Will fail 160 // silently if stats are not being collected. 161 func (s *sorterBase) outputStatsToTrace() { 162 is, ok := getInputStats(s.FlowCtx, s.input) 163 if !ok { 164 return 165 } 166 if sp := opentracing.SpanFromContext(s.Ctx); sp != nil { 167 tracing.SetSpanStats( 168 sp, 169 &SorterStats{ 170 InputStats: is, 171 MaxAllocatedMem: s.MemMonitor.MaximumBytes(), 172 MaxAllocatedDisk: s.diskMonitor.MaximumBytes(), 173 }, 174 ) 175 } 176 } 177 178 func newSorter( 179 ctx context.Context, 180 flowCtx *execinfra.FlowCtx, 181 processorID int32, 182 spec *execinfrapb.SorterSpec, 183 input execinfra.RowSource, 184 post *execinfrapb.PostProcessSpec, 185 output execinfra.RowReceiver, 186 ) (execinfra.Processor, error) { 187 count := uint64(0) 188 if post.Limit != 0 && post.Filter.Empty() { 189 // The sorter needs to produce Offset + Limit rows. The ProcOutputHelper 190 // will discard the first Offset ones. 191 // LIMIT and OFFSET should each never be greater than math.MaxInt64, the 192 // parser ensures this. 193 if post.Limit > math.MaxInt64 || post.Offset > math.MaxInt64 { 194 return nil, errors.AssertionFailedf( 195 "error creating sorter: limit %d offset %d too large", 196 errors.Safe(post.Limit), errors.Safe(post.Offset)) 197 } 198 count = post.Limit + post.Offset 199 } 200 201 // Choose the optimal processor. 202 if spec.OrderingMatchLen == 0 { 203 if count == 0 { 204 // No specified ordering match length and unspecified limit; no 205 // optimizations are possible so we simply load all rows into memory and 206 // sort all values in-place. It has a worst-case time complexity of 207 // O(n*log(n)) and a worst-case space complexity of O(n). 208 return newSortAllProcessor(ctx, flowCtx, processorID, spec, input, post, output) 209 } 210 // No specified ordering match length but specified limit; we can optimize 211 // our sort procedure by maintaining a max-heap populated with only the 212 // smallest k rows seen. It has a worst-case time complexity of 213 // O(n*log(k)) and a worst-case space complexity of O(k). 214 return newSortTopKProcessor(flowCtx, processorID, spec, input, post, output, count) 215 } 216 // Ordering match length is specified. We will be able to use existing 217 // ordering in order to avoid loading all the rows into memory. If we're 218 // scanning an index with a prefix matching an ordering prefix, we can only 219 // accumulate values for equal fields in this prefix, sort the accumulated 220 // chunk and then output. 221 // TODO(irfansharif): Add optimization for case where both ordering match 222 // length and limit is specified. 223 return newSortChunksProcessor(flowCtx, processorID, spec, input, post, output) 224 } 225 226 // sortAllProcessor reads in all values into the wrapped rows and 227 // uses sort.Sort to sort all values in-place. It has a worst-case time 228 // complexity of O(n*log(n)) and a worst-case space complexity of O(n). 229 // 230 // This processor is intended to be used when all values need to be sorted. 231 type sortAllProcessor struct { 232 sorterBase 233 } 234 235 var _ execinfra.Processor = &sortAllProcessor{} 236 var _ execinfra.RowSource = &sortAllProcessor{} 237 238 const sortAllProcName = "sortAll" 239 240 func newSortAllProcessor( 241 ctx context.Context, 242 flowCtx *execinfra.FlowCtx, 243 processorID int32, 244 spec *execinfrapb.SorterSpec, 245 input execinfra.RowSource, 246 post *execinfrapb.PostProcessSpec, 247 out execinfra.RowReceiver, 248 ) (execinfra.Processor, error) { 249 proc := &sortAllProcessor{} 250 if err := proc.sorterBase.init( 251 proc, flowCtx, processorID, sortAllProcName, input, post, out, 252 execinfrapb.ConvertToColumnOrdering(spec.OutputOrdering), 253 spec.OrderingMatchLen, 254 execinfra.ProcStateOpts{ 255 InputsToDrain: []execinfra.RowSource{input}, 256 TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata { 257 proc.close() 258 return nil 259 }, 260 }, 261 ); err != nil { 262 return nil, err 263 } 264 return proc, nil 265 } 266 267 // Start is part of the RowSource interface. 268 func (s *sortAllProcessor) Start(ctx context.Context) context.Context { 269 s.input.Start(ctx) 270 ctx = s.StartInternal(ctx, sortAllProcName) 271 272 valid, err := s.fill() 273 if !valid || err != nil { 274 s.MoveToDraining(err) 275 } 276 return ctx 277 } 278 279 // fill fills s.rows with the input's rows. 280 // 281 // Metadata is buffered in s.trailingMeta. 282 // 283 // The ok retval is false if an error occurred or if the input returned an error 284 // metadata record. The caller is expected to inspect the error (if any) and 285 // drain if it's not recoverable. It is possible for ok to be false even if no 286 // error is returned - in case an error metadata was received. 287 func (s *sortAllProcessor) fill() (ok bool, _ error) { 288 ctx := s.EvalCtx.Ctx() 289 290 for { 291 row, meta := s.input.Next() 292 if meta != nil { 293 s.AppendTrailingMeta(*meta) 294 if meta.Err != nil { 295 return false, nil //nolint:returnerrcheck 296 } 297 continue 298 } 299 if row == nil { 300 break 301 } 302 303 if err := s.rows.AddRow(ctx, row); err != nil { 304 return false, err 305 } 306 } 307 s.rows.Sort(ctx) 308 309 s.i = s.rows.NewFinalIterator(ctx) 310 s.i.Rewind() 311 return true, nil 312 } 313 314 // ConsumerDone is part of the RowSource interface. 315 func (s *sortAllProcessor) ConsumerDone() { 316 s.input.ConsumerDone() 317 } 318 319 // ConsumerClosed is part of the RowSource interface. 320 func (s *sortAllProcessor) ConsumerClosed() { 321 // The consumer is done, Next() will not be called again. 322 s.close() 323 } 324 325 // sortTopKProcessor creates a max-heap in its wrapped rows and keeps 326 // this heap populated with only the top k values seen. It accomplishes this 327 // by comparing new values (before the deep copy) with the top of the heap. 328 // If the new value is less than the current top, the top will be replaced 329 // and the heap will be fixed. If not, the new value is dropped. When finished, 330 // the max heap is converted to a min-heap effectively sorting the values 331 // correctly in-place. It has a worst-case time complexity of O(n*log(k)) and a 332 // worst-case space complexity of O(k). 333 // 334 // This processor is intended to be used when exactly k values need to be sorted, 335 // where k is known before sorting begins. 336 // 337 // TODO(irfansharif): (taken from TODO found in sql/sort.go) There are better 338 // algorithms that can achieve a sorted top k in a worst-case time complexity 339 // of O(n + k*log(k)) while maintaining a worst-case space complexity of O(k). 340 // For instance, the top k can be found in linear time, and then this can be 341 // sorted in linearithmic time. 342 type sortTopKProcessor struct { 343 sorterBase 344 k uint64 345 } 346 347 var _ execinfra.Processor = &sortTopKProcessor{} 348 var _ execinfra.RowSource = &sortTopKProcessor{} 349 350 const sortTopKProcName = "sortTopK" 351 352 var errSortTopKZeroK = errors.New("invalid value 0 for k") 353 354 func newSortTopKProcessor( 355 flowCtx *execinfra.FlowCtx, 356 processorID int32, 357 spec *execinfrapb.SorterSpec, 358 input execinfra.RowSource, 359 post *execinfrapb.PostProcessSpec, 360 out execinfra.RowReceiver, 361 k uint64, 362 ) (execinfra.Processor, error) { 363 if k == 0 { 364 return nil, errors.NewAssertionErrorWithWrappedErrf(errSortTopKZeroK, 365 "error creating top k sorter") 366 } 367 ordering := execinfrapb.ConvertToColumnOrdering(spec.OutputOrdering) 368 proc := &sortTopKProcessor{k: k} 369 if err := proc.sorterBase.init( 370 proc, flowCtx, processorID, sortTopKProcName, input, post, out, 371 ordering, spec.OrderingMatchLen, 372 execinfra.ProcStateOpts{ 373 InputsToDrain: []execinfra.RowSource{input}, 374 TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata { 375 proc.close() 376 return nil 377 }, 378 }, 379 ); err != nil { 380 return nil, err 381 } 382 return proc, nil 383 } 384 385 // Start is part of the RowSource interface. 386 func (s *sortTopKProcessor) Start(ctx context.Context) context.Context { 387 s.input.Start(ctx) 388 ctx = s.StartInternal(ctx, sortTopKProcName) 389 390 // The execution loop for the SortTopK processor is similar to that of the 391 // SortAll processor; the difference is that we push rows into a max-heap 392 // of size at most K, and only sort those. 393 heapCreated := false 394 for { 395 row, meta := s.input.Next() 396 if meta != nil { 397 s.AppendTrailingMeta(*meta) 398 if meta.Err != nil { 399 s.MoveToDraining(nil /* err */) 400 break 401 } 402 continue 403 } 404 if row == nil { 405 break 406 } 407 408 if uint64(s.rows.Len()) < s.k { 409 // Accumulate up to k values. 410 if err := s.rows.AddRow(ctx, row); err != nil { 411 s.MoveToDraining(err) 412 break 413 } 414 } else { 415 if !heapCreated { 416 // Arrange the k values into a max-heap. 417 s.rows.InitTopK() 418 heapCreated = true 419 } 420 // Replace the max value if the new row is smaller, maintaining the 421 // max-heap. 422 if err := s.rows.MaybeReplaceMax(ctx, row); err != nil { 423 s.MoveToDraining(err) 424 break 425 } 426 } 427 } 428 s.rows.Sort(ctx) 429 s.i = s.rows.NewFinalIterator(ctx) 430 s.i.Rewind() 431 return ctx 432 } 433 434 // ConsumerDone is part of the RowSource interface. 435 func (s *sortTopKProcessor) ConsumerDone() { 436 s.input.ConsumerDone() 437 } 438 439 // ConsumerClosed is part of the RowSource interface. 440 func (s *sortTopKProcessor) ConsumerClosed() { 441 // The consumer is done, Next() will not be called again. 442 s.close() 443 } 444 445 // If we're scanning an index with a prefix matching an ordering prefix, we only accumulate values 446 // for equal fields in this prefix, sort the accumulated chunk and then output. 447 type sortChunksProcessor struct { 448 sorterBase 449 450 alloc sqlbase.DatumAlloc 451 452 // sortChunksProcessor accumulates rows that are equal on a prefix, until it 453 // encounters a row that is greater. It stores that greater row in nextChunkRow 454 nextChunkRow sqlbase.EncDatumRow 455 } 456 457 var _ execinfra.Processor = &sortChunksProcessor{} 458 var _ execinfra.RowSource = &sortChunksProcessor{} 459 460 const sortChunksProcName = "sortChunks" 461 462 func newSortChunksProcessor( 463 flowCtx *execinfra.FlowCtx, 464 processorID int32, 465 spec *execinfrapb.SorterSpec, 466 input execinfra.RowSource, 467 post *execinfrapb.PostProcessSpec, 468 out execinfra.RowReceiver, 469 ) (execinfra.Processor, error) { 470 ordering := execinfrapb.ConvertToColumnOrdering(spec.OutputOrdering) 471 472 proc := &sortChunksProcessor{} 473 if err := proc.sorterBase.init( 474 proc, flowCtx, processorID, sortChunksProcName, input, post, out, ordering, spec.OrderingMatchLen, 475 execinfra.ProcStateOpts{ 476 InputsToDrain: []execinfra.RowSource{input}, 477 TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata { 478 proc.close() 479 return nil 480 }, 481 }, 482 ); err != nil { 483 return nil, err 484 } 485 proc.i = proc.rows.NewFinalIterator(proc.Ctx) 486 return proc, nil 487 } 488 489 // chunkCompleted is a helper function that determines if the given row shares the same 490 // values for the first matchLen ordering columns with the given prefix. 491 func (s *sortChunksProcessor) chunkCompleted( 492 nextChunkRow, prefix sqlbase.EncDatumRow, 493 ) (bool, error) { 494 types := s.input.OutputTypes() 495 for _, ord := range s.ordering[:s.matchLen] { 496 col := ord.ColIdx 497 cmp, err := nextChunkRow[col].Compare(types[col], &s.alloc, s.EvalCtx, &prefix[col]) 498 if cmp != 0 || err != nil { 499 return true, err 500 } 501 } 502 return false, nil 503 } 504 505 // fill one chunk of rows from the input and sort them. 506 // 507 // Metadata is buffered in s.trailingMeta. Returns true if a valid chunk of rows 508 // has been read and sorted, false otherwise (if the input had no more rows or 509 // if a metadata record was encountered). The caller is expected to drain when 510 // this returns false. 511 func (s *sortChunksProcessor) fill() (bool, error) { 512 ctx := s.Ctx 513 514 var meta *execinfrapb.ProducerMetadata 515 516 nextChunkRow := s.nextChunkRow 517 s.nextChunkRow = nil 518 for nextChunkRow == nil { 519 nextChunkRow, meta = s.input.Next() 520 if meta != nil { 521 s.AppendTrailingMeta(*meta) 522 if meta.Err != nil { 523 return false, nil //nolint:returnerrcheck 524 } 525 continue 526 } else if nextChunkRow == nil { 527 return false, nil 528 } 529 break 530 } 531 prefix := nextChunkRow 532 533 // Add the chunk 534 if err := s.rows.AddRow(ctx, nextChunkRow); err != nil { 535 return false, err 536 } 537 538 // We will accumulate rows to form a chunk such that they all share the same values 539 // as prefix for the first s.matchLen ordering columns. 540 for { 541 nextChunkRow, meta = s.input.Next() 542 543 if meta != nil { 544 s.AppendTrailingMeta(*meta) 545 if meta.Err != nil { 546 return false, nil //nolint:returnerrcheck 547 } 548 continue 549 } 550 if nextChunkRow == nil { 551 break 552 } 553 554 chunkCompleted, err := s.chunkCompleted(nextChunkRow, prefix) 555 556 if err != nil { 557 return false, err 558 } 559 if chunkCompleted { 560 s.nextChunkRow = nextChunkRow 561 break 562 } 563 564 if err := s.rows.AddRow(ctx, nextChunkRow); err != nil { 565 return false, err 566 } 567 } 568 569 s.rows.Sort(ctx) 570 571 return true, nil 572 } 573 574 // Start is part of the RowSource interface. 575 func (s *sortChunksProcessor) Start(ctx context.Context) context.Context { 576 s.input.Start(ctx) 577 return s.StartInternal(ctx, sortChunksProcName) 578 } 579 580 // Next is part of the RowSource interface. 581 func (s *sortChunksProcessor) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 582 ctx := s.Ctx 583 for s.State == execinfra.StateRunning { 584 ok, err := s.i.Valid() 585 if err != nil { 586 s.MoveToDraining(err) 587 break 588 } 589 // If we don't have an active chunk, clear and refill it. 590 if !ok { 591 if err := s.rows.UnsafeReset(ctx); err != nil { 592 s.MoveToDraining(err) 593 break 594 } 595 valid, err := s.fill() 596 if !valid || err != nil { 597 s.MoveToDraining(err) 598 break 599 } 600 s.i.Close() 601 s.i = s.rows.NewFinalIterator(ctx) 602 s.i.Rewind() 603 if ok, err := s.i.Valid(); err != nil || !ok { 604 s.MoveToDraining(err) 605 break 606 } 607 } 608 609 // If we have an active chunk, get a row from it. 610 row, err := s.i.Row() 611 if err != nil { 612 s.MoveToDraining(err) 613 break 614 } 615 s.i.Next() 616 617 if outRow := s.ProcessRowHelper(row); outRow != nil { 618 return outRow, nil 619 } 620 } 621 return nil, s.DrainHelper() 622 } 623 624 // ConsumerClosed is part of the RowSource interface. 625 func (s *sortChunksProcessor) ConsumerClosed() { 626 // The consumer is done, Next() will not be called again. 627 s.close() 628 }