// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/span"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

// defaultSpanGenerator turns input rows into lookup spans and remembers which
// input rows produced which span key, so that looked up rows can later be
// joined back to the input rows that requested them.
type defaultSpanGenerator struct {
	// spanBuilder builds a span from the lookup column values of a row and
	// optionally splits that span into family-specific spans.
	spanBuilder *span.Builder
	// numKeyCols is the maximum number of lookup columns accepted by
	// generateSpan; more lookup columns than this is reported as an error.
	numKeyCols int
	// lookupCols holds the positions, within an input row, of the values used
	// to build the lookup span.
	lookupCols []uint32

	// indexKeyRow is a scratch row that generateSpan truncates and refills on
	// every call.
	indexKeyRow sqlbase.EncDatumRow
	// keyToInputRowIndices maps a generated span's start key (as a string) to
	// the indices of all input rows in the current batch that produced it.
	keyToInputRowIndices map[string][]int

	// scratchSpans is the reusable backing slice for the spans returned by
	// generateSpans.
	scratchSpans roachpb.Spans
}

// generateSpan builds the lookup span for a single input row.
// The values found at the positions listed in lookupCols are collected, in
// order, into indexKeyRow and handed to the span builder. An error is returned
// if more lookup columns are configured than numKeyCols allows.
// It additionally returns whether the row contains null, which is needed to
// decide whether or not to split the generated span into separate family
// specific spans.
42 func (g *defaultSpanGenerator) generateSpan( 43 row sqlbase.EncDatumRow, 44 ) (_ roachpb.Span, containsNull bool, _ error) { 45 numLookupCols := len(g.lookupCols) 46 if numLookupCols > g.numKeyCols { 47 return roachpb.Span{}, false, errors.Errorf( 48 "%d lookup columns specified, expecting at most %d", numLookupCols, g.numKeyCols) 49 } 50 51 g.indexKeyRow = g.indexKeyRow[:0] 52 for _, id := range g.lookupCols { 53 g.indexKeyRow = append(g.indexKeyRow, row[id]) 54 } 55 return g.spanBuilder.SpanFromEncDatums(g.indexKeyRow, numLookupCols) 56 } 57 58 func (g *defaultSpanGenerator) hasNullLookupColumn(row sqlbase.EncDatumRow) bool { 59 for _, colIdx := range g.lookupCols { 60 if row[colIdx].IsNull() { 61 return true 62 } 63 } 64 return false 65 } 66 67 func (g *defaultSpanGenerator) generateSpans(rows []sqlbase.EncDatumRow) (roachpb.Spans, error) { 68 // This loop gets optimized to a runtime.mapclear call. 69 for k := range g.keyToInputRowIndices { 70 delete(g.keyToInputRowIndices, k) 71 } 72 // We maintain a map from index key to the corresponding input rows so we can 73 // join the index results to the inputs. 74 g.scratchSpans = g.scratchSpans[:0] 75 for i, inputRow := range rows { 76 if g.hasNullLookupColumn(inputRow) { 77 continue 78 } 79 generatedSpan, containsNull, err := g.generateSpan(inputRow) 80 if err != nil { 81 return nil, err 82 } 83 inputRowIndices := g.keyToInputRowIndices[string(generatedSpan.Key)] 84 if inputRowIndices == nil { 85 g.scratchSpans = g.spanBuilder.MaybeSplitSpanIntoSeparateFamilies( 86 g.scratchSpans, generatedSpan, len(g.lookupCols), containsNull) 87 } 88 g.keyToInputRowIndices[string(generatedSpan.Key)] = append(inputRowIndices, i) 89 } 90 return g.scratchSpans, nil 91 } 92 93 type joinReaderStrategy interface { 94 // getLookupRowsBatchSizeHint returns the size in bytes of the batch of lookup 95 // rows. 
96 getLookupRowsBatchSizeHint() int64 97 // processLookupRows consumes the rows the joinReader has buffered and should 98 // return the lookup spans. 99 processLookupRows(rows []sqlbase.EncDatumRow) (roachpb.Spans, error) 100 // processLookedUpRow processes a looked up row. A joinReaderState is returned 101 // to indicate the next state to transition to. If this next state is 102 // jrPerformingLookup, processLookedUpRow will be called again if the looked 103 // up rows have not been exhausted. A transition to jrStateUnknown is 104 // unsupported, but if an error is returned, the joinReader will transition 105 // to draining. 106 processLookedUpRow(ctx context.Context, row sqlbase.EncDatumRow, key roachpb.Key) (joinReaderState, error) 107 // prepareToEmit informs the strategy implementation that all looked up rows 108 // have been read, and that it should prepare for calls to nextRowToEmit. 109 prepareToEmit(ctx context.Context) 110 // nextRowToEmit gets the next row to emit from the strategy. An accompanying 111 // joinReaderState is also returned, indicating a state to transition to after 112 // emitting this row. A transition to jrStateUnknown is unsupported, but if an 113 // error is returned, the joinReader will transition to draining. 114 nextRowToEmit(ctx context.Context) (sqlbase.EncDatumRow, joinReaderState, error) 115 // spilled returns whether the strategy spilled to disk. 116 spilled() bool 117 // close releases any resources associated with the joinReaderStrategy. 118 close(ctx context.Context) 119 } 120 121 // joinReaderNoOrderingStrategy is a joinReaderStrategy that doesn't maintain 122 // the input ordering. This is more performant than joinReaderOrderingStrategy. 123 type joinReaderNoOrderingStrategy struct { 124 *joinerBase 125 defaultSpanGenerator 126 isPartialJoin bool 127 inputRows []sqlbase.EncDatumRow 128 // matched[i] specifies whether inputRows[i] had a match. 
129 matched []bool 130 131 scratchMatchingInputRowIndices []int 132 133 emitState struct { 134 // processingLookupRow is an explicit boolean that specifies whether the 135 // strategy is currently processing a match. This is set to true in 136 // processLookedUpRow and causes nextRowToEmit to process the data in 137 // emitState. If set to false, the strategy determines in nextRowToEmit 138 // that no more looked up rows need processing, so unmatched input rows need 139 // to be emitted. 140 processingLookupRow bool 141 unmatchedInputRowIndicesCursor int 142 // unmatchedInputRowIndices is used only when emitting unmatched rows after 143 // processing lookup results. It is populated once when first emitting 144 // unmatched rows. 145 unmatchedInputRowIndices []int 146 matchingInputRowIndicesCursor int 147 matchingInputRowIndices []int 148 lookedUpRow sqlbase.EncDatumRow 149 } 150 } 151 152 // getLookupRowsBatchSizeHint returns the batch size for the join reader no 153 // ordering strategy. This number was chosen by running TPCH queries 7, 9, 10, 154 // and 11 with varying batch sizes and choosing the smallest batch size that 155 // offered a significant performance improvement. Larger batch sizes offered 156 // small to no marginal improvements. 
func (s *joinReaderNoOrderingStrategy) getLookupRowsBatchSizeHint() int64 {
	return 2 << 20 /* 2 MiB */
}

// processLookupRows records the new batch of input rows, resets the matched
// flags so that there is exactly one false entry per input row (reusing the
// existing backing array when it is large enough), and returns the lookup
// spans generated for the batch.
func (s *joinReaderNoOrderingStrategy) processLookupRows(
	rows []sqlbase.EncDatumRow,
) (roachpb.Spans, error) {
	s.inputRows = rows
	if cap(s.matched) < len(s.inputRows) {
		// A freshly made slice is already all false.
		s.matched = make([]bool, len(s.inputRows))
	} else {
		s.matched = s.matched[:len(s.inputRows)]
		for i := range s.matched {
			s.matched[i] = false
		}
	}
	return s.generateSpans(s.inputRows)
}

// processLookedUpRow stages a looked up row for emission. It resolves the
// input rows that requested the given key, stores them together with the
// looked up row in emitState, and always asks the joinReader to move to
// jrEmittingRows. The context argument is unused.
func (s *joinReaderNoOrderingStrategy) processLookedUpRow(
	_ context.Context, row sqlbase.EncDatumRow, key roachpb.Key,
) (joinReaderState, error) {
	matchingInputRowIndices := s.keyToInputRowIndices[string(key)]
	if s.isPartialJoin {
		// In the case of partial joins, only process input rows that have not been
		// matched yet. The surviving indices are copied into a scratch slice so
		// that the slice stored in keyToInputRowIndices is not overwritten.
		s.scratchMatchingInputRowIndices = s.scratchMatchingInputRowIndices[:0]
		for _, inputRowIdx := range matchingInputRowIndices {
			if !s.matched[inputRowIdx] {
				s.scratchMatchingInputRowIndices = append(s.scratchMatchingInputRowIndices, inputRowIdx)
			}
		}
		matchingInputRowIndices = s.scratchMatchingInputRowIndices
	}
	s.emitState.processingLookupRow = true
	s.emitState.lookedUpRow = row
	s.emitState.matchingInputRowIndices = matchingInputRowIndices
	s.emitState.matchingInputRowIndicesCursor = 0
	return jrEmittingRows, nil
}

// prepareToEmit is a no-op for this strategy.
func (s *joinReaderNoOrderingStrategy) prepareToEmit(ctx context.Context) {}

// nextRowToEmit returns the next output row and the state to transition to.
//
// While a looked up row is staged (emitState.processingLookupRow is true), it
// walks the matching input rows for that looked up row, rendering each pair
// and returning the first row that should be emitted. Once the matching input
// rows are exhausted it clears the flag and returns jrPerformingLookup.
//
// When no looked up row is staged, it emits the unmatched input rows one per
// call (if the join type calls for them) and returns jrReadingInput once there
// is nothing left to emit. The context argument is unused.
func (s *joinReaderNoOrderingStrategy) nextRowToEmit(
	_ context.Context,
) (sqlbase.EncDatumRow, joinReaderState, error) {
	if !s.emitState.processingLookupRow {
		// processLookedUpRow was not called before nextRowToEmit, which means that
		// the next unmatched row needs to be processed.
		if !shouldEmitUnmatchedRow(leftSide, s.joinType) {
			// The joinType does not require the joiner to emit unmatched rows. Move
			// on to the next batch of lookup rows.
			return nil, jrReadingInput, nil
		}

		// Collect the unmatched indices once per batch. s.matched is truncated to
		// length zero afterwards, so this branch is skipped on subsequent calls
		// until processLookupRows resizes s.matched for the next batch.
		if len(s.matched) != 0 {
			s.emitState.unmatchedInputRowIndices = s.emitState.unmatchedInputRowIndices[:0]
			for inputRowIdx, m := range s.matched {
				if !m {
					s.emitState.unmatchedInputRowIndices = append(s.emitState.unmatchedInputRowIndices, inputRowIdx)
				}
			}
			s.matched = s.matched[:0]
			s.emitState.unmatchedInputRowIndicesCursor = 0
		}

		if s.emitState.unmatchedInputRowIndicesCursor >= len(s.emitState.unmatchedInputRowIndices) {
			// All unmatched rows have been emitted.
			return nil, jrReadingInput, nil
		}
		inputRow := s.inputRows[s.emitState.unmatchedInputRowIndices[s.emitState.unmatchedInputRowIndicesCursor]]
		s.emitState.unmatchedInputRowIndicesCursor++
		if !s.joinType.ShouldIncludeRightColsInOutput() {
			// The output has no right-side columns, so the input row is emitted
			// as is.
			return inputRow, jrEmittingRows, nil
		}
		return s.renderUnmatchedRow(inputRow, leftSide), jrEmittingRows, nil
	}

	for s.emitState.matchingInputRowIndicesCursor < len(s.emitState.matchingInputRowIndices) {
		// Advance the cursor before doing any work so that every exit from this
		// iteration (return or continue) resumes at the next input row.
		inputRowIdx := s.emitState.matchingInputRowIndices[s.emitState.matchingInputRowIndicesCursor]
		s.emitState.matchingInputRowIndicesCursor++
		inputRow := s.inputRows[inputRowIdx]

		// Render the output row, this also evaluates the ON condition.
		outputRow, err := s.render(inputRow, s.emitState.lookedUpRow)
		if err != nil {
			return nil, jrStateUnknown, err
		}
		if outputRow == nil {
			// This row failed the ON condition, so it remains unmatched.
			continue
		}

		s.matched[inputRowIdx] = true
		if !s.joinType.ShouldIncludeRightColsInOutput() {
			if s.joinType == sqlbase.LeftAntiJoin {
				// A match in an anti join produces no output; the row has only
				// been marked as matched above.
				continue
			}
			return inputRow, jrEmittingRows, nil
		}
		return outputRow, jrEmittingRows, nil
	}

	// Processed all matches for a given lookup row, move to the next lookup row.
	// Set processingLookupRow to false explicitly so if the joinReader re-enters
	// nextRowToEmit, the strategy knows that no more lookup rows were processed
	// and should proceed to emit unmatched rows.
	s.emitState.processingLookupRow = false
	return nil, jrPerformingLookup, nil
}

// spilled always reports false for this strategy.
func (s *joinReaderNoOrderingStrategy) spilled() bool { return false }

// close is a no-op for this strategy.
func (s *joinReaderNoOrderingStrategy) close(_ context.Context) {}

// partialJoinSentinel is used as the inputRowIdxToLookedUpRowIndices value for
// semi- and anti-joins, where we only need to know about the existence of a
// match.
var partialJoinSentinel = []int{-1}

// joinReaderOrderingStrategy is a joinReaderStrategy that maintains the input
// ordering. This is more expensive than joinReaderNoOrderingStrategy.
type joinReaderOrderingStrategy struct {
	*joinerBase
	defaultSpanGenerator
	isPartialJoin bool

	// inputRows is the current batch of input rows, as passed to
	// processLookupRows.
	inputRows []sqlbase.EncDatumRow

	// inputRowIdxToLookedUpRowIndices is a multimap from input row indices to
	// corresponding looked up row indices. It's populated in the
	// jrPerformingLookup state. For non partial joins (everything but semi/anti
	// join), the looked up rows are the rows that came back from the lookup
	// span for each input row, without checking for matches with respect to the
	// on-condition. For semi/anti join, we store at most one sentinel value,
	// indicating a matching lookup if it's present, since the right side of a
	// semi/anti join is not used.
	inputRowIdxToLookedUpRowIndices [][]int

	// lookedUpRowIdx counts the looked up rows processed so far in the current
	// batch; it is the index recorded for the row being processed.
	lookedUpRowIdx int
	// lookedUpRows buffers the looked up rows of the current batch for non
	// partial joins.
	lookedUpRows *rowcontainer.DiskBackedNumberedRowContainer

	// emitCursor contains information about where the next row to emit is within
	// inputRowIdxToLookedUpRowIndices.
	emitCursor struct {
		// inputRowIdx contains the index into inputRowIdxToLookedUpRowIndices that
		// we're about to emit.
		inputRowIdx int
		// outputRowIdx contains the index into the inputRowIdx'th row of
		// inputRowIdxToLookedUpRowIndices that we're about to emit.
		outputRowIdx int
		// seenMatch is true if there was a match at the current inputRowIdx. A
		// match means that there's no need to output an outer or anti join row.
		seenMatch bool
	}
}

// getLookupRowsBatchSizeHint returns the lookup batch size, in bytes, for the
// ordering strategy.
func (s *joinReaderOrderingStrategy) getLookupRowsBatchSizeHint() int64 {
	// TODO(asubiotto): Eventually we might want to adjust this batch size
	// dynamically based on whether the result row container spilled or not.
	return 10 << 10 /* 10 KiB */
}

// processLookupRows records the new batch of input rows, prepares one empty
// looked-up-row-index list per input row, and returns the lookup spans
// generated for the batch.
func (s *joinReaderOrderingStrategy) processLookupRows(
	rows []sqlbase.EncDatumRow,
) (roachpb.Spans, error) {
	// Maintain a map from input row index to the corresponding output rows. This
	// will allow us to preserve the order of the input in the face of multiple
	// input rows having the same lookup keyspan, or if we're doing an outer join
	// and we need to emit unmatched rows.
	if cap(s.inputRowIdxToLookedUpRowIndices) >= len(rows) {
		// Reuse the outer slice and truncate each inner slice so that its
		// backing array can be reused as well.
		s.inputRowIdxToLookedUpRowIndices = s.inputRowIdxToLookedUpRowIndices[:len(rows)]
		for i := range s.inputRowIdxToLookedUpRowIndices {
			s.inputRowIdxToLookedUpRowIndices[i] = s.inputRowIdxToLookedUpRowIndices[i][:0]
		}
	} else {
		s.inputRowIdxToLookedUpRowIndices = make([][]int, len(rows))
	}

	s.inputRows = rows
	return s.generateSpans(s.inputRows)
}

// processLookedUpRow records a looked up row against every input row that
// requested the given key and always returns jrPerformingLookup on success.
//
// For non partial joins the row is added to lookedUpRows and its index is
// appended to each matching input row's list; the ON condition is evaluated
// later, in nextRowToEmit. For partial (semi/anti) joins the row is not
// buffered; instead the ON condition is evaluated right away and a sentinel is
// stored for input rows that pass it.
func (s *joinReaderOrderingStrategy) processLookedUpRow(
	ctx context.Context, row sqlbase.EncDatumRow, key roachpb.Key,
) (joinReaderState, error) {
	matchingInputRowIndices := s.keyToInputRowIndices[string(key)]
	if !s.isPartialJoin {
		// Replace missing values with nulls to appease the row container.
		for i := range row {
			if row[i].IsUnset() {
				row[i].Datum = tree.DNull
			}
		}
		if _, err := s.lookedUpRows.AddRow(ctx, row); err != nil {
			return jrStateUnknown, err
		}
	}

	// Update our map from input rows to looked up rows.
	for _, inputRowIdx := range matchingInputRowIndices {
		if !s.isPartialJoin {
			s.inputRowIdxToLookedUpRowIndices[inputRowIdx] = append(
				s.inputRowIdxToLookedUpRowIndices[inputRowIdx], s.lookedUpRowIdx)
			continue
		}

		// During a SemiJoin or AntiJoin, we only output if we've seen no match
		// for this input row yet. Additionally, since we don't have to render
		// anything to output a Semi or Anti join match, we can evaluate our
		// on condition now and only buffer if we pass it.
		if len(s.inputRowIdxToLookedUpRowIndices[inputRowIdx]) == 0 {
			renderedRow, err := s.render(s.inputRows[inputRowIdx], row)
			if err != nil {
				return jrStateUnknown, err
			}
			if renderedRow == nil {
				// We failed our on-condition - don't buffer anything.
				continue
			}
			s.inputRowIdxToLookedUpRowIndices[inputRowIdx] = partialJoinSentinel
		}
	}
	s.lookedUpRowIdx++

	return jrPerformingLookup, nil
}

// prepareToEmit readies the buffered looked up rows for reading. For partial
// joins no rows are buffered, so nothing is done.
func (s *joinReaderOrderingStrategy) prepareToEmit(ctx context.Context) {
	if !s.isPartialJoin {
		s.lookedUpRows.SetupForRead(ctx, s.inputRowIdxToLookedUpRowIndices)
	}
}

// nextRowToEmit returns the next output row and the state to transition to,
// visiting input rows in order and, for each input row, its looked up rows in
// order. A nil row with jrEmittingRows means that this step produced no output
// and nextRowToEmit should be called again. When every input row has been
// handled, the per-batch state is reset and jrReadingInput is returned.
func (s *joinReaderOrderingStrategy) nextRowToEmit(
	ctx context.Context,
) (sqlbase.EncDatumRow, joinReaderState, error) {
	if s.emitCursor.inputRowIdx >= len(s.inputRowIdxToLookedUpRowIndices) {
		log.VEventf(ctx, 1, "done emitting rows")
		// Ready for another input batch. Reset state.
		s.emitCursor.outputRowIdx = 0
		s.emitCursor.inputRowIdx = 0
		s.emitCursor.seenMatch = false
		if err := s.lookedUpRows.UnsafeReset(ctx); err != nil {
			return nil, jrStateUnknown, err
		}
		s.lookedUpRowIdx = 0
		return nil, jrReadingInput, nil
	}

	inputRow := s.inputRows[s.emitCursor.inputRowIdx]
	lookedUpRows := s.inputRowIdxToLookedUpRowIndices[s.emitCursor.inputRowIdx]
	if s.emitCursor.outputRowIdx >= len(lookedUpRows) {
		// We have no more rows for the current input row. Emit an outer or anti
		// row if we didn't see a match, and bump to the next input row.
		// The cursor is advanced and seenMatch reset before returning, so the
		// next call starts cleanly on the following input row.
		s.emitCursor.inputRowIdx++
		s.emitCursor.outputRowIdx = 0
		seenMatch := s.emitCursor.seenMatch
		s.emitCursor.seenMatch = false
		if !seenMatch {
			switch s.joinType {
			case sqlbase.LeftOuterJoin:
				// An outer-join non-match means we emit the input row with NULLs for
				// the right side (if it passes the ON-condition).
				if renderedRow := s.renderUnmatchedRow(inputRow, leftSide); renderedRow != nil {
					return renderedRow, jrEmittingRows, nil
				}
			case sqlbase.LeftAntiJoin:
				// An anti-join non-match means we emit the input row.
				return inputRow, jrEmittingRows, nil
			}
		}
		return nil, jrEmittingRows, nil
	}

	lookedUpRowIdx := lookedUpRows[s.emitCursor.outputRowIdx]
	s.emitCursor.outputRowIdx++
	switch s.joinType {
	case sqlbase.LeftSemiJoin:
		// A semi-join match means we emit our input row.
		s.emitCursor.seenMatch = true
		return inputRow, jrEmittingRows, nil
	case sqlbase.LeftAntiJoin:
		// An anti-join match means we emit nothing.
		s.emitCursor.seenMatch = true
		return nil, jrEmittingRows, nil
	}

	// NOTE(review): this call uses s.Ctx rather than the ctx argument that the
	// rest of this method uses - confirm that this is intentional.
	lookedUpRow, err := s.lookedUpRows.GetRow(s.Ctx, lookedUpRowIdx, false /* skip */)
	if err != nil {
		return nil, jrStateUnknown, err
	}
	// Rendering evaluates the ON condition; a nil outputRow is not counted as a
	// match and is returned as "nothing to emit for this step".
	outputRow, err := s.render(inputRow, lookedUpRow)
	if err != nil {
		return nil, jrStateUnknown, err
	}
	if outputRow != nil {
		s.emitCursor.seenMatch = true
	}
	return outputRow, jrEmittingRows, nil
}

// spilled reports whether the looked up row container spilled.
// NOTE(review): unlike close, this calls through s.lookedUpRows without a nil
// guard - presumably lookedUpRows is always set before this is called; verify
// against the constructor.
func (s *joinReaderOrderingStrategy) spilled() bool {
	return s.lookedUpRows.Spilled()
}

// close closes the looked up row container, if one was set.
func (s *joinReaderOrderingStrategy) close(ctx context.Context) {
	if s.lookedUpRows != nil {
		s.lookedUpRows.Close(ctx)
	}
}