github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/zigzagjoiner.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 17 "github.com/cockroachdb/cockroach/pkg/kv" 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 21 "github.com/cockroachdb/cockroach/pkg/sql/row" 22 "github.com/cockroachdb/cockroach/pkg/sql/scrub" 23 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 24 "github.com/cockroachdb/cockroach/pkg/sql/span" 25 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 26 "github.com/cockroachdb/cockroach/pkg/sql/types" 27 "github.com/cockroachdb/cockroach/pkg/util" 28 "github.com/cockroachdb/cockroach/pkg/util/encoding" 29 "github.com/cockroachdb/cockroach/pkg/util/log" 30 "github.com/cockroachdb/errors" 31 ) 32 33 // Consider the schema: 34 // 35 // CREATE TABLE abcd (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b), 36 // INDEX c_idx (c), INDEX d_idx (d)); 37 // 38 // and the query: 39 // 40 // SELECT * FROM abcd@c_idx WHERE c = 2 AND d = 3; 41 // 42 // 43 // Without a zigzag joiner, this query would previously execute: index scan on 44 // `c_idx`, followed by an index join on the primary index, then filter out rows 45 // where `d ≠ 3`. 46 // This plan scans through all values in `c_idx` where `c = 2`, however if among 47 // these rows there are not many where `d = 3` a lot of rows are unnecessarily 48 // scanned. 
// A zigzag join allows us to skip many of these rows and many times
// will also render the index join unnecessary, by making use of `d_idx`.
//
// To see how this query would be executed, consider the equivalent query:
//
// SELECT t1.* FROM abcd@c_idx AS t1 JOIN abcd@d_idx AS t2 ON t1.a = t2.a AND
// t1.b = t2.b WHERE t1.c = 2 AND t2.d = 3;
//
// A zigzag joiner takes 2 sides as input. In the example above, the join would
// be between `c_idx` and `d_idx`. Both sides will have the same equality
// columns: (a, b) since that is the primary key of the table. The `c_idx` side
// fixes a prefix (c) to a specific value (2), as does the `d_idx` side (d = 3).
// This can be summarized as:
// Side 1:
//
// - Index: `abcd@c_idx`, with columns (c | a, b)
// - Equality columns: (a, b)
// - Fixed columns: (c)
// - Fixed values: (2)
//
// Side 2:
//
// - Index: `abcd@d_idx`, with columns (d | a, b)
// - Equality columns: (a, b)
// - Fixed columns: (d)
// - Fixed values: (3)
//
// The actual execution can be visualized below:
//
//           c_idx         d_idx
//          c | a, b      d | a, b
//       ============= ============
//     --> 2   1  1 ----> 3   1  1 ---+ X
//                                    |
//     +----------------- 3   4  2 <--+
//     |                  3   4  3
//     |                  3   5  6
//     |                  3   7  2
//     +--> 2  8  2 -------------------+
//                                     |
//     +----------------- 3   8  3 ----+
//     |
//     +-> 2   9  3 -----> 3  9  3 --+ X
//                                   |
//                   nil (Done)   <--+
//
//
// - The execution starts by fetching the (2, 1, 1) row from c_idx. This is the
// first row returned by an index lookup in `c_idx` where `c = 2`. Let this be
// the `baseRow`. This is the current contender for other rows to match.
// - An index lookup is performed on `d_idx` for the first row where `d = 3`
// that has equality column (a, b) values greater than or equal to (1, 1), which
// are the values of the equality columns of the `baseRow`.
101 // - The index lookup on `d_idx` retrieves the row (3, 1, 1) 102 // - The equality columns of the (3, 1, 1) row are compared against the equality 103 // columns of the base row (2, 1, 1). They are found to match. 104 // - Since both indexes are sorted, once a match is found it is guaranteed that 105 // all rows which match the `baseRow` will follow the two that were matched. All 106 // of the possible matches are found and put into a container that is maintained 107 // by each side. Since the equality columns is the primary key, only one match 108 // can be produced in this example. The left container now contains (2, 1, 1) 109 // and the right container now contains (3, 1, 1). 110 // - The cross-product of the containers is emitted. In this case, just the row 111 // (1, 1, 2, 3). 112 // - The side with the latest match, (3, 1, 1) in this case, will fetch the next 113 // row in the index (3, 4, 2). This becomes the new `baseRow`. 114 // - As before, an index lookup is performed on the other side `c_idx` for the 115 // first row where `c = 2` that has the equality column (a, b) values greater 116 // than or equal to (4, 2), which are the values of the equality columns of the 117 // `baseRow`. In this example, the processor can skip a group of rows that are 118 // guaranteed to not be in the output of the join. 119 // - The first row found is (2, 8, 2). Since the equality columns do not match 120 // to the base row ((8, 2) ≠ (4, 2)), this row becomes the new base row and the 121 // process is repeated. 122 // - We are done when the index lookup returns `nil`. There were no more rows in 123 // this index that could satisfy the join. 124 // 125 // 126 // When Can a Zigzag Join Be Planned: 127 // 128 // Every side of a zigzag join has fixed columns, equality columns, and index 129 // columns. 130 // 131 // A zigzag join can be used when for each side, there exists an index with the 132 // prefix (fixed columns + equality columns). 
// This guarantees that, when iterating through the index, the rows on each
// side of the join are sorted by that side's equality columns.
//
// When Should a Zigzag Join Be Planned:
//
// The intuition behind when a zigzag join should be used is when the cardinality
// of the output is much smaller than the size of either side of the join. If
// this is not the case, it may end up being slower than other joins because it
// is constantly alternating between sides of the join. Alternatively, the
// zigzag join should be used in cases where an index scan would be used with a
// filter on the results. Examples would be inverted index JSON queries and
// queries such as the `SELECT * FROM abcd@c_idx WHERE c = 2 AND d = 3;` example
// above.
//
// For a description of index columns, refer to Appendix A.
//
// Additional Cases
//
// Normal Joins
// This algorithm can also be applied to normal joins such as:
//
// SELECT t1.a, t1.b FROM abcd t1 JOIN abcd t2 ON t1.b = t2.a WHERE t1.a = 3;
//
// (Using the same schema as above).
//
// The sides of this zigzag join would be:
// Side 1:
//
// - Index: `abcd@primary`
// - Equality columns: (b)
// - Fixed columns: (a)
// - Fixed values: (3)
//
// Side 2:
//
// - Index: `abcd@primary`
// - Equality columns: (a)
// - Fixed columns: None
// - Fixed values: None
//
// Note: If the query were to `SELECT *` instead of `SELECT a, b` a further
// index join would be needed, but this index join would only be applied on the
// necessary rows.
//
// No Fixed Columns
// As shown above, a side can have no fixed columns. This means that the
// equality columns will be a prefix of the index. Specifically this means that
// all rows in the index will be considered rather than doing a lookup on a
// specific prefix.
182 // 183 // Multi-Way Join [not implemented]: 184 // Also note that this algorithm can be extended to support a multi-way join by 185 // performing index lookups in a round-robin fashion iterating through all of 186 // the sides until a match is found on all sides of the join. It is expected 187 // that a zigzag join’s utility will increase as the number of sides increases 188 // because more rows will be able to be skipped. 189 // 190 // 191 // Appendix A: Indexes 192 // 193 // The zigzag joins makes use of multiple indexes. Each index is composed of a 194 // set of explicit columns, and a set of implicit columns. The union of these 195 // sets will be referred to as index columns. 196 // 197 // The purpose of implicit columns in indexes is to provide unique keys for 198 // RocksDB as well as to be able to relate the specified row back to the primary 199 // index where the full row is stored. 200 // 201 // Consider the schema: 202 // 203 // CREATE TABLE abcd (a INT, b INT, c INT, d INT, (a, b) PRIMARY KEY, 204 // INDEX c_idx (c), INDEX da_idx (d, a), INDEX db_idx (d, b)); 205 // 206 // The following three indexes are created: 207 // 208 // - Primary Index: (Key format: `/Table/abcd/primary/<a_val>/<b_val>/`) 209 // - Explicit columns: (a, b) 210 // - Implicit columns: None 211 // - Index columns: (a, b) 212 // - c_idx: (Key format: `/Table/abcd/c_idx/<c_val>/<a_val>/<b_val>/`) 213 // - Explicit columns: (c) 214 // - Implicit columns: (a, b) 215 // - Index columns (c, a, b) 216 // - da_idx: (Key format: `/Table/abcd/d_idx/<d_val>/<a_val>/<b_val>/`) 217 // - Explicit columns: (d, a) 218 // - Implicit columns (b) 219 // - Index columns: (d, a, b) 220 // - db_idx: (Key format: `/Table/abcd/d_idx/<d_val>/<b_val>/<a_val>/`) 221 // - Explicit columns: (d, b) 222 // - Implicit columns (a) 223 // - Index columns: (d, b, a) 224 type zigzagJoiner struct { 225 joinerBase 226 227 evalCtx *tree.EvalContext 228 cancelChecker *sqlbase.CancelChecker 229 230 // numTables stored 
the number of tables involved in the join. 231 numTables int 232 // side keeps track of which side is being processed. 233 side int 234 235 // Stores relevant information for each side of the join including table 236 // descriptors, index IDs, rowFetchers, and more. See zigzagJoinInfo for 237 // more information. 238 infos []*zigzagJoinerInfo 239 240 // Base row stores the that the algorithm is compared against and is updated 241 // with every change of side. 242 baseRow sqlbase.EncDatumRow 243 244 rowAlloc sqlbase.EncDatumRowAlloc 245 246 // TODO(andrei): get rid of this field and move the actions it gates into the 247 // Start() method. 248 started bool 249 250 // returnedMeta contains all the metadata that zigzag joiner has emitted. 251 returnedMeta []execinfrapb.ProducerMetadata 252 } 253 254 // Batch size is a parameter which determines how many rows should be fetched 255 // at a time. Increasing this will improve performance for when matched rows 256 // are grouped together, but increasing this too much will result in fetching 257 // too many rows and therefore skipping less rows. 258 const zigzagJoinerBatchSize = 5 259 260 var _ execinfra.Processor = &zigzagJoiner{} 261 var _ execinfra.RowSource = &zigzagJoiner{} 262 var _ execinfrapb.MetadataSource = &zigzagJoiner{} 263 var _ execinfra.OpNode = &zigzagJoiner{} 264 265 const zigzagJoinerProcName = "zigzagJoiner" 266 267 // newZigzagJoiner creates a new zigzag joiner given a spec and an EncDatumRow 268 // holding the values of the prefix columns of the index specified in the spec. 
269 func newZigzagJoiner( 270 flowCtx *execinfra.FlowCtx, 271 processorID int32, 272 spec *execinfrapb.ZigzagJoinerSpec, 273 fixedValues []sqlbase.EncDatumRow, 274 post *execinfrapb.PostProcessSpec, 275 output execinfra.RowReceiver, 276 ) (*zigzagJoiner, error) { 277 z := &zigzagJoiner{} 278 279 leftColumnTypes := spec.Tables[0].ColumnTypes() 280 rightColumnTypes := spec.Tables[1].ColumnTypes() 281 leftEqCols := make([]uint32, 0, len(spec.EqColumns[0].Columns)) 282 rightEqCols := make([]uint32, 0, len(spec.EqColumns[1].Columns)) 283 err := z.joinerBase.init( 284 z, /* self */ 285 flowCtx, 286 processorID, 287 leftColumnTypes, 288 rightColumnTypes, 289 spec.Type, 290 spec.OnExpr, 291 leftEqCols, 292 rightEqCols, 293 0, /* numMerged */ 294 post, 295 output, 296 execinfra.ProcStateOpts{}, // zigzagJoiner doesn't have any inputs to drain. 297 ) 298 if err != nil { 299 return nil, err 300 } 301 302 z.numTables = len(spec.Tables) 303 z.infos = make([]*zigzagJoinerInfo, z.numTables) 304 z.returnedMeta = make([]execinfrapb.ProducerMetadata, 0, 1) 305 306 for i := range z.infos { 307 z.infos[i] = &zigzagJoinerInfo{} 308 } 309 310 colOffset := 0 311 for i := 0; i < z.numTables; i++ { 312 if fixedValues != nil && i < len(fixedValues) { 313 // Useful for testing. In cases where we plan a zigzagJoin in 314 // the planner, we specify fixed values as ValuesCoreSpecs in 315 // the spec itself. 316 z.infos[i].fixedValues = fixedValues[i] 317 } else if i < len(spec.FixedValues) { 318 z.infos[i].fixedValues, err = valuesSpecToEncDatum(spec.FixedValues[i]) 319 if err != nil { 320 return nil, err 321 } 322 } 323 if err := z.setupInfo(flowCtx, spec, i, colOffset); err != nil { 324 return nil, err 325 } 326 colOffset += len(z.infos[i].table.Columns) 327 } 328 z.side = 0 329 return z, nil 330 } 331 332 // Helper function to convert a values spec containing one tuple into EncDatums for 333 // each cell. 
Note that this function assumes that there is only one tuple in the 334 // ValuesSpec (i.e. the way fixed values are encoded in the ZigzagJoinSpec). 335 func valuesSpecToEncDatum( 336 valuesSpec *execinfrapb.ValuesCoreSpec, 337 ) (res []sqlbase.EncDatum, err error) { 338 res = make([]sqlbase.EncDatum, len(valuesSpec.Columns)) 339 rem := valuesSpec.RawBytes[0] 340 for i, colInfo := range valuesSpec.Columns { 341 res[i], rem, err = sqlbase.EncDatumFromBuffer(colInfo.Type, colInfo.Encoding, rem) 342 if err != nil { 343 return nil, err 344 } 345 } 346 return res, nil 347 } 348 349 // Start is part of the RowSource interface. 350 func (z *zigzagJoiner) Start(ctx context.Context) context.Context { 351 ctx = z.StartInternal(ctx, zigzagJoinerProcName) 352 z.evalCtx = z.FlowCtx.NewEvalCtx() 353 z.cancelChecker = sqlbase.NewCancelChecker(ctx) 354 log.VEventf(ctx, 2, "starting zigzag joiner run") 355 return ctx 356 } 357 358 // zigzagJoinerInfo contains all the information that needs to be 359 // stored for each side of the join. 360 type zigzagJoinerInfo struct { 361 fetcher row.Fetcher 362 alloc *sqlbase.DatumAlloc 363 table *sqlbase.TableDescriptor 364 index *sqlbase.IndexDescriptor 365 indexTypes []*types.T 366 indexDirs []sqlbase.IndexDescriptor_Direction 367 368 // Stores one batch of matches at a time. When all the rows are collected 369 // the cartesian product of the containers will be emitted. 370 container sqlbase.EncDatumRowContainer 371 372 // eqColumns is the ordinal positions of the equality columns. 373 eqColumns []uint32 374 375 // Prefix of the index key that has fixed values. 376 fixedValues sqlbase.EncDatumRow 377 378 // The current key being fetched by this side. 379 key roachpb.Key 380 // The prefix of the key which includes the table and index IDs. 381 prefix []byte 382 // endKey marks where this side should stop fetching, taking into account the 383 // fixedValues. 
384 endKey roachpb.Key 385 386 spanBuilder *span.Builder 387 } 388 389 // Setup the curInfo struct for the current z.side, which specifies the side 390 // number of the curInfo to set up. 391 // Side specifies which the spec is associated with. 392 // colOffset is specified to determine the appropriate range of output columns 393 // to process. It is the number of columns in the tables of all previous sides 394 // of the join. 395 func (z *zigzagJoiner) setupInfo( 396 flowCtx *execinfra.FlowCtx, spec *execinfrapb.ZigzagJoinerSpec, side int, colOffset int, 397 ) error { 398 z.side = side 399 info := z.infos[side] 400 401 info.alloc = &sqlbase.DatumAlloc{} 402 info.table = &spec.Tables[side] 403 info.eqColumns = spec.EqColumns[side].Columns 404 indexOrdinal := spec.IndexOrdinals[side] 405 if indexOrdinal == 0 { 406 info.index = &info.table.PrimaryIndex 407 } else { 408 info.index = &info.table.Indexes[indexOrdinal-1] 409 } 410 411 var columnIDs []sqlbase.ColumnID 412 columnIDs, info.indexDirs = info.index.FullColumnIDs() 413 info.indexTypes = make([]*types.T, len(columnIDs)) 414 columnTypes := info.table.ColumnTypes() 415 colIdxMap := info.table.ColumnIdxMap() 416 for i, columnID := range columnIDs { 417 info.indexTypes[i] = columnTypes[colIdxMap[columnID]] 418 } 419 420 // Add the outputted columns. 421 neededCols := util.MakeFastIntSet() 422 outCols := z.Out.NeededColumns() 423 maxCol := colOffset + len(info.table.Columns) 424 for i, ok := outCols.Next(colOffset); ok && i < maxCol; i, ok = outCols.Next(i + 1) { 425 neededCols.Add(i - colOffset) 426 } 427 428 // Add the fixed columns. 429 for i := 0; i < len(info.fixedValues); i++ { 430 neededCols.Add(colIdxMap[columnIDs[i]]) 431 } 432 433 // Add the equality columns. 434 for _, col := range info.eqColumns { 435 neededCols.Add(int(col)) 436 } 437 438 // Setup the RowContainers. 
439 info.container.Reset() 440 441 info.spanBuilder = span.MakeBuilder(flowCtx.Codec(), info.table, info.index) 442 443 // Setup the Fetcher. 444 _, _, err := initRowFetcher( 445 flowCtx, 446 &info.fetcher, 447 info.table, 448 int(indexOrdinal), 449 info.table.ColumnIdxMap(), 450 false, /* reverse */ 451 neededCols, 452 false, /* check */ 453 info.alloc, 454 execinfra.ScanVisibilityPublic, 455 // NB: zigzag joins are disabled when a row-level locking clause is 456 // supplied, so there is no locking strength on *ZigzagJoinerSpec. 457 sqlbase.ScanLockingStrength_FOR_NONE, 458 ) 459 if err != nil { 460 return err 461 } 462 463 info.prefix = sqlbase.MakeIndexKeyPrefix(flowCtx.Codec(), info.table, info.index.ID) 464 span, err := z.produceSpanFromBaseRow() 465 466 if err != nil { 467 return err 468 } 469 info.key = span.Key 470 info.endKey = span.EndKey 471 return nil 472 } 473 474 func (z *zigzagJoiner) close() { 475 if z.InternalClose() { 476 log.VEventf(z.Ctx, 2, "exiting zigzag joiner run") 477 } 478 } 479 480 // producerMeta constructs the ProducerMetadata after consumption of rows has 481 // terminated, either due to being indicated by the consumer, or because the 482 // processor ran out of rows or encountered an error. It is ok for err to be 483 // nil indicating that we're done producing rows even though no error occurred. 484 func (z *zigzagJoiner) producerMeta(err error) *execinfrapb.ProducerMetadata { 485 var meta *execinfrapb.ProducerMetadata 486 if !z.Closed { 487 if err != nil { 488 meta = &execinfrapb.ProducerMetadata{Err: err} 489 } else if trace := execinfra.GetTraceData(z.Ctx); trace != nil { 490 meta = &execinfrapb.ProducerMetadata{TraceData: trace} 491 } 492 // We need to close as soon as we send producer metadata as we're done 493 // sending rows. The consumer is allowed to not call ConsumerDone(). 
494 z.close() 495 } 496 if meta != nil { 497 z.returnedMeta = append(z.returnedMeta, *meta) 498 } 499 return meta 500 } 501 502 func findColumnID(s []sqlbase.ColumnID, t sqlbase.ColumnID) int { 503 for i := range s { 504 if s[i] == t { 505 return i 506 } 507 } 508 return -1 509 } 510 511 // Fetches the first row from the current rowFetcher that does not have any of 512 // the equality columns set to null. 513 func (z *zigzagJoiner) fetchRow(ctx context.Context) (sqlbase.EncDatumRow, error) { 514 return z.fetchRowFromSide(ctx, z.side) 515 } 516 517 func (z *zigzagJoiner) fetchRowFromSide( 518 ctx context.Context, side int, 519 ) (fetchedRow sqlbase.EncDatumRow, err error) { 520 // Keep fetching until a row is found that does not have null in an equality 521 // column. 522 hasNull := func(row sqlbase.EncDatumRow) bool { 523 for _, c := range z.infos[side].eqColumns { 524 if row[c].IsNull() { 525 return true 526 } 527 } 528 return false 529 } 530 for { 531 fetchedRow, _, _, err = z.infos[side].fetcher.NextRow(ctx) 532 if fetchedRow == nil || err != nil { 533 return fetchedRow, err 534 } 535 if !hasNull(fetchedRow) { 536 break 537 } 538 } 539 return fetchedRow, nil 540 } 541 542 // Return the datums from the equality columns from a given non-empty row 543 // from the specified side. 544 func (z *zigzagJoiner) extractEqDatums(row sqlbase.EncDatumRow, side int) sqlbase.EncDatumRow { 545 eqCols := z.infos[side].eqColumns 546 eqDatums := make(sqlbase.EncDatumRow, len(eqCols)) 547 for i, col := range eqCols { 548 eqDatums[i] = row[col] 549 } 550 return eqDatums 551 } 552 553 // Generates a Key for an inverted index from the passed datums and side 554 // info. Used by produceKeyFromBaseRow. 555 func (z *zigzagJoiner) produceInvertedIndexKey( 556 info *zigzagJoinerInfo, datums sqlbase.EncDatumRow, 557 ) (roachpb.Span, error) { 558 // For inverted indexes, the JSON field (first column in the index) is 559 // encoded a little differently. 
We need to explicitly call 560 // EncodeInvertedIndexKeys to generate the prefix. The rest of the 561 // index key containing the remaining neededDatums can be generated 562 // and appended using EncodeColumns. 563 colMap := make(map[sqlbase.ColumnID]int) 564 decodedDatums := make([]tree.Datum, len(datums)) 565 566 // Ensure all EncDatums have been decoded. 567 for i, encDatum := range datums { 568 err := encDatum.EnsureDecoded(info.indexTypes[i], info.alloc) 569 if err != nil { 570 return roachpb.Span{}, err 571 } 572 573 decodedDatums[i] = encDatum.Datum 574 if i < len(info.index.ColumnIDs) { 575 colMap[info.index.ColumnIDs[i]] = i 576 } else { 577 // This column's value will be encoded in the second part (i.e. 578 // EncodeColumns). 579 colMap[info.index.ExtraColumnIDs[i-len(info.index.ColumnIDs)]] = i 580 } 581 } 582 583 keys, err := sqlbase.EncodeInvertedIndexKeys( 584 info.table, 585 info.index, 586 colMap, 587 decodedDatums, 588 info.prefix, 589 ) 590 if err != nil { 591 return roachpb.Span{}, err 592 } 593 if len(keys) != 1 { 594 return roachpb.Span{}, errors.Errorf("%d fixed values passed in for inverted index", len(keys)) 595 } 596 597 // Append remaining (non-JSON) datums to the key. 598 keyBytes, _, err := sqlbase.EncodeColumns( 599 info.index.ExtraColumnIDs[:len(datums)-1], 600 info.indexDirs[1:], 601 colMap, 602 decodedDatums, 603 keys[0], 604 ) 605 key := roachpb.Key(keyBytes) 606 return roachpb.Span{Key: key, EndKey: key.PrefixEnd()}, err 607 } 608 609 // Generates a Key, corresponding to the current `z.baseRow` in 610 // the index on the current side. 611 func (z *zigzagJoiner) produceSpanFromBaseRow() (roachpb.Span, error) { 612 info := z.infos[z.side] 613 neededDatums := info.fixedValues 614 if z.baseRow != nil { 615 eqDatums := z.extractEqDatums(z.baseRow, z.prevSide()) 616 neededDatums = append(neededDatums, eqDatums...) 617 } 618 619 // Construct correct row by concatenating right fixed datums with 620 // primary key extracted from `row`. 
621 if info.index.Type == sqlbase.IndexDescriptor_INVERTED { 622 return z.produceInvertedIndexKey(info, neededDatums) 623 } 624 625 s, _, err := info.spanBuilder.SpanFromEncDatums(neededDatums, len(neededDatums)) 626 return s, err 627 } 628 629 // Returns the column types of the equality columns. 630 func (zi *zigzagJoinerInfo) eqColTypes() []*types.T { 631 eqColTypes := make([]*types.T, len(zi.eqColumns)) 632 colTypes := zi.table.ColumnTypes() 633 for i := range eqColTypes { 634 eqColTypes[i] = colTypes[zi.eqColumns[i]] 635 } 636 return eqColTypes 637 } 638 639 // Returns the ordering of the equality columns. 640 func (zi *zigzagJoinerInfo) eqOrdering() (sqlbase.ColumnOrdering, error) { 641 ordering := make(sqlbase.ColumnOrdering, len(zi.eqColumns)) 642 for i := range zi.eqColumns { 643 colID := zi.table.Columns[zi.eqColumns[i]].ID 644 // Search the index columns, then the primary keys to find an ordering for 645 // the current column, 'colID'. 646 var direction encoding.Direction 647 var err error 648 if idx := findColumnID(zi.index.ColumnIDs, colID); idx != -1 { 649 direction, err = zi.index.ColumnDirections[idx].ToEncodingDirection() 650 if err != nil { 651 return nil, err 652 } 653 } else if idx := findColumnID(zi.table.PrimaryIndex.ColumnIDs, colID); idx != -1 { 654 direction, err = zi.table.PrimaryIndex.ColumnDirections[idx].ToEncodingDirection() 655 if err != nil { 656 return nil, err 657 } 658 } else { 659 return nil, errors.New("ordering of equality column not found in index or primary key") 660 } 661 ordering[i] = sqlbase.ColumnOrderInfo{ColIdx: i, Direction: direction} 662 } 663 return ordering, nil 664 } 665 666 // matchBase compares the equality columns of the given row to `z.baseRow`, 667 // which is the previously fetched row. Returns whether or not the rows match 668 // on the equality columns. The given row is from the specified `side`. 
669 func (z *zigzagJoiner) matchBase(curRow sqlbase.EncDatumRow, side int) (bool, error) { 670 if len(curRow) == 0 { 671 return false, nil 672 } 673 674 prevEqDatums := z.extractEqDatums(z.baseRow, z.prevSide()) 675 curEqDatums := z.extractEqDatums(curRow, side) 676 677 eqColTypes := z.infos[side].eqColTypes() 678 ordering, err := z.infos[side].eqOrdering() 679 if err != nil { 680 return false, err 681 } 682 683 // Compare the equality columns of the baseRow to that of the curRow. 684 da := &sqlbase.DatumAlloc{} 685 cmp, err := prevEqDatums.Compare(eqColTypes, da, ordering, z.FlowCtx.EvalCtx, curEqDatums) 686 if err != nil { 687 return false, err 688 } 689 return cmp == 0, nil 690 } 691 692 // emitFromContainers returns the next row that is to be emitted from those 693 // already stored in the containers. 694 // Since this is called after the side has been incremented, it produces the 695 // cartesian product of the previous side's container and the side before that 696 // one. 697 func (z *zigzagJoiner) emitFromContainers() (sqlbase.EncDatumRow, error) { 698 right := z.prevSide() 699 left := z.sideBefore(right) 700 for !z.infos[right].container.IsEmpty() { 701 leftRow := z.infos[left].container.Pop() 702 rightRow := z.infos[right].container.Peek() 703 704 // TODO(pbardea): Extend this logic to support multi-way joins. 705 if left == int(rightSide) { 706 leftRow, rightRow = rightRow, leftRow 707 } 708 renderedRow, err := z.render(leftRow, rightRow) 709 if err != nil { 710 return nil, err 711 } 712 if z.infos[left].container.IsEmpty() { 713 z.infos[right].container.Pop() 714 } 715 if renderedRow != nil { 716 // The pair satisfied the onExpr. 717 return renderedRow, nil 718 } 719 } 720 721 // All matches have been returned since the left index is negative. 722 // Empty the containers to reset their contents. 
723 z.infos[left].container.Reset() 724 z.infos[right].container.Reset() 725 726 return nil, nil 727 } 728 729 // nextRow fetches the nextRow to emit from the join. It iterates through all 730 // sides until a match is found then emits the results of the match one result 731 // at a time. 732 func (z *zigzagJoiner) nextRow( 733 ctx context.Context, txn *kv.Txn, 734 ) (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 735 for { 736 if err := z.cancelChecker.Check(); err != nil { 737 return nil, &execinfrapb.ProducerMetadata{Err: err} 738 } 739 740 // Check if there are any rows built up in the containers that need to be 741 // emitted. 742 if rowToEmit, err := z.emitFromContainers(); err != nil { 743 return nil, z.producerMeta(err) 744 } else if rowToEmit != nil { 745 return rowToEmit, nil 746 } 747 748 // If the baseRow is nil, the last fetched row was nil. That means that 749 // that there are no more matches in the join so we break and return nil 750 // to indicate that we are done to the caller. 751 if len(z.baseRow) == 0 { 752 return nil, nil 753 } 754 755 curInfo := z.infos[z.side] 756 757 // Generate a key from the last row seen from the last side. We're about to 758 // use it to jump to the next possible match on the current side. 759 span, err := z.produceSpanFromBaseRow() 760 if err != nil { 761 return nil, z.producerMeta(err) 762 } 763 curInfo.key = span.Key 764 765 err = curInfo.fetcher.StartScan( 766 ctx, 767 txn, 768 roachpb.Spans{roachpb.Span{Key: curInfo.key, EndKey: curInfo.endKey}}, 769 true, /* batch limit */ 770 zigzagJoinerBatchSize, 771 z.FlowCtx.TraceKV, 772 ) 773 if err != nil { 774 return nil, z.producerMeta(err) 775 } 776 777 fetchedRow, err := z.fetchRow(ctx) 778 if err != nil { 779 return nil, z.producerMeta(err) 780 } 781 // If the next possible match on the current side that matches the previous 782 // row is `nil`, that means that there are no more matches in the join so 783 // we return nil to indicate that to the caller. 
784 if fetchedRow == nil { 785 return nil, nil 786 } 787 788 matched, err := z.matchBase(fetchedRow, z.side) 789 if err != nil { 790 return nil, z.producerMeta(err) 791 } 792 if matched { 793 // We've detected a match! Now, we collect all subsequent matches on both 794 // sides for the current equality column values and add them to our 795 // list of rows to emit. 796 prevSide := z.prevSide() 797 798 // Store the matched rows in the appropriate container to emit. 799 prevRow := z.rowAlloc.AllocRow(len(z.baseRow)) 800 copy(prevRow, z.baseRow) 801 z.infos[prevSide].container.Push(prevRow) 802 curRow := z.rowAlloc.AllocRow(len(fetchedRow)) 803 copy(curRow, fetchedRow) 804 curInfo.container.Push(curRow) 805 806 // After collecting all matches from each side, the first unmatched 807 // row from each side is returned. We want the new baseRow to be 808 // the latest of these rows since no match can occur before the latter 809 // of the two rows. 810 prevNext, err := z.collectAllMatches(ctx, prevSide) 811 if err != nil { 812 return nil, z.producerMeta(err) 813 } 814 curNext, err := z.collectAllMatches(ctx, z.side) 815 if err != nil { 816 return nil, z.producerMeta(err) 817 } 818 819 // No more matches, so set the baseRow to nil to indicate that we should 820 // terminate after emitting all the rows stored in the container. 
821 if len(prevNext) == 0 || len(curNext) == 0 { 822 z.baseRow = nil 823 continue 824 } 825 826 prevEqCols := z.extractEqDatums(prevNext, prevSide) 827 currentEqCols := z.extractEqDatums(curNext, z.side) 828 eqColTypes := curInfo.eqColTypes() 829 ordering, err := curInfo.eqOrdering() 830 if err != nil { 831 return nil, z.producerMeta(err) 832 } 833 da := &sqlbase.DatumAlloc{} 834 cmp, err := prevEqCols.Compare(eqColTypes, da, ordering, z.FlowCtx.EvalCtx, currentEqCols) 835 if err != nil { 836 return nil, z.producerMeta(err) 837 } 838 // We want the new current side to be the one that has the latest key 839 // since we know that this key will not be able to match any previous 840 // key. The current side should be the side after the baseRow's side. 841 if cmp < 0 { 842 // The current side had the later row, so increment the side. 843 z.side = z.nextSide() 844 z.baseRow = curNext 845 } else { 846 // The previous side had the later row so the side doesn't change. 847 z.baseRow = prevNext 848 } 849 } else { 850 // The current row doesn't match the base row, so update the base row to 851 // the current row and increment the side to repeat the process. 852 z.baseRow = fetchedRow 853 z.baseRow = z.rowAlloc.AllocRow(len(fetchedRow)) 854 copy(z.baseRow, fetchedRow) 855 z.side = z.nextSide() 856 } 857 } 858 } 859 860 // nextSide returns the side after the current side. 861 func (z *zigzagJoiner) nextSide() int { 862 return (z.side + 1) % z.numTables 863 } 864 865 // prevSide returns the side before the current side. 866 func (z *zigzagJoiner) prevSide() int { 867 return z.sideBefore(z.side) 868 } 869 870 // sideBefore returns the side before the given side. 871 func (z *zigzagJoiner) sideBefore(side int) int { 872 return (side + z.numTables - 1) % z.numTables 873 } 874 875 // Adds all rows that match the current base row from the specified side into 876 // the appropriate container. 877 // Returns the first row that doesn't match. 
878 func (z *zigzagJoiner) collectAllMatches( 879 ctx context.Context, side int, 880 ) (sqlbase.EncDatumRow, error) { 881 matched := true 882 var row sqlbase.EncDatumRow 883 for matched { 884 var err error 885 fetchedRow, err := z.fetchRowFromSide(ctx, side) 886 row = z.rowAlloc.AllocRow(len(fetchedRow)) 887 copy(row, fetchedRow) 888 if err != nil { 889 return nil, err 890 } 891 matched, err = z.matchBase(row, side) 892 if err != nil { 893 return nil, err 894 } 895 if matched { 896 z.infos[side].container.Push(row) 897 } 898 } 899 return row, nil 900 } 901 902 // Next is part of the RowSource interface. 903 func (z *zigzagJoiner) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) { 904 txn := z.FlowCtx.Txn 905 906 if !z.started { 907 z.started = true 908 909 curInfo := z.infos[z.side] 910 // Fetch initial batch. 911 err := curInfo.fetcher.StartScan( 912 z.Ctx, 913 txn, 914 roachpb.Spans{roachpb.Span{Key: curInfo.key, EndKey: curInfo.endKey}}, 915 true, /* batch limit */ 916 zigzagJoinerBatchSize, 917 z.FlowCtx.TraceKV, 918 ) 919 if err != nil { 920 log.Errorf(z.Ctx, "scan error: %s", err) 921 return nil, z.producerMeta(err) 922 } 923 fetchedRow, err := z.fetchRow(z.Ctx) 924 if err != nil { 925 err = scrub.UnwrapScrubError(err) 926 return nil, z.producerMeta(err) 927 } 928 z.baseRow = z.rowAlloc.AllocRow(len(fetchedRow)) 929 copy(z.baseRow, fetchedRow) 930 z.side = z.nextSide() 931 } 932 933 if z.Closed { 934 return nil, z.producerMeta(nil /* err */) 935 } 936 937 for { 938 row, meta := z.nextRow(z.Ctx, txn) 939 if z.Closed || meta != nil { 940 if meta != nil { 941 z.returnedMeta = append(z.returnedMeta, *meta) 942 } 943 return nil, meta 944 } 945 if row == nil { 946 z.MoveToDraining(nil /* err */) 947 break 948 } 949 950 outRow := z.ProcessRowHelper(row) 951 if outRow == nil { 952 continue 953 } 954 return outRow, nil 955 } 956 meta := z.DrainHelper() 957 if meta != nil { 958 z.returnedMeta = append(z.returnedMeta, *meta) 959 } 960 return nil, meta 961 } 
962 963 // ConsumerClosed is part of the RowSource interface. 964 func (z *zigzagJoiner) ConsumerClosed() { 965 // The consumer is done, Next() will not be called again. 966 z.close() 967 } 968 969 // DrainMeta is part of the MetadataSource interface. 970 func (z *zigzagJoiner) DrainMeta(_ context.Context) []execinfrapb.ProducerMetadata { 971 return z.returnedMeta 972 } 973 974 // ChildCount is part of the execinfra.OpNode interface. 975 func (z *zigzagJoiner) ChildCount(verbose bool) int { 976 return 0 977 } 978 979 // Child is part of the execinfra.OpNode interface. 980 func (z *zigzagJoiner) Child(nth int, verbose bool) execinfra.OpNode { 981 panic(fmt.Sprintf("invalid index %d", nth)) 982 }