github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/tablewriter_upsert_opt.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/kv"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/row"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    21  	"github.com/cockroachdb/cockroach/pkg/util"
    22  )
    23  
    24  // optTableUpserter implements the upsert operation when it is planned by the
    25  // cost-based optimizer (CBO). The CBO can use a much simpler upserter because
    26  // it incorporates conflict detection, update and computed column evaluation,
    27  // and other upsert operations into the input query, rather than requiring the
    28  // upserter to do it. For example:
    29  //
    30  //   CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT)
    31  //   INSERT INTO abc VALUES (1, 2) ON CONFLICT (a) DO UPDATE SET b=10
    32  //
    33  // The CBO will generate an input expression similar to this:
    34  //
    35  //   SELECT ins_a, ins_b, ins_c, fetch_a, fetch_b, fetch_c, 10 AS upd_b
    36  //   FROM (VALUES (1, 2, NULL)) AS ins(ins_a, ins_b, ins_c)
    37  //   LEFT OUTER JOIN abc AS fetch(fetch_a, fetch_b, fetch_c)
    38  //   ON ins_a = fetch_a
    39  //
    40  // The other non-CBO upserters perform custom left lookup joins. However, that
    41  // doesn't allow sharing of optimization rules and doesn't work with correlated
    42  // SET expressions.
    43  //
    44  // For more details on how the CBO compiles UPSERT statements, see the block
    45  // comment on Builder.buildInsert in opt/optbuilder/insert.go.
    46  type optTableUpserter struct {
    47  	tableWriterBase
    48  
    49  	ri    row.Inserter
    50  	alloc *sqlbase.DatumAlloc
    51  
    52  	// Should we collect the rows for a RETURNING clause?
    53  	collectRows bool
    54  
    55  	// Rows returned if collectRows is true.
    56  	rowsUpserted *rowcontainer.RowContainer
    57  
    58  	// A mapping of column IDs to the return index used to shape the resulting
    59  	// rows to those required by the returning clause. Only required if
    60  	// collectRows is true.
    61  	colIDToReturnIndex map[sqlbase.ColumnID]int
    62  
    63  	// Do the result rows have a different order than insert rows. Only set if
    64  	// collectRows is true.
    65  	insertReorderingRequired bool
    66  
    67  	// resultCount is the number of upserts. Mirrors rowsUpserted.Len() if
    68  	// collectRows is set, counted separately otherwise.
    69  	resultCount int
    70  
    71  	// Contains all the rows to be inserted.
    72  	insertRows rowcontainer.RowContainer
    73  
    74  	// existingRows is used to store rows in a batch when checking for conflicts
    75  	// with rows earlier in the batch. Is is reused per batch.
    76  	existingRows *rowcontainer.RowContainer
    77  
    78  	// For allocation avoidance.
    79  	indexKeyPrefix []byte
    80  
    81  	// fetchCols indicate which columns need to be fetched from the target table,
    82  	// in order to detect whether a conflict has occurred, as well as to provide
    83  	// existing values for updates.
    84  	fetchCols []sqlbase.ColumnDescriptor
    85  
    86  	// updateCols indicate which columns need an update during a conflict.
    87  	updateCols []sqlbase.ColumnDescriptor
    88  
    89  	// returnCols indicate which columns need to be returned by the Upsert.
    90  	returnCols []sqlbase.ColumnDescriptor
    91  
    92  	// canaryOrdinal is the ordinal position of the column within the input row
    93  	// that is used to decide whether to execute an insert or update operation.
    94  	// If the canary column is null, then an insert will be performed; otherwise,
    95  	// an update is performed. This column will always be one of the fetchCols.
    96  	canaryOrdinal int
    97  
    98  	// resultRow is a reusable slice of Datums used to store result rows.
    99  	resultRow tree.Datums
   100  
   101  	// fkTables is used for foreign key checks in the update case.
   102  	fkTables row.FkTableMetadata
   103  
   104  	// ru is used when updating rows.
   105  	ru row.Updater
   106  
   107  	// tabColIdxToRetIdx is the mapping from the columns in the table to the
   108  	// columns in the resultRowBuffer. A value of -1 is used to indicate
   109  	// that the table column at that index is not part of the resultRowBuffer
   110  	// of the mutation. Otherwise, the value at the i-th index refers to the
   111  	// index of the resultRowBuffer where the i-th column of the table is
   112  	// to be returned.
   113  	tabColIdxToRetIdx []int
   114  }
   115  
   116  var _ tableWriter = &optTableUpserter{}
   117  
   118  // init is part of the tableWriter interface.
   119  func (tu *optTableUpserter) init(
   120  	ctx context.Context, txn *kv.Txn, evalCtx *tree.EvalContext,
   121  ) error {
   122  	tu.tableWriterBase.init(txn)
   123  	tableDesc := tu.tableDesc()
   124  
   125  	tu.insertRows.Init(
   126  		evalCtx.Mon.MakeBoundAccount(), sqlbase.ColTypeInfoFromColDescs(tu.ri.InsertCols), 0,
   127  	)
   128  
   129  	// collectRows, set upon initialization, indicates whether or not we want rows returned from the operation.
   130  	if tu.collectRows {
   131  		tu.rowsUpserted = rowcontainer.NewRowContainer(
   132  			evalCtx.Mon.MakeBoundAccount(),
   133  			sqlbase.ColTypeInfoFromColDescs(tableDesc.Columns),
   134  			tu.insertRows.Len(),
   135  		)
   136  
   137  		// Create the map from colIds to the expected columns.
   138  		// Note that this map will *not* contain any mutation columns - that's
   139  		// because even though we might insert values into mutation columns, we
   140  		// never return them back to the user.
   141  		tu.colIDToReturnIndex = map[sqlbase.ColumnID]int{}
   142  		for i := range tableDesc.Columns {
   143  			id := tableDesc.Columns[i].ID
   144  			tu.colIDToReturnIndex[id] = i
   145  		}
   146  
   147  		if len(tu.ri.InsertColIDtoRowIndex) == len(tu.colIDToReturnIndex) {
   148  			for colID, insertIndex := range tu.ri.InsertColIDtoRowIndex {
   149  				resultIndex, ok := tu.colIDToReturnIndex[colID]
   150  				if !ok || resultIndex != insertIndex {
   151  					tu.insertReorderingRequired = true
   152  					break
   153  				}
   154  			}
   155  		} else {
   156  			tu.insertReorderingRequired = true
   157  		}
   158  	}
   159  
   160  	tu.insertRows.Init(
   161  		evalCtx.Mon.MakeBoundAccount(), sqlbase.ColTypeInfoFromColDescs(tu.ri.InsertCols), 0,
   162  	)
   163  
   164  	tu.indexKeyPrefix = sqlbase.MakeIndexKeyPrefix(
   165  		evalCtx.Codec, tableDesc.TableDesc(), tableDesc.PrimaryIndex.ID,
   166  	)
   167  
   168  	if tu.collectRows {
   169  		tu.resultRow = make(tree.Datums, len(tu.returnCols))
   170  		tu.rowsUpserted = rowcontainer.NewRowContainer(
   171  			evalCtx.Mon.MakeBoundAccount(),
   172  			sqlbase.ColTypeInfoFromColDescs(tu.returnCols),
   173  			tu.insertRows.Len(),
   174  		)
   175  	}
   176  
   177  	return nil
   178  }
   179  
   180  // flushAndStartNewBatch is part of the tableWriter interface.
   181  func (tu *optTableUpserter) flushAndStartNewBatch(ctx context.Context) error {
   182  	tu.insertRows.Clear(ctx)
   183  	if tu.collectRows {
   184  		tu.rowsUpserted.Clear(ctx)
   185  	}
   186  	if tu.existingRows != nil {
   187  		tu.existingRows.Clear(ctx)
   188  	}
   189  	return tu.tableWriterBase.flushAndStartNewBatch(ctx, tu.tableDesc())
   190  }
   191  
   192  // batchedCount is part of the batchedTableWriter interface.
   193  func (tu *optTableUpserter) batchedCount() int { return tu.resultCount }
   194  
   195  // batchedValues is part of the batchedTableWriter interface.
   196  func (tu *optTableUpserter) batchedValues(rowIdx int) tree.Datums {
   197  	if !tu.collectRows {
   198  		panic("return row requested but collect rows was not set")
   199  	}
   200  	return tu.rowsUpserted.At(rowIdx)
   201  }
   202  
   203  func (tu *optTableUpserter) curBatchSize() int { return tu.insertRows.Len() }
   204  
   205  // close is part of the tableWriter interface.
   206  func (tu *optTableUpserter) close(ctx context.Context) {
   207  	tu.insertRows.Close(ctx)
   208  	if tu.existingRows != nil {
   209  		tu.existingRows.Close(ctx)
   210  	}
   211  	if tu.rowsUpserted != nil {
   212  		tu.rowsUpserted.Close(ctx)
   213  	}
   214  }
   215  
   216  // finalize is part of the tableWriter interface.
   217  func (tu *optTableUpserter) finalize(
   218  	ctx context.Context, traceKV bool,
   219  ) (*rowcontainer.RowContainer, error) {
   220  	return nil, tu.tableWriterBase.finalize(ctx, tu.tableDesc())
   221  }
   222  
   223  // makeResultFromRow reshapes a row that was inserted or updated to a row
   224  // suitable for storing for a RETURNING clause, shaped by the target table's
   225  // descriptor.
   226  // There are two main examples of this reshaping:
   227  // 1) A row may not contain values for nullable columns, so insert those NULLs.
   228  // 2) Don't return values we wrote into non-public mutation columns.
   229  func (tu *optTableUpserter) makeResultFromRow(
   230  	row tree.Datums, colIDToRowIndex map[sqlbase.ColumnID]int,
   231  ) tree.Datums {
   232  	resultRow := make(tree.Datums, len(tu.colIDToReturnIndex))
   233  	for colID, returnIndex := range tu.colIDToReturnIndex {
   234  		rowIndex, ok := colIDToRowIndex[colID]
   235  		if ok {
   236  			resultRow[returnIndex] = row[rowIndex]
   237  		} else {
   238  			// If the row doesn't have all columns filled out. Fill the columns that
   239  			// weren't included with NULLs. This will only be true for nullable
   240  			// columns.
   241  			resultRow[returnIndex] = tree.DNull
   242  		}
   243  	}
   244  	return resultRow
   245  }
   246  
   247  // desc is part of the tableWriter interface.
   248  func (*optTableUpserter) desc() string { return "opt upserter" }
   249  
   250  // row is part of the tableWriter interface.
   251  // TODO(mgartner): Use ignoreIndexes to avoid writing to partial indexes when
   252  // the row does not match the partial index predicate.
   253  func (tu *optTableUpserter) row(
   254  	ctx context.Context, row tree.Datums, ignoreIndexes util.FastIntSet, traceKV bool,
   255  ) error {
   256  	tu.batchSize++
   257  	tu.resultCount++
   258  
   259  	// Consult the canary column to determine whether to insert or update. For
   260  	// more details on how canary columns work, see the block comment on
   261  	// Builder.buildInsert in opt/optbuilder/insert.go.
   262  	insertEnd := len(tu.ri.InsertCols)
   263  	if tu.canaryOrdinal == -1 {
   264  		// No canary column means that existing row should be overwritten (i.e.
   265  		// the insert and update columns are the same, so no need to choose).
   266  		return tu.insertNonConflictingRow(ctx, tu.b, row[:insertEnd], true /* overwrite */, traceKV)
   267  	}
   268  	if row[tu.canaryOrdinal] == tree.DNull {
   269  		// No conflict, so insert a new row.
   270  		return tu.insertNonConflictingRow(ctx, tu.b, row[:insertEnd], false /* overwrite */, traceKV)
   271  	}
   272  
   273  	// If no columns need to be updated, then possibly collect the unchanged row.
   274  	fetchEnd := insertEnd + len(tu.fetchCols)
   275  	if len(tu.updateCols) == 0 {
   276  		if !tu.collectRows {
   277  			return nil
   278  		}
   279  		_, err := tu.rowsUpserted.AddRow(ctx, row[insertEnd:fetchEnd])
   280  		return err
   281  	}
   282  
   283  	// Update the row.
   284  	updateEnd := fetchEnd + len(tu.updateCols)
   285  	return tu.updateConflictingRow(
   286  		ctx,
   287  		tu.b,
   288  		row[insertEnd:fetchEnd],
   289  		row[fetchEnd:updateEnd],
   290  		tu.tableDesc(),
   291  		traceKV,
   292  	)
   293  }
   294  
   295  // atBatchEnd is part of the tableWriter interface.
   296  func (tu *optTableUpserter) atBatchEnd(ctx context.Context, traceKV bool) error {
   297  	// Nothing to do, because the row method does everything.
   298  	return nil
   299  }
   300  
   301  // insertNonConflictingRow inserts the given source row into the table when
   302  // there was no conflict. If the RETURNING clause was specified, then the
   303  // inserted row is stored in the rowsUpserted collection.
   304  func (tu *optTableUpserter) insertNonConflictingRow(
   305  	ctx context.Context, b *kv.Batch, insertRow tree.Datums, overwrite, traceKV bool,
   306  ) error {
   307  	// Perform the insert proper.
   308  	// TODO(mgartner): Pass ignoreIndexes to InsertRow and do not write index
   309  	// entries for indexes in the set.
   310  	var ignoreIndexes util.FastIntSet
   311  	if err := tu.ri.InsertRow(ctx, b, insertRow, ignoreIndexes, overwrite, row.CheckFKs, traceKV); err != nil {
   312  		return err
   313  	}
   314  
   315  	if !tu.collectRows {
   316  		return nil
   317  	}
   318  
   319  	// Reshape the row if needed.
   320  	if tu.insertReorderingRequired {
   321  		tableRow := tu.makeResultFromRow(insertRow, tu.ri.InsertColIDtoRowIndex)
   322  
   323  		// TODO(ridwanmsharif): Why didn't they update the value of tu.resultRow
   324  		//  before? Is it safe to be doing it now?
   325  		// Map the upserted columns into the result row before adding it.
   326  		for tabIdx := range tableRow {
   327  			if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 {
   328  				tu.resultRow[retIdx] = tableRow[tabIdx]
   329  			}
   330  		}
   331  		_, err := tu.rowsUpserted.AddRow(ctx, tu.resultRow)
   332  		return err
   333  	}
   334  
   335  	// Map the upserted columns into the result row before adding it.
   336  	for tabIdx := range insertRow {
   337  		if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 {
   338  			tu.resultRow[retIdx] = insertRow[tabIdx]
   339  		}
   340  	}
   341  	_, err := tu.rowsUpserted.AddRow(ctx, tu.resultRow)
   342  	return err
   343  }
   344  
   345  // updateConflictingRow updates an existing row in the table when there was a
   346  // conflict. The existing values from the row are provided in fetchRow, and the
   347  // updated values are provided in updateValues. The updater is assumed to
   348  // already be initialized with the descriptors for the fetch and update values.
   349  // If the RETURNING clause was specified, then the updated row is stored in the
   350  // rowsUpserted collection.
   351  func (tu *optTableUpserter) updateConflictingRow(
   352  	ctx context.Context,
   353  	b *kv.Batch,
   354  	fetchRow tree.Datums,
   355  	updateValues tree.Datums,
   356  	tableDesc *sqlbase.ImmutableTableDescriptor,
   357  	traceKV bool,
   358  ) error {
   359  	// Enforce the column constraints.
   360  	// Note: the column constraints are already enforced for fetchRow,
   361  	// because:
   362  	// - for the insert part, they were checked upstream in upsertNode
   363  	//   via GenerateInsertRow().
   364  	// - for the fetched part, we assume that the data in the table is
   365  	//   correct already.
   366  	if err := enforceLocalColumnConstraints(updateValues, tu.updateCols); err != nil {
   367  		return err
   368  	}
   369  
   370  	// Queue the update in KV. This also returns an "update row"
   371  	// containing the updated values for every column in the
   372  	// table. This is useful for RETURNING, which we collect below.
   373  	_, err := tu.ru.UpdateRow(ctx, b, fetchRow, updateValues, row.CheckFKs, traceKV)
   374  	if err != nil {
   375  		return err
   376  	}
   377  
   378  	// We only need a result row if we're collecting rows.
   379  	if !tu.collectRows {
   380  		return nil
   381  	}
   382  
   383  	// We now need a row that has the shape of the result row with
   384  	// the appropriate return columns. Make sure all the fetch columns
   385  	// are present.
   386  	tableRow := tu.makeResultFromRow(fetchRow, tu.ru.FetchColIDtoRowIndex)
   387  
   388  	// Make sure all the updated columns are present.
   389  	for colID, returnIndex := range tu.colIDToReturnIndex {
   390  		// If an update value for a given column exists, use that; else use the
   391  		// existing value of that column if it has been fetched.
   392  		rowIndex, ok := tu.ru.UpdateColIDtoRowIndex[colID]
   393  		if ok {
   394  			tableRow[returnIndex] = updateValues[rowIndex]
   395  		}
   396  	}
   397  
   398  	// Map the upserted columns into the result row before adding it.
   399  	for tabIdx := range tableRow {
   400  		if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 {
   401  			tu.resultRow[retIdx] = tableRow[tabIdx]
   402  		}
   403  	}
   404  
   405  	// The resulting row may have nil values for columns that aren't
   406  	// being upserted, updated or fetched.
   407  	_, err = tu.rowsUpserted.AddRow(ctx, tu.resultRow)
   408  	return err
   409  }
   410  
   411  // tableDesc is part of the tableWriter interface.
   412  func (tu *optTableUpserter) tableDesc() *sqlbase.ImmutableTableDescriptor {
   413  	return tu.ri.Helper.TableDesc
   414  }
   415  
   416  // walkExprs is part of the tableWriter interface.
   417  func (tu *optTableUpserter) walkExprs(walk func(desc string, index int, expr tree.TypedExpr)) {
   418  }