github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/tablewriter_upsert_opt.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/kv" 17 "github.com/cockroachdb/cockroach/pkg/sql/row" 18 "github.com/cockroachdb/cockroach/pkg/sql/rowcontainer" 19 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 20 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 21 "github.com/cockroachdb/cockroach/pkg/util" 22 ) 23 24 // optTableUpserter implements the upsert operation when it is planned by the 25 // cost-based optimizer (CBO). The CBO can use a much simpler upserter because 26 // it incorporates conflict detection, update and computed column evaluation, 27 // and other upsert operations into the input query, rather than requiring the 28 // upserter to do it. For example: 29 // 30 // CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT) 31 // INSERT INTO abc VALUES (1, 2) ON CONFLICT (a) DO UPDATE SET b=10 32 // 33 // The CBO will generate an input expression similar to this: 34 // 35 // SELECT ins_a, ins_b, ins_c, fetch_a, fetch_b, fetch_c, 10 AS upd_b 36 // FROM (VALUES (1, 2, NULL)) AS ins(ins_a, ins_b, ins_c) 37 // LEFT OUTER JOIN abc AS fetch(fetch_a, fetch_b, fetch_c) 38 // ON ins_a = fetch_a 39 // 40 // The other non-CBO upserters perform custom left lookup joins. However, that 41 // doesn't allow sharing of optimization rules and doesn't work with correlated 42 // SET expressions. 43 // 44 // For more details on how the CBO compiles UPSERT statements, see the block 45 // comment on Builder.buildInsert in opt/optbuilder/insert.go. 46 type optTableUpserter struct { 47 tableWriterBase 48 49 ri row.Inserter 50 alloc *sqlbase.DatumAlloc 51 52 // Should we collect the rows for a RETURNING clause? 53 collectRows bool 54 55 // Rows returned if collectRows is true. 56 rowsUpserted *rowcontainer.RowContainer 57 58 // A mapping of column IDs to the return index used to shape the resulting 59 // rows to those required by the returning clause. Only required if 60 // collectRows is true. 61 colIDToReturnIndex map[sqlbase.ColumnID]int 62 63 // Do the result rows have a different order than insert rows. Only set if 64 // collectRows is true. 65 insertReorderingRequired bool 66 67 // resultCount is the number of upserts. Mirrors rowsUpserted.Len() if 68 // collectRows is set, counted separately otherwise. 69 resultCount int 70 71 // Contains all the rows to be inserted. 72 insertRows rowcontainer.RowContainer 73 74 // existingRows is used to store rows in a batch when checking for conflicts 75 // with rows earlier in the batch. Is is reused per batch. 76 existingRows *rowcontainer.RowContainer 77 78 // For allocation avoidance. 79 indexKeyPrefix []byte 80 81 // fetchCols indicate which columns need to be fetched from the target table, 82 // in order to detect whether a conflict has occurred, as well as to provide 83 // existing values for updates. 84 fetchCols []sqlbase.ColumnDescriptor 85 86 // updateCols indicate which columns need an update during a conflict. 87 updateCols []sqlbase.ColumnDescriptor 88 89 // returnCols indicate which columns need to be returned by the Upsert. 90 returnCols []sqlbase.ColumnDescriptor 91 92 // canaryOrdinal is the ordinal position of the column within the input row 93 // that is used to decide whether to execute an insert or update operation. 94 // If the canary column is null, then an insert will be performed; otherwise, 95 // an update is performed. This column will always be one of the fetchCols. 96 canaryOrdinal int 97 98 // resultRow is a reusable slice of Datums used to store result rows. 99 resultRow tree.Datums 100 101 // fkTables is used for foreign key checks in the update case. 102 fkTables row.FkTableMetadata 103 104 // ru is used when updating rows. 105 ru row.Updater 106 107 // tabColIdxToRetIdx is the mapping from the columns in the table to the 108 // columns in the resultRowBuffer. A value of -1 is used to indicate 109 // that the table column at that index is not part of the resultRowBuffer 110 // of the mutation. Otherwise, the value at the i-th index refers to the 111 // index of the resultRowBuffer where the i-th column of the table is 112 // to be returned. 113 tabColIdxToRetIdx []int 114 } 115 116 var _ tableWriter = &optTableUpserter{} 117 118 // init is part of the tableWriter interface. 119 func (tu *optTableUpserter) init( 120 ctx context.Context, txn *kv.Txn, evalCtx *tree.EvalContext, 121 ) error { 122 tu.tableWriterBase.init(txn) 123 tableDesc := tu.tableDesc() 124 125 tu.insertRows.Init( 126 evalCtx.Mon.MakeBoundAccount(), sqlbase.ColTypeInfoFromColDescs(tu.ri.InsertCols), 0, 127 ) 128 129 // collectRows, set upon initialization, indicates whether or not we want rows returned from the operation. 130 if tu.collectRows { 131 tu.rowsUpserted = rowcontainer.NewRowContainer( 132 evalCtx.Mon.MakeBoundAccount(), 133 sqlbase.ColTypeInfoFromColDescs(tableDesc.Columns), 134 tu.insertRows.Len(), 135 ) 136 137 // Create the map from colIds to the expected columns. 138 // Note that this map will *not* contain any mutation columns - that's 139 // because even though we might insert values into mutation columns, we 140 // never return them back to the user. 141 tu.colIDToReturnIndex = map[sqlbase.ColumnID]int{} 142 for i := range tableDesc.Columns { 143 id := tableDesc.Columns[i].ID 144 tu.colIDToReturnIndex[id] = i 145 } 146 147 if len(tu.ri.InsertColIDtoRowIndex) == len(tu.colIDToReturnIndex) { 148 for colID, insertIndex := range tu.ri.InsertColIDtoRowIndex { 149 resultIndex, ok := tu.colIDToReturnIndex[colID] 150 if !ok || resultIndex != insertIndex { 151 tu.insertReorderingRequired = true 152 break 153 } 154 } 155 } else { 156 tu.insertReorderingRequired = true 157 } 158 } 159 160 tu.insertRows.Init( 161 evalCtx.Mon.MakeBoundAccount(), sqlbase.ColTypeInfoFromColDescs(tu.ri.InsertCols), 0, 162 ) 163 164 tu.indexKeyPrefix = sqlbase.MakeIndexKeyPrefix( 165 evalCtx.Codec, tableDesc.TableDesc(), tableDesc.PrimaryIndex.ID, 166 ) 167 168 if tu.collectRows { 169 tu.resultRow = make(tree.Datums, len(tu.returnCols)) 170 tu.rowsUpserted = rowcontainer.NewRowContainer( 171 evalCtx.Mon.MakeBoundAccount(), 172 sqlbase.ColTypeInfoFromColDescs(tu.returnCols), 173 tu.insertRows.Len(), 174 ) 175 } 176 177 return nil 178 } 179 180 // flushAndStartNewBatch is part of the tableWriter interface. 181 func (tu *optTableUpserter) flushAndStartNewBatch(ctx context.Context) error { 182 tu.insertRows.Clear(ctx) 183 if tu.collectRows { 184 tu.rowsUpserted.Clear(ctx) 185 } 186 if tu.existingRows != nil { 187 tu.existingRows.Clear(ctx) 188 } 189 return tu.tableWriterBase.flushAndStartNewBatch(ctx, tu.tableDesc()) 190 } 191 192 // batchedCount is part of the batchedTableWriter interface. 193 func (tu *optTableUpserter) batchedCount() int { return tu.resultCount } 194 195 // batchedValues is part of the batchedTableWriter interface. 196 func (tu *optTableUpserter) batchedValues(rowIdx int) tree.Datums { 197 if !tu.collectRows { 198 panic("return row requested but collect rows was not set") 199 } 200 return tu.rowsUpserted.At(rowIdx) 201 } 202 203 func (tu *optTableUpserter) curBatchSize() int { return tu.insertRows.Len() } 204 205 // close is part of the tableWriter interface. 206 func (tu *optTableUpserter) close(ctx context.Context) { 207 tu.insertRows.Close(ctx) 208 if tu.existingRows != nil { 209 tu.existingRows.Close(ctx) 210 } 211 if tu.rowsUpserted != nil { 212 tu.rowsUpserted.Close(ctx) 213 } 214 } 215 216 // finalize is part of the tableWriter interface. 217 func (tu *optTableUpserter) finalize( 218 ctx context.Context, traceKV bool, 219 ) (*rowcontainer.RowContainer, error) { 220 return nil, tu.tableWriterBase.finalize(ctx, tu.tableDesc()) 221 } 222 223 // makeResultFromRow reshapes a row that was inserted or updated to a row 224 // suitable for storing for a RETURNING clause, shaped by the target table's 225 // descriptor. 226 // There are two main examples of this reshaping: 227 // 1) A row may not contain values for nullable columns, so insert those NULLs. 228 // 2) Don't return values we wrote into non-public mutation columns. 229 func (tu *optTableUpserter) makeResultFromRow( 230 row tree.Datums, colIDToRowIndex map[sqlbase.ColumnID]int, 231 ) tree.Datums { 232 resultRow := make(tree.Datums, len(tu.colIDToReturnIndex)) 233 for colID, returnIndex := range tu.colIDToReturnIndex { 234 rowIndex, ok := colIDToRowIndex[colID] 235 if ok { 236 resultRow[returnIndex] = row[rowIndex] 237 } else { 238 // If the row doesn't have all columns filled out. Fill the columns that 239 // weren't included with NULLs. This will only be true for nullable 240 // columns. 241 resultRow[returnIndex] = tree.DNull 242 } 243 } 244 return resultRow 245 } 246 247 // desc is part of the tableWriter interface. 248 func (*optTableUpserter) desc() string { return "opt upserter" } 249 250 // row is part of the tableWriter interface. 251 // TODO(mgartner): Use ignoreIndexes to avoid writing to partial indexes when 252 // the row does not match the partial index predicate. 253 func (tu *optTableUpserter) row( 254 ctx context.Context, row tree.Datums, ignoreIndexes util.FastIntSet, traceKV bool, 255 ) error { 256 tu.batchSize++ 257 tu.resultCount++ 258 259 // Consult the canary column to determine whether to insert or update. For 260 // more details on how canary columns work, see the block comment on 261 // Builder.buildInsert in opt/optbuilder/insert.go. 262 insertEnd := len(tu.ri.InsertCols) 263 if tu.canaryOrdinal == -1 { 264 // No canary column means that existing row should be overwritten (i.e. 265 // the insert and update columns are the same, so no need to choose). 266 return tu.insertNonConflictingRow(ctx, tu.b, row[:insertEnd], true /* overwrite */, traceKV) 267 } 268 if row[tu.canaryOrdinal] == tree.DNull { 269 // No conflict, so insert a new row. 270 return tu.insertNonConflictingRow(ctx, tu.b, row[:insertEnd], false /* overwrite */, traceKV) 271 } 272 273 // If no columns need to be updated, then possibly collect the unchanged row. 274 fetchEnd := insertEnd + len(tu.fetchCols) 275 if len(tu.updateCols) == 0 { 276 if !tu.collectRows { 277 return nil 278 } 279 _, err := tu.rowsUpserted.AddRow(ctx, row[insertEnd:fetchEnd]) 280 return err 281 } 282 283 // Update the row. 284 updateEnd := fetchEnd + len(tu.updateCols) 285 return tu.updateConflictingRow( 286 ctx, 287 tu.b, 288 row[insertEnd:fetchEnd], 289 row[fetchEnd:updateEnd], 290 tu.tableDesc(), 291 traceKV, 292 ) 293 } 294 295 // atBatchEnd is part of the tableWriter interface. 296 func (tu *optTableUpserter) atBatchEnd(ctx context.Context, traceKV bool) error { 297 // Nothing to do, because the row method does everything. 298 return nil 299 } 300 301 // insertNonConflictingRow inserts the given source row into the table when 302 // there was no conflict. If the RETURNING clause was specified, then the 303 // inserted row is stored in the rowsUpserted collection. 304 func (tu *optTableUpserter) insertNonConflictingRow( 305 ctx context.Context, b *kv.Batch, insertRow tree.Datums, overwrite, traceKV bool, 306 ) error { 307 // Perform the insert proper. 308 // TODO(mgartner): Pass ignoreIndexes to InsertRow and do not write index 309 // entries for indexes in the set. 310 var ignoreIndexes util.FastIntSet 311 if err := tu.ri.InsertRow(ctx, b, insertRow, ignoreIndexes, overwrite, row.CheckFKs, traceKV); err != nil { 312 return err 313 } 314 315 if !tu.collectRows { 316 return nil 317 } 318 319 // Reshape the row if needed. 320 if tu.insertReorderingRequired { 321 tableRow := tu.makeResultFromRow(insertRow, tu.ri.InsertColIDtoRowIndex) 322 323 // TODO(ridwanmsharif): Why didn't they update the value of tu.resultRow 324 // before? Is it safe to be doing it now? 325 // Map the upserted columns into the result row before adding it. 326 for tabIdx := range tableRow { 327 if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 { 328 tu.resultRow[retIdx] = tableRow[tabIdx] 329 } 330 } 331 _, err := tu.rowsUpserted.AddRow(ctx, tu.resultRow) 332 return err 333 } 334 335 // Map the upserted columns into the result row before adding it. 336 for tabIdx := range insertRow { 337 if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 { 338 tu.resultRow[retIdx] = insertRow[tabIdx] 339 } 340 } 341 _, err := tu.rowsUpserted.AddRow(ctx, tu.resultRow) 342 return err 343 } 344 345 // updateConflictingRow updates an existing row in the table when there was a 346 // conflict. The existing values from the row are provided in fetchRow, and the 347 // updated values are provided in updateValues. The updater is assumed to 348 // already be initialized with the descriptors for the fetch and update values. 349 // If the RETURNING clause was specified, then the updated row is stored in the 350 // rowsUpserted collection. 351 func (tu *optTableUpserter) updateConflictingRow( 352 ctx context.Context, 353 b *kv.Batch, 354 fetchRow tree.Datums, 355 updateValues tree.Datums, 356 tableDesc *sqlbase.ImmutableTableDescriptor, 357 traceKV bool, 358 ) error { 359 // Enforce the column constraints. 360 // Note: the column constraints are already enforced for fetchRow, 361 // because: 362 // - for the insert part, they were checked upstream in upsertNode 363 // via GenerateInsertRow(). 364 // - for the fetched part, we assume that the data in the table is 365 // correct already. 366 if err := enforceLocalColumnConstraints(updateValues, tu.updateCols); err != nil { 367 return err 368 } 369 370 // Queue the update in KV. This also returns an "update row" 371 // containing the updated values for every column in the 372 // table. This is useful for RETURNING, which we collect below. 373 _, err := tu.ru.UpdateRow(ctx, b, fetchRow, updateValues, row.CheckFKs, traceKV) 374 if err != nil { 375 return err 376 } 377 378 // We only need a result row if we're collecting rows. 379 if !tu.collectRows { 380 return nil 381 } 382 383 // We now need a row that has the shape of the result row with 384 // the appropriate return columns. Make sure all the fetch columns 385 // are present. 386 tableRow := tu.makeResultFromRow(fetchRow, tu.ru.FetchColIDtoRowIndex) 387 388 // Make sure all the updated columns are present. 389 for colID, returnIndex := range tu.colIDToReturnIndex { 390 // If an update value for a given column exists, use that; else use the 391 // existing value of that column if it has been fetched. 392 rowIndex, ok := tu.ru.UpdateColIDtoRowIndex[colID] 393 if ok { 394 tableRow[returnIndex] = updateValues[rowIndex] 395 } 396 } 397 398 // Map the upserted columns into the result row before adding it. 399 for tabIdx := range tableRow { 400 if retIdx := tu.tabColIdxToRetIdx[tabIdx]; retIdx >= 0 { 401 tu.resultRow[retIdx] = tableRow[tabIdx] 402 } 403 } 404 405 // The resulting row may have nil values for columns that aren't 406 // being upserted, updated or fetched. 407 _, err = tu.rowsUpserted.AddRow(ctx, tu.resultRow) 408 return err 409 } 410 411 // tableDesc is part of the tableWriter interface. 412 func (tu *optTableUpserter) tableDesc() *sqlbase.ImmutableTableDescriptor { 413 return tu.ri.Helper.TableDesc 414 } 415 416 // walkExprs is part of the tableWriter interface. 417 func (tu *optTableUpserter) walkExprs(walk func(desc string, index int, expr tree.TypedExpr)) { 418 }