github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/optbuilder/insert.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package optbuilder 12 13 import ( 14 "fmt" 15 "sort" 16 17 "github.com/cockroachdb/cockroach/pkg/sql/opt" 18 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 19 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 20 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 21 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 22 "github.com/cockroachdb/cockroach/pkg/sql/privilege" 23 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 24 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 25 "github.com/cockroachdb/cockroach/pkg/sql/types" 26 "github.com/cockroachdb/cockroach/pkg/util" 27 "github.com/cockroachdb/errors" 28 ) 29 30 // duplicateUpsertErrText is error text used when a row is modified twice by 31 // an upsert statement. 32 const duplicateUpsertErrText = "UPSERT or INSERT...ON CONFLICT command cannot affect row a second time" 33 34 // excludedTableName is the name of a special Upsert data source. When a row 35 // cannot be inserted due to a conflict, the "excluded" data source contains 36 // that row, so that its columns can be referenced in the conflict clause: 37 // 38 // INSERT INTO ab VALUES (1, 2) ON CONFLICT (a) DO UPDATE b=excluded.b+1 39 // 40 // It is located in the special crdb_internal schema so that it never overlaps 41 // with user data sources. 42 var excludedTableName tree.TableName 43 44 func init() { 45 // Clear explicit schema and catalog so that they're not printed in error 46 // messages. 47 excludedTableName = tree.MakeTableNameWithSchema("", "crdb_internal", "excluded") 48 excludedTableName.ExplicitSchema = false 49 excludedTableName.ExplicitCatalog = false 50 } 51 52 // buildInsert builds a memo group for an InsertOp or UpsertOp expression. To 53 // begin, an input expression is constructed which outputs these columns to 54 // insert into the target table: 55 // 56 // 1. Columns explicitly specified by the user in SELECT or VALUES expression. 57 // 58 // 2. Columns not specified by the user, but having a default value declared 59 // in schema (or being nullable). 60 // 61 // 3. Computed columns. 62 // 63 // 4. Mutation columns which are being added or dropped by an online schema 64 // change. 65 // 66 // buildInsert starts by constructing the input expression, and then wraps it 67 // with Project operators which add default, computed, and mutation columns. The 68 // final input expression will project values for all columns in the target 69 // table. For example, if this is the schema and INSERT statement: 70 // 71 // CREATE TABLE abcd ( 72 // a INT PRIMARY KEY, 73 // b INT, 74 // c INT DEFAULT(10), 75 // d INT AS (b+c) STORED 76 // ) 77 // INSERT INTO abcd (a) VALUES (1) 78 // 79 // Then an input expression equivalent to this would be built: 80 // 81 // SELECT ins_a, ins_b, ins_c, ins_b + ins_c AS ins_d 82 // FROM (VALUES (1, NULL, 10)) AS t(ins_a, ins_b, ins_c) 83 // 84 // If an ON CONFLICT clause is present (or if it was an UPSERT statement), then 85 // additional columns are added to the input expression: 86 // 87 // 1. Columns containing existing values fetched from the target table and 88 // used to detect conflicts and to formulate the key/value update commands. 89 // 90 // 2. Columns containing updated values to set when a conflict is detected, as 91 // specified by the user. 92 // 93 // 3. Computed columns which will be updated when a conflict is detected and 94 // that are dependent on one or more updated columns. 95 // 96 // A LEFT OUTER JOIN associates each row to insert with the corresponding 97 // existing row (#1 above). If the row does not exist, then the existing columns 98 // will be null-extended, per the semantics of LEFT OUTER JOIN. This behavior 99 // allows the execution engine to test whether a given insert row conflicts with 100 // an existing row in the table. One of the existing columns that is declared as 101 // NOT NULL in the table schema is designated as a "canary column". When the 102 // canary column is null after the join step, then it must have been null- 103 // extended by the LEFT OUTER JOIN. Therefore, there is no existing row, and no 104 // conflict. If the canary column is not null, then there is an existing row, 105 // and a conflict. 106 // 107 // The canary column is used in CASE statements to toggle between the insert and 108 // update values for each row. If there is no conflict, the insert value is 109 // used. Otherwise, the update value is used (or the existing value if there is 110 // no update value for that column). 111 // 112 // In addition, upsert cases have another complication that arises from the 113 // requirement that no mutation statement updates the same row more than once. 114 // Primary key violations prevent INSERT statements from inserting the same row 115 // twice. DELETE statements do not encounter a problem because their input never 116 // contains duplicate rows. And UPDATE statements are equivalent to DELETE 117 // followed by an INSERT, so they're safe as well. By contrast, UPSERT and 118 // INSERT..ON CONFLICT statements can have duplicate input rows that trigger 119 // updates of the same row after insertion conflicts. 120 // 121 // Detecting (and raising an error) or ignoring (in case of DO NOTHING) 122 // duplicate rows requires wrapping the input with one or more DISTINCT ON 123 // operators that ensure the input is distinct on at least one unique index. 124 // Because the input is distinct on a unique index of the target table, the 125 // statement will never attempt to update the same row twice. 126 // 127 // Putting it all together, if this is the schema and INSERT..ON CONFLICT 128 // statement: 129 // 130 // CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT) 131 // INSERT INTO abc VALUES (1, 2), (1, 3) ON CONFLICT (a) DO UPDATE SET b=10 132 // 133 // Then an input expression roughly equivalent to this would be built (note that 134 // the DISTINCT ON is really the UpsertDistinctOn operator, which behaves a bit 135 // differently than the DistinctOn operator): 136 // 137 // SELECT 138 // fetch_a, 139 // fetch_b, 140 // fetch_c, 141 // CASE WHEN fetch_a IS NULL ins_a ELSE fetch_a END AS ups_a, 142 // CASE WHEN fetch_a IS NULL ins_b ELSE 10 END AS ups_b, 143 // CASE WHEN fetch_a IS NULL ins_c ELSE fetch_c END AS ups_c, 144 // FROM ( 145 // SELECT DISTINCT ON (ins_a) * 146 // FROM (VALUES (1, 2, NULL), (1, 3, NULL)) AS ins(ins_a, ins_b, ins_c) 147 // ) 148 // LEFT OUTER JOIN abc AS fetch(fetch_a, fetch_b, fetch_c) 149 // ON ins_a = fetch_a 150 // 151 // Here, the fetch_a column has been designated as the canary column, since it 152 // is NOT NULL in the schema. It is used as the CASE condition to decide between 153 // the insert and update values for each row. The CASE expressions will often 154 // prevent the unnecessary evaluation of the update expression in the case where 155 // an insertion needs to occur. In addition, it simplifies logical property 156 // calculation, since a 1:1 mapping to each target table column from a 157 // corresponding input column is maintained. 158 // 159 // If the ON CONFLICT clause contains a DO NOTHING clause, then each UNIQUE 160 // index on the target table requires its own DISTINCT ON to ensure that the 161 // input has no duplicates, and its own LEFT OUTER JOIN to check whether a 162 // conflict exists. For example: 163 // 164 // CREATE TABLE ab (a INT PRIMARY KEY, b INT) 165 // INSERT INTO ab (a, b) VALUES (1, 2), (1, 3) ON CONFLICT DO NOTHING 166 // 167 // Then an input expression roughly equivalent to this would be built: 168 // 169 // SELECT x, y 170 // FROM (SELECT DISTINCT ON (x) * FROM (VALUES (1, 2), (1, 3))) AS input(x, y) 171 // LEFT OUTER JOIN ab 172 // ON input.x = ab.a 173 // WHERE ab.a IS NULL 174 // 175 // Note that an ordered input to the INSERT does not provide any guarantee about 176 // the order in which mutations are applied, or the order of any returned rows 177 // (i.e. it won't become a physical property required of the Insert or Upsert 178 // operator). Not propagating input orderings avoids an extra sort when the 179 // ON CONFLICT clause is present, since it joins a new set of rows to the input 180 // and thereby scrambles the input ordering. 181 func (b *Builder) buildInsert(ins *tree.Insert, inScope *scope) (outScope *scope) { 182 // Find which table we're working on, check the permissions. 183 tab, depName, alias, refColumns := b.resolveTableForMutation(ins.Table, privilege.INSERT) 184 185 // It is possible to insert into specific columns using table reference 186 // syntax: 187 // INSERT INTO [<table_id>(<col1_id>,<col2_id>) AS <alias>] ... 188 // is equivalent to 189 // INSERT INTO [<table_id> AS <alias>] (col1_name, col2_name) ... 190 if refColumns != nil { 191 if len(ins.Columns) != 0 { 192 panic(pgerror.Newf(pgcode.Syntax, 193 "cannot specify both a list of column IDs and a list of column names")) 194 } 195 196 ins.Columns = make(tree.NameList, len(refColumns)) 197 for i, ord := range cat.ConvertColumnIDsToOrdinals(tab, refColumns) { 198 ins.Columns[i] = tab.Column(ord).ColName() 199 } 200 } 201 202 if ins.OnConflict != nil { 203 // UPSERT and INDEX ON CONFLICT will read from the table to check for 204 // duplicates. 205 b.checkPrivilege(depName, tab, privilege.SELECT) 206 207 if !ins.OnConflict.DoNothing { 208 // UPSERT and INDEX ON CONFLICT DO UPDATE may modify rows if the 209 // DO NOTHING clause is not present. 210 b.checkPrivilege(depName, tab, privilege.UPDATE) 211 } 212 } 213 214 var mb mutationBuilder 215 if ins.OnConflict != nil && ins.OnConflict.IsUpsertAlias() { 216 mb.init(b, "upsert", tab, alias) 217 } else { 218 mb.init(b, "insert", tab, alias) 219 } 220 221 // Compute target columns in two cases: 222 // 223 // 1. When explicitly specified by name: 224 // 225 // INSERT INTO <table> (<col1>, <col2>, ...) ... 226 // 227 // 2. When implicitly targeted by VALUES expression: 228 // 229 // INSERT INTO <table> VALUES (...) 230 // 231 // Target columns for other cases can't be derived until the input expression 232 // is built, at which time the number of input columns is known. At the same 233 // time, the input expression cannot be built until DEFAULT expressions are 234 // replaced and named target columns are known. So this step must come first. 235 if len(ins.Columns) != 0 { 236 // Target columns are explicitly specified by name. 237 mb.addTargetNamedColsForInsert(ins.Columns) 238 } else { 239 values := mb.extractValuesInput(ins.Rows) 240 if values != nil { 241 // Target columns are implicitly targeted by VALUES expression in the 242 // same order they appear in the target table schema. 243 mb.addTargetTableColsForInsert(len(values.Rows[0])) 244 } 245 } 246 247 // Build the input rows expression if one was specified: 248 // 249 // INSERT INTO <table> VALUES ... 250 // INSERT INTO <table> SELECT ... FROM ... 251 // 252 // or initialize an empty input if inserting default values (default values 253 // will be added later): 254 // 255 // INSERT INTO <table> DEFAULT VALUES 256 // 257 if !ins.DefaultValues() { 258 // Replace any DEFAULT expressions in the VALUES clause, if a VALUES clause 259 // exists: 260 // 261 // INSERT INTO <table> VALUES (..., DEFAULT, ...) 262 // 263 rows := mb.replaceDefaultExprs(ins.Rows) 264 265 mb.buildInputForInsert(inScope, rows) 266 } else { 267 mb.buildInputForInsert(inScope, nil /* rows */) 268 } 269 270 // Add default columns that were not explicitly specified by name or 271 // implicitly targeted by input columns. Also add any computed columns. In 272 // both cases, include columns undergoing mutations in the write-only state. 273 mb.addSynthesizedColsForInsert() 274 275 var returning tree.ReturningExprs 276 if resultsNeeded(ins.Returning) { 277 returning = *ins.Returning.(*tree.ReturningExprs) 278 } 279 280 switch { 281 // Case 1: Simple INSERT statement. 282 case ins.OnConflict == nil: 283 // Build the final insert statement, including any returned expressions. 284 mb.buildInsert(returning) 285 286 // Case 2: INSERT..ON CONFLICT DO NOTHING. 287 case ins.OnConflict.DoNothing: 288 // Wrap the input in one LEFT OUTER JOIN per UNIQUE index, and filter out 289 // rows that have conflicts. See the buildInputForDoNothing comment for 290 // more details. 291 conflictOrds := mb.mapColumnNamesToOrdinals(ins.OnConflict.Columns) 292 mb.buildInputForDoNothing(inScope, conflictOrds) 293 294 // Since buildInputForDoNothing filters out rows with conflicts, always 295 // insert rows that are not filtered. 296 mb.buildInsert(returning) 297 298 // Case 3: UPSERT statement. 299 case ins.OnConflict.IsUpsertAlias(): 300 // Add columns which will be updated by the Upsert when a conflict occurs. 301 // These are derived from the insert columns. 302 mb.setUpsertCols(ins.Columns) 303 304 // Check whether the existing rows need to be fetched in order to detect 305 // conflicts. 306 if mb.needExistingRows() { 307 // Left-join each input row to the target table, using conflict columns 308 // derived from the primary index as the join condition. 309 primaryOrds := getIndexLaxKeyOrdinals(mb.tab.Index(cat.PrimaryIndex)) 310 mb.buildInputForUpsert(inScope, primaryOrds, nil /* whereClause */) 311 312 // Add additional columns for computed expressions that may depend on any 313 // updated columns, as well as mutation columns with default values. 314 mb.addSynthesizedColsForUpdate() 315 } 316 317 // Build the final upsert statement, including any returned expressions. 318 mb.buildUpsert(returning) 319 320 // Case 4: INSERT..ON CONFLICT..DO UPDATE statement. 321 default: 322 // Left-join each input row to the target table, using the conflict columns 323 // as the join condition. 324 conflictOrds := mb.mapColumnNamesToOrdinals(ins.OnConflict.Columns) 325 mb.buildInputForUpsert(inScope, conflictOrds, ins.OnConflict.Where) 326 327 // Derive the columns that will be updated from the SET expressions. 328 mb.addTargetColsForUpdate(ins.OnConflict.Exprs) 329 330 // Build each of the SET expressions. 331 mb.addUpdateCols(ins.OnConflict.Exprs) 332 333 // Build the final upsert statement, including any returned expressions. 334 mb.buildUpsert(returning) 335 } 336 337 return mb.outScope 338 } 339 340 // needExistingRows returns true if an Upsert statement needs to fetch existing 341 // rows in order to detect conflicts. In some cases, it is not necessary to 342 // fetch existing rows, and then the KV Put operation can be used to blindly 343 // insert a new record or overwrite an existing record. This is possible when: 344 // 345 // 1. There are no secondary indexes. Existing values are needed to delete 346 // secondary index rows when the update causes them to move. 347 // 2. All non-key columns (including mutation columns) have insert and update 348 // values specified for them. 349 // 3. Each update value is the same as the corresponding insert value. 350 // 351 // TODO(radu): once FKs no longer require indexes, this function will have to 352 // take FKs into account explicitly. 353 // 354 // TODO(andyk): The fast path is currently only enabled when the UPSERT alias 355 // is explicitly selected by the user. It's possible to fast path some queries 356 // of the form INSERT ... ON CONFLICT, but the utility is low and there are lots 357 // of edge cases (that caused real correctness bugs #13437 #13962). As a result, 358 // this support was removed and needs to re-enabled. See #14482. 359 func (mb *mutationBuilder) needExistingRows() bool { 360 if mb.tab.DeletableIndexCount() > 1 { 361 return true 362 } 363 364 // Key columns are never updated and are assumed to be the same as the insert 365 // values. 366 // TODO(andyk): This is not true in the case of composite key encodings. See 367 // issue #34518. 368 keyOrds := getIndexLaxKeyOrdinals(mb.tab.Index(cat.PrimaryIndex)) 369 for i, n := 0, mb.tab.DeletableColumnCount(); i < n; i++ { 370 if keyOrds.Contains(i) { 371 // #1: Don't consider key columns. 372 continue 373 } 374 insertColID := mb.insertColID(i) 375 if insertColID == 0 { 376 // #2: Non-key column does not have insert value specified. 377 return true 378 } 379 if insertColID != mb.scopeOrdToColID(mb.updateOrds[i]) { 380 // #3: Update value is not same as corresponding insert value. 381 return true 382 } 383 } 384 return false 385 } 386 387 // addTargetNamedColsForInsert adds a list of user-specified column names to the 388 // list of table columns that are the target of the Insert operation. 389 func (mb *mutationBuilder) addTargetNamedColsForInsert(names tree.NameList) { 390 if len(mb.targetColList) != 0 { 391 panic(errors.AssertionFailedf("addTargetNamedColsForInsert cannot be called more than once")) 392 } 393 394 // Add target table columns by the names specified in the Insert statement. 395 mb.addTargetColsByName(names) 396 397 // Ensure that primary key columns are in the target column list, or that 398 // they have default values. 399 mb.checkPrimaryKeyForInsert() 400 401 // Ensure that foreign keys columns are in the target column list, or that 402 // they have default values. 403 mb.checkForeignKeysForInsert() 404 } 405 406 // checkPrimaryKeyForInsert ensures that the columns of the primary key are 407 // either assigned values by the INSERT statement, or else have default/computed 408 // values. If neither condition is true, checkPrimaryKeyForInsert raises an 409 // error. 410 func (mb *mutationBuilder) checkPrimaryKeyForInsert() { 411 primary := mb.tab.Index(cat.PrimaryIndex) 412 for i, n := 0, primary.KeyColumnCount(); i < n; i++ { 413 col := primary.Column(i) 414 if col.HasDefault() || col.IsComputed() { 415 // The column has a default or computed value. 416 continue 417 } 418 419 colID := mb.tabID.ColumnID(col.Ordinal) 420 if mb.targetColSet.Contains(colID) { 421 // The column is explicitly specified in the target name list. 422 continue 423 } 424 425 panic(pgerror.Newf(pgcode.InvalidForeignKey, 426 "missing %q primary key column", col.ColName())) 427 } 428 } 429 430 // checkForeignKeysForInsert ensures that all composite foreign keys that 431 // specify the matching method as MATCH FULL have all of their columns assigned 432 // values by the INSERT statement, or else have default/computed values. 433 // Alternatively, all columns can be unspecified. If neither condition is true, 434 // checkForeignKeys raises an error. Here is an example: 435 // 436 // CREATE TABLE orders ( 437 // id INT, 438 // cust_id INT, 439 // state STRING, 440 // FOREIGN KEY (cust_id, state) REFERENCES customers (id, state) MATCH FULL 441 // ) 442 // 443 // INSERT INTO orders (cust_id) VALUES (1) 444 // 445 // This INSERT statement would trigger a static error, because only cust_id is 446 // specified in the INSERT statement. Either the state column must be specified 447 // as well, or else neither column can be specified. 448 func (mb *mutationBuilder) checkForeignKeysForInsert() { 449 for i, n := 0, mb.tab.OutboundForeignKeyCount(); i < n; i++ { 450 fk := mb.tab.OutboundForeignKey(i) 451 numCols := fk.ColumnCount() 452 453 // This check should only be performed on composite foreign keys that use 454 // the MATCH FULL method. 455 if numCols < 2 || fk.MatchMethod() != tree.MatchFull { 456 continue 457 } 458 459 var missingCols []string 460 allMissing := true 461 for j := 0; j < numCols; j++ { 462 ord := fk.OriginColumnOrdinal(mb.tab, j) 463 col := mb.tab.Column(ord) 464 if col.HasDefault() || col.IsComputed() { 465 // The column has a default value. 466 allMissing = false 467 continue 468 } 469 470 colID := mb.tabID.ColumnID(ord) 471 if mb.targetColSet.Contains(colID) { 472 // The column is explicitly specified in the target name list. 473 allMissing = false 474 continue 475 } 476 477 missingCols = append(missingCols, string(col.ColName())) 478 } 479 if allMissing { 480 continue 481 } 482 483 switch len(missingCols) { 484 case 0: 485 // Do nothing. 486 case 1: 487 panic(pgerror.Newf(pgcode.ForeignKeyViolation, 488 "missing value for column %q in multi-part foreign key", missingCols[0])) 489 default: 490 sort.Strings(missingCols) 491 panic(pgerror.Newf(pgcode.ForeignKeyViolation, 492 "missing values for columns %q in multi-part foreign key", missingCols)) 493 } 494 } 495 } 496 497 // addTargetTableColsForInsert adds up to maxCols columns to the list of columns 498 // that will be set by an INSERT operation. Non-mutation columns are added from 499 // the target table in the same order they appear in its schema. This method is 500 // used when the target columns are not explicitly specified in the INSERT 501 // statement: 502 // 503 // INSERT INTO t VALUES (1, 2, 3) 504 // 505 // In this example, the first three columns of table t would be added as target 506 // columns. 507 func (mb *mutationBuilder) addTargetTableColsForInsert(maxCols int) { 508 if len(mb.targetColList) != 0 { 509 panic(errors.AssertionFailedf("addTargetTableColsForInsert cannot be called more than once")) 510 } 511 512 // Only consider non-mutation columns, since mutation columns are hidden from 513 // the SQL user. 514 numCols := 0 515 for i, n := 0, mb.tab.ColumnCount(); i < n && numCols < maxCols; i++ { 516 // Skip hidden columns. 517 if mb.tab.Column(i).IsHidden() { 518 continue 519 } 520 521 mb.addTargetCol(i) 522 numCols++ 523 } 524 525 // Ensure that the number of input columns does not exceed the number of 526 // target columns. 527 mb.checkNumCols(len(mb.targetColList), maxCols) 528 } 529 530 // buildInputForInsert constructs the memo group for the input expression and 531 // constructs a new output scope containing that expression's output columns. 532 func (mb *mutationBuilder) buildInputForInsert(inScope *scope, inputRows *tree.Select) { 533 // Handle DEFAULT VALUES case by creating a single empty row as input. 534 if inputRows == nil { 535 mb.outScope = inScope.push() 536 mb.outScope.expr = mb.b.factory.ConstructValues(memo.ScalarListWithEmptyTuple, &memo.ValuesPrivate{ 537 Cols: opt.ColList{}, 538 ID: mb.md.NextUniqueID(), 539 }) 540 return 541 } 542 543 // If there are already required target columns, then those will provide 544 // desired input types. Otherwise, input columns are mapped to the table's 545 // non-hidden columns by corresponding ordinal position. Exclude hidden 546 // columns to prevent this statement from writing hidden columns: 547 // 548 // INSERT INTO <table> VALUES (...) 549 // 550 // However, hidden columns can be written if the target columns were 551 // explicitly specified: 552 // 553 // INSERT INTO <table> (...) VALUES (...) 554 // 555 var desiredTypes []*types.T 556 if len(mb.targetColList) != 0 { 557 desiredTypes = make([]*types.T, len(mb.targetColList)) 558 for i, colID := range mb.targetColList { 559 desiredTypes[i] = mb.md.ColumnMeta(colID).Type 560 } 561 } else { 562 // Do not target mutation columns. 563 desiredTypes = make([]*types.T, 0, mb.tab.ColumnCount()) 564 for i, n := 0, mb.tab.ColumnCount(); i < n; i++ { 565 tabCol := mb.tab.Column(i) 566 if !tabCol.IsHidden() { 567 desiredTypes = append(desiredTypes, tabCol.DatumType()) 568 } 569 } 570 } 571 572 mb.outScope = mb.b.buildStmt(inputRows, desiredTypes, inScope) 573 574 if len(mb.targetColList) != 0 { 575 // Target columns already exist, so ensure that the number of input 576 // columns exactly matches the number of target columns. 577 mb.checkNumCols(len(mb.targetColList), len(mb.outScope.cols)) 578 } else { 579 // No target columns have been added by previous steps, so add columns 580 // that are implicitly targeted by the input expression. 581 mb.addTargetTableColsForInsert(len(mb.outScope.cols)) 582 } 583 584 // Loop over input columns and: 585 // 1. Type check each column 586 // 2. Assign name to each column 587 // 3. Add scope column ordinal to the insertOrds list. 588 for i := range mb.outScope.cols { 589 inCol := &mb.outScope.cols[i] 590 ord := mb.tabID.ColumnOrdinal(mb.targetColList[i]) 591 592 // Type check the input column against the corresponding table column. 593 checkDatumTypeFitsColumnType(mb.tab.Column(ord), inCol.typ) 594 595 // Assign name of input column. 596 inCol.name = tree.Name(mb.md.ColumnMeta(mb.targetColList[i]).Alias) 597 598 // Record the ordinal position of the scope column that contains the 599 // value to be inserted into the corresponding target table column. 600 mb.insertOrds[ord] = scopeOrdinal(i) 601 } 602 } 603 604 // addSynthesizedColsForInsert wraps an Insert input expression with a Project 605 // operator containing any default (or nullable) columns and any computed 606 // columns that are not yet part of the target column list. This includes all 607 // write-only mutation columns, since they must always have default or computed 608 // values. 609 func (mb *mutationBuilder) addSynthesizedColsForInsert() { 610 // Start by adding non-computed columns that have not already been explicitly 611 // specified in the query. Do this before adding computed columns, since those 612 // may depend on non-computed columns. 613 mb.addSynthesizedCols( 614 mb.insertOrds, 615 func(colOrd int) bool { return !mb.tab.Column(colOrd).IsComputed() }, 616 ) 617 618 // Possibly round DECIMAL-related columns containing insertion values (whether 619 // synthesized or not). 620 mb.roundDecimalValues(mb.insertOrds, false /* roundComputedCols */) 621 622 // Now add all computed columns. 623 mb.addSynthesizedCols( 624 mb.insertOrds, 625 func(colOrd int) bool { return mb.tab.Column(colOrd).IsComputed() }, 626 ) 627 628 // Possibly round DECIMAL-related computed columns. 629 mb.roundDecimalValues(mb.insertOrds, true /* roundComputedCols */) 630 } 631 632 // buildInsert constructs an Insert operator, possibly wrapped by a Project 633 // operator that corresponds to the given RETURNING clause. 634 func (mb *mutationBuilder) buildInsert(returning tree.ReturningExprs) { 635 // Disambiguate names so that references in any expressions, such as a 636 // check constraint, refer to the correct columns. 637 mb.disambiguateColumns() 638 639 // Add any check constraint boolean columns to the input. 640 mb.addCheckConstraintCols() 641 642 // Add any partial index boolean columns to the input. 643 mb.addPartialIndexPredicateCols() 644 645 mb.buildFKChecksForInsert() 646 647 private := mb.makeMutationPrivate(returning != nil) 648 mb.outScope.expr = mb.b.factory.ConstructInsert(mb.outScope.expr, mb.checks, private) 649 650 mb.buildReturning(returning) 651 } 652 653 // buildInputForDoNothing wraps the input expression in LEFT OUTER JOIN 654 // expressions, one for each UNIQUE index on the target table. It then adds a 655 // filter that discards rows that have a conflict (by checking a not-null table 656 // column to see if it was null-extended by the left join). See the comment 657 // header for Builder.buildInsert for an example. 658 func (mb *mutationBuilder) buildInputForDoNothing(inScope *scope, conflictOrds util.FastIntSet) { 659 // DO NOTHING clause does not require ON CONFLICT columns. 660 var conflictIndex cat.Index 661 if !conflictOrds.Empty() { 662 // Check that the ON CONFLICT columns reference at most one target row by 663 // ensuring they match columns of a UNIQUE index. Using LEFT OUTER JOIN 664 // to detect conflicts relies upon this being true (otherwise result 665 // cardinality could increase). This is also a Postgres requirement. 666 conflictIndex = mb.ensureUniqueConflictCols(conflictOrds) 667 } 668 669 insertColSet := mb.outScope.expr.Relational().OutputCols 670 671 // Ignore any ordering requested by the input. 672 // TODO(andyk): do we need to do more here? 673 mb.outScope.ordering = nil 674 675 // Loop again over each UNIQUE index, potentially creating a left join + 676 // filter for each one. 677 for idx, idxCount := 0, mb.tab.IndexCount(); idx < idxCount; idx++ { 678 index := mb.tab.Index(idx) 679 if !index.IsUnique() { 680 continue 681 } 682 683 // If conflict columns were explicitly specified, then only check for a 684 // conflict on a single index. Otherwise, check on all indexes. 685 if conflictIndex != nil && conflictIndex != index { 686 continue 687 } 688 689 // Build the right side of the left outer join. Use a new metadata instance 690 // of the mutation table so that a different set of column IDs are used for 691 // the two tables in the self-join. 692 scanScope := mb.b.buildScan( 693 mb.b.addTable(mb.tab, &mb.alias), 694 nil, /* ordinals */ 695 nil, /* indexFlags */ 696 noRowLocking, 697 excludeMutations, 698 inScope, 699 ) 700 701 // Remember the column ID of a scan column that is not null. This will be 702 // used to detect whether a conflict was detected for a row. Such a column 703 // must always exist, since the index always contains the primary key 704 // columns, either explicitly or implicitly. 705 notNullColID := scanScope.cols[findNotNullIndexCol(index)].id 706 707 // Build the join condition by creating a conjunction of equality conditions 708 // that test each conflict column: 709 // 710 // ON ins.x = scan.a AND ins.y = scan.b 711 // 712 var on memo.FiltersExpr 713 for i, n := 0, index.LaxKeyColumnCount(); i < n; i++ { 714 indexCol := index.Column(i) 715 scanColID := scanScope.cols[indexCol.Ordinal].id 716 717 condition := mb.b.factory.ConstructEq( 718 mb.b.factory.ConstructVariable(mb.insertColID(indexCol.Ordinal)), 719 mb.b.factory.ConstructVariable(scanColID), 720 ) 721 on = append(on, mb.b.factory.ConstructFiltersItem(condition)) 722 } 723 724 // Construct the left join + filter. 725 // TODO(andyk): Convert this to use anti-join once we have support for 726 // lookup anti-joins. 727 mb.outScope.expr = mb.b.factory.ConstructProject( 728 mb.b.factory.ConstructSelect( 729 mb.b.factory.ConstructLeftJoin( 730 mb.outScope.expr, 731 scanScope.expr, 732 on, 733 memo.EmptyJoinPrivate, 734 ), 735 memo.FiltersExpr{mb.b.factory.ConstructFiltersItem( 736 mb.b.factory.ConstructIs( 737 mb.b.factory.ConstructVariable(notNullColID), 738 memo.NullSingleton, 739 ), 740 )}, 741 ), 742 memo.EmptyProjectionsExpr, 743 insertColSet, 744 ) 745 746 // Add an UpsertDistinctOn operator to ensure there are no duplicate input 747 // rows for this unique index. Duplicate rows can trigger conflict errors 748 // at runtime, which DO NOTHING is not supposed to do. See issue #37880. 749 var conflictCols opt.ColSet 750 for i, n := 0, index.LaxKeyColumnCount(); i < n; i++ { 751 indexCol := index.Column(i) 752 conflictCols.Add(mb.outScope.cols[mb.insertOrds[indexCol.Ordinal]].id) 753 } 754 755 // Treat NULL values as distinct from one another. And if duplicates are 756 // detected, remove them rather than raising an error. 757 mb.outScope = mb.b.buildDistinctOn( 758 conflictCols, mb.outScope, true /* nullsAreDistinct */, "" /* errorOnDup */) 759 } 760 761 mb.targetColList = make(opt.ColList, 0, mb.tab.DeletableColumnCount()) 762 mb.targetColSet = opt.ColSet{} 763 } 764 765 // buildInputForUpsert assumes that the output scope already contains the insert 766 // columns. It left-joins each insert row to the target table, using the given 767 // conflict columns as the join condition. It also selects one of the table 768 // columns to be a "canary column" that can be tested to determine whether a 769 // given insert row conflicts with an existing row in the table. If it is null, 770 // then there is no conflict. 771 func (mb *mutationBuilder) buildInputForUpsert( 772 inScope *scope, conflictOrds util.FastIntSet, whereClause *tree.Where, 773 ) { 774 // Check that the ON CONFLICT columns reference at most one target row. 775 // Using LEFT OUTER JOIN to detect conflicts relies upon this being true 776 // (otherwise result cardinality could increase). This is also a Postgres 777 // requirement. 778 mb.ensureUniqueConflictCols(conflictOrds) 779 780 // Ensure that input is distinct on the conflict columns. Otherwise, the 781 // Upsert could affect the same row more than once, which can lead to index 782 // corruption. See issue #44466 for more context. 783 // 784 // Ignore any ordering requested by the input. Since the 785 // EnsureUpsertDistinctOn operator does not allow multiple rows in distinct 786 // groupings, the internal ordering is meaningless (and can trigger a 787 // misleading error in buildDistinctOn if present). 788 var conflictCols opt.ColSet 789 for ord, ok := conflictOrds.Next(0); ok; ord, ok = conflictOrds.Next(ord + 1) { 790 conflictCols.Add(mb.outScope.cols[mb.insertOrds[ord]].id) 791 } 792 mb.outScope.ordering = nil 793 mb.outScope = mb.b.buildDistinctOn( 794 conflictCols, mb.outScope, true /* nullsAreDistinct */, duplicateUpsertErrText) 795 796 // Re-alias all INSERT columns so that they are accessible as if they were 797 // part of a special data source named "crdb_internal.excluded". 798 for i := range mb.outScope.cols { 799 mb.outScope.cols[i].table = excludedTableName 800 } 801 802 // Build the right side of the left outer join. Use a different instance of 803 // table metadata so that col IDs do not overlap. 804 // 805 // NOTE: Include mutation columns, but be careful to never use them for any 806 // reason other than as "fetch columns". See buildScan comment. 807 // TODO(andyk): Why does execution engine need mutation columns for Insert? 808 fetchScope := mb.b.buildScan( 809 mb.b.addTable(mb.tab, &mb.alias), 810 nil, /* ordinals */ 811 nil, /* indexFlags */ 812 noRowLocking, 813 includeMutations, 814 inScope, 815 ) 816 817 // Record a not-null "canary" column. After the left-join, this will be null 818 // if no conflict has been detected, or not null otherwise. At least one not- 819 // null column must exist, since primary key columns are not-null. 820 canaryScopeCol := &fetchScope.cols[findNotNullIndexCol(mb.tab.Index(cat.PrimaryIndex))] 821 mb.canaryColID = canaryScopeCol.id 822 823 // Set fetchOrds to point to the scope columns created for the fetch values. 824 for i := range fetchScope.cols { 825 // Fetch columns come after insert columns. 826 mb.fetchOrds[i] = scopeOrdinal(len(mb.outScope.cols) + i) 827 } 828 829 // Add the fetch columns to the current scope. It's OK to modify the current 830 // scope because it contains only INSERT columns that were added by the 831 // mutationBuilder, and which aren't needed for any other purpose. 832 mb.outScope.appendColumnsFromScope(fetchScope) 833 834 // Build the join condition by creating a conjunction of equality conditions 835 // that test each conflict column: 836 // 837 // ON ins.x = scan.a AND ins.y = scan.b 838 // 839 var on memo.FiltersExpr 840 for i := range fetchScope.cols { 841 // Include fetch columns with ordinal positions in conflictOrds. 842 if conflictOrds.Contains(i) { 843 condition := mb.b.factory.ConstructEq( 844 mb.b.factory.ConstructVariable(mb.insertColID(i)), 845 mb.b.factory.ConstructVariable(fetchScope.cols[i].id), 846 ) 847 on = append(on, mb.b.factory.ConstructFiltersItem(condition)) 848 } 849 } 850 851 // Construct the left join. 852 mb.outScope.expr = mb.b.factory.ConstructLeftJoin( 853 mb.outScope.expr, 854 fetchScope.expr, 855 on, 856 memo.EmptyJoinPrivate, 857 ) 858 859 // Add a filter from the WHERE clause if one exists. 860 if whereClause != nil { 861 where := &tree.Where{ 862 Type: whereClause.Type, 863 Expr: &tree.OrExpr{ 864 Left: &tree.ComparisonExpr{ 865 Operator: tree.IsNotDistinctFrom, 866 Left: canaryScopeCol, 867 Right: tree.DNull, 868 }, 869 Right: whereClause.Expr, 870 }, 871 } 872 mb.b.buildWhere(where, mb.outScope) 873 } 874 875 mb.targetColList = make(opt.ColList, 0, mb.tab.DeletableColumnCount()) 876 mb.targetColSet = opt.ColSet{} 877 } 878 879 // setUpsertCols sets the list of columns to be updated in case of conflict. 880 // There are two cases to handle: 881 // 882 // 1. Target columns are explicitly specified: 883 // UPSERT INTO abc (col1, col2, ...) <input-expr> 884 // 885 // 2. Target columns are implicitly derived: 886 // UPSERT INTO abc <input-expr> 887 // 888 // In case #1, only the columns that were specified by the user will be updated. 889 // In case #2, all non-mutation columns in the table will be updated. 890 // 891 // Note that primary key columns (i.e. the conflict detection columns) are never 892 // updated. This can have an impact in unusual cases where equal SQL values have 893 // different representations. For example: 894 // 895 // CREATE TABLE abc (a DECIMAL PRIMARY KEY, b DECIMAL) 896 // INSERT INTO abc VALUES (1, 2.0) 897 // UPSERT INTO abc VALUES (1.0, 2) 898 // 899 // The UPSERT statement will update the value of column "b" from 2 => 2.0, but 900 // will not modify column "a". 901 func (mb *mutationBuilder) setUpsertCols(insertCols tree.NameList) { 902 if len(insertCols) != 0 { 903 for _, name := range insertCols { 904 // Table column must exist, since existence of insertCols has already 905 // been checked previously. 906 ord := cat.FindTableColumnByName(mb.tab, name) 907 mb.updateOrds[ord] = mb.insertOrds[ord] 908 } 909 } else { 910 copy(mb.updateOrds, mb.insertOrds) 911 } 912 913 // Never update mutation columns. 914 for i, n := mb.tab.ColumnCount(), mb.tab.DeletableColumnCount(); i < n; i++ { 915 mb.updateOrds[i] = -1 916 } 917 918 // Never update primary key columns. 919 conflictIndex := mb.tab.Index(cat.PrimaryIndex) 920 for i, n := 0, conflictIndex.KeyColumnCount(); i < n; i++ { 921 mb.updateOrds[conflictIndex.Column(i).Ordinal] = -1 922 } 923 } 924 925 // buildUpsert constructs an Upsert operator, possibly wrapped by a Project 926 // operator that corresponds to the given RETURNING clause. 927 func (mb *mutationBuilder) buildUpsert(returning tree.ReturningExprs) { 928 // Merge input insert and update columns using CASE expressions. 929 mb.projectUpsertColumns() 930 931 // Disambiguate names so that references in any expressions, such as a 932 // check constraint, refer to the correct columns. 933 mb.disambiguateColumns() 934 935 // Add any check constraint boolean columns to the input. 936 mb.addCheckConstraintCols() 937 938 mb.buildFKChecksForUpsert() 939 940 private := mb.makeMutationPrivate(returning != nil) 941 mb.outScope.expr = mb.b.factory.ConstructUpsert(mb.outScope.expr, mb.checks, private) 942 943 mb.buildReturning(returning) 944 } 945 946 // projectUpsertColumns projects a set of merged columns that will be either 947 // inserted into the target table, or else used to update an existing row, 948 // depending on whether the canary column is null. For example: 949 // 950 // UPSERT INTO ab VALUES (ins_a, ins_b) ON CONFLICT (a) DO UPDATE SET b=upd_b 951 // 952 // will cause the columns to be projected: 953 // 954 // SELECT 955 // fetch_a, 956 // fetch_b, 957 // CASE WHEN fetch_a IS NULL ins_a ELSE fetch_a END AS ups_a, 958 // CASE WHEN fetch_b IS NULL ins_b ELSE upd_b END AS ups_b, 959 // FROM (SELECT ins_a, ins_b, upd_b, fetch_a, fetch_b FROM ...) 960 // 961 // For each column, a CASE expression is created that toggles between the insert 962 // and update values depending on whether the canary column is null. These 963 // columns can then feed into any constraint checking expressions, which operate 964 // on the final result values. 965 func (mb *mutationBuilder) projectUpsertColumns() { 966 projectionsScope := mb.outScope.replace() 967 projectionsScope.appendColumnsFromScope(mb.outScope) 968 969 // Add a new column for each target table column that needs to be upserted. 970 // This can include mutation columns. 971 for i, n := 0, mb.tab.DeletableColumnCount(); i < n; i++ { 972 insertScopeOrd := mb.insertOrds[i] 973 updateScopeOrd := mb.updateOrds[i] 974 if updateScopeOrd == -1 { 975 updateScopeOrd = mb.fetchOrds[i] 976 } 977 978 // Skip columns that will only be inserted or only updated. 979 if insertScopeOrd == -1 || updateScopeOrd == -1 { 980 continue 981 } 982 983 // Skip columns where the insert value and update value are the same. 984 if mb.scopeOrdToColID(insertScopeOrd) == mb.scopeOrdToColID(updateScopeOrd) { 985 continue 986 } 987 988 // Generate CASE that toggles between insert and update column. 989 caseExpr := mb.b.factory.ConstructCase( 990 memo.TrueSingleton, 991 memo.ScalarListExpr{ 992 mb.b.factory.ConstructWhen( 993 mb.b.factory.ConstructIs( 994 mb.b.factory.ConstructVariable(mb.canaryColID), 995 memo.NullSingleton, 996 ), 997 mb.b.factory.ConstructVariable(mb.outScope.cols[insertScopeOrd].id), 998 ), 999 }, 1000 mb.b.factory.ConstructVariable(mb.outScope.cols[updateScopeOrd].id), 1001 ) 1002 1003 alias := fmt.Sprintf("upsert_%s", mb.tab.Column(i).ColName()) 1004 typ := mb.outScope.cols[insertScopeOrd].typ 1005 scopeCol := mb.b.synthesizeColumn(projectionsScope, alias, typ, nil /* expr */, caseExpr) 1006 scopeColOrd := scopeOrdinal(len(projectionsScope.cols) - 1) 1007 1008 // Assign name to synthesized column. 1009 scopeCol.name = mb.tab.Column(i).ColName() 1010 1011 // Update the scope ordinals for the update columns that are involved in 1012 // the Upsert. The new columns will be used by the Upsert operator in place 1013 // of the original columns. Also set the scope ordinals for the upsert 1014 // columns, as those columns can be used by RETURNING columns. 1015 if mb.updateOrds[i] != -1 { 1016 mb.updateOrds[i] = scopeColOrd 1017 } 1018 mb.upsertOrds[i] = scopeColOrd 1019 } 1020 1021 mb.b.constructProjectForScope(mb.outScope, projectionsScope) 1022 mb.outScope = projectionsScope 1023 } 1024 1025 // ensureUniqueConflictCols tries to prove that the given set of column ordinals 1026 // correspond to the columns of at least one UNIQUE index on the target table. 1027 // If true, then ensureUniqueConflictCols returns the matching index. Otherwise, 1028 // it reports an error. 1029 func (mb *mutationBuilder) ensureUniqueConflictCols(conflictOrds util.FastIntSet) cat.Index { 1030 for idx, idxCount := 0, mb.tab.IndexCount(); idx < idxCount; idx++ { 1031 index := mb.tab.Index(idx) 1032 1033 // Skip non-unique indexes. Use lax key columns, which always contain 1034 // the minimum columns that ensure uniqueness. Null values are considered 1035 // to be *not* equal, but that's OK because the join condition rejects 1036 // nulls anyway. 1037 if !index.IsUnique() || index.LaxKeyColumnCount() != conflictOrds.Len() { 1038 continue 1039 } 1040 1041 // Determine whether the conflict columns match the columns in the lax key. 1042 indexOrds := getIndexLaxKeyOrdinals(index) 1043 if indexOrds.Equals(conflictOrds) { 1044 return index 1045 } 1046 } 1047 panic(pgerror.Newf(pgcode.InvalidColumnReference, 1048 "there is no unique or exclusion constraint matching the ON CONFLICT specification")) 1049 } 1050 1051 // mapColumnNamesToOrdinals returns the set of ordinal positions within the 1052 // target table that correspond to the given names. 1053 func (mb *mutationBuilder) mapColumnNamesToOrdinals(names tree.NameList) util.FastIntSet { 1054 var ords util.FastIntSet 1055 for _, name := range names { 1056 found := false 1057 for i, n := 0, mb.tab.ColumnCount(); i < n; i++ { 1058 tabCol := mb.tab.Column(i) 1059 if tabCol.ColName() == name { 1060 ords.Add(i) 1061 found = true 1062 break 1063 } 1064 } 1065 1066 if !found { 1067 panic(sqlbase.NewUndefinedColumnError(string(name))) 1068 } 1069 } 1070 return ords 1071 }