github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/backfill.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/sql/backfill"
	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
	"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

const (
	// TODO(vivek): Replace these constants with a runtime budget for the
	// operation chunk involved.

	// columnTruncateAndBackfillChunkSize is the maximum number of columns
	// processed per chunk during column truncate or backfill.
	columnTruncateAndBackfillChunkSize = 200

	// indexTruncateChunkSize is the maximum number of index entries truncated
	// per chunk during an index truncation. This value is larger than the
	// other chunk constants because the operation involves only running a
	// DeleteRange().
	indexTruncateChunkSize = 600

	// indexTxnBackfillChunkSize is the maximum number of index entries
	// backfilled per chunk during an index backfill done in a txn. The index
	// backfill involves a table scan, and a number of individual ops presented
	// in a batch. This value is smaller than columnTruncateAndBackfillChunkSize,
	// because it involves a number of individual index row updates that can be
	// scattered over many ranges.
	indexTxnBackfillChunkSize = 100

	// checkpointInterval is the interval after which a checkpoint of the
	// schema change is posted.
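	// distBackfill passes this value (or the testing-knob override) to the
	// backfiller flow as the cadence at which resume progress is persisted.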
	checkpointInterval = 2 * time.Minute
)

var indexBulkBackfillChunkSize = settings.RegisterIntSetting(
	"schemachanger.bulk_index_backfill.batch_size",
	"number of rows to process at a time during bulk index backfill",
	50000,
)

var _ sort.Interface = columnsByID{}
var _ sort.Interface = indexesByID{}

type columnsByID []sqlbase.ColumnDescriptor

func (cds columnsByID) Len() int {
	return len(cds)
}
func (cds columnsByID) Less(i, j int) bool {
	return cds[i].ID < cds[j].ID
}
func (cds columnsByID) Swap(i, j int) {
	cds[i], cds[j] = cds[j], cds[i]
}

type indexesByID []sqlbase.IndexDescriptor

func (ids indexesByID) Len() int {
	return len(ids)
}
func (ids indexesByID) Less(i, j int) bool {
	return ids[i].ID < ids[j].ID
}
func (ids indexesByID) Swap(i, j int) {
	ids[i], ids[j] = ids[j], ids[i]
}

func (sc *SchemaChanger) getChunkSize(chunkSize int64) int64 {
	if sc.testingKnobs.BackfillChunkSize > 0 {
		return sc.testingKnobs.BackfillChunkSize
	}
	return chunkSize
}

// scTxnFn is the type of functions that operate using transactions in the backfiller.
type scTxnFn func(ctx context.Context, txn *kv.Txn, evalCtx *extendedEvalContext) error

// historicalTxnRunner is the type of the callback used by the various
// helper functions to run checks at a fixed timestamp (logically, at
// the start of the backfill).
type historicalTxnRunner func(ctx context.Context, fn scTxnFn) error

// makeFixedTimestampRunner creates a historicalTxnRunner suitable for use by the helpers.
func (sc *SchemaChanger) makeFixedTimestampRunner(readAsOf hlc.Timestamp) historicalTxnRunner {
	runner := func(ctx context.Context, retryable scTxnFn) error {
		return sc.fixedTimestampTxn(ctx, readAsOf, func(ctx context.Context, txn *kv.Txn) error {
			// We need to re-create the evalCtx since the txn may retry.
			evalCtx := createSchemaChangeEvalCtx(ctx, sc.execCfg, readAsOf, sc.ieFactory)
			return retryable(ctx, txn, &evalCtx)
		})
	}
	return runner
}

func (sc *SchemaChanger) fixedTimestampTxn(
	ctx context.Context,
	readAsOf hlc.Timestamp,
	retryable func(ctx context.Context, txn *kv.Txn) error,
) error {
	return sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		txn.SetFixedTimestamp(ctx, readAsOf)
		return retryable(ctx, txn)
	})
}

// runBackfill runs the backfill for the schema changer.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely. The various
// functions that it calls make their own txns.
func (sc *SchemaChanger) runBackfill(ctx context.Context) error {
	if sc.testingKnobs.RunBeforeBackfill != nil {
		if err := sc.testingKnobs.RunBeforeBackfill(); err != nil {
			return err
		}
	}

	// Mutations are applied in a FIFO order. Only apply the first set of
	// mutations. Collect the elements that are part of the mutation.
	var droppedIndexDescs []sqlbase.IndexDescriptor
	var addedIndexSpans []roachpb.Span

	var constraintsToDrop []sqlbase.ConstraintToUpdate
	var constraintsToAddBeforeValidation []sqlbase.ConstraintToUpdate
	var constraintsToValidate []sqlbase.ConstraintToUpdate

	tableDesc, err := sc.updateJobRunningStatus(ctx, RunningStatusBackfill)
	if err != nil {
		return err
	}

	// Short circuit the backfill if the table has been deleted.
	if tableDesc.Dropped() {
		return nil
	}
	version := tableDesc.Version

	log.Infof(ctx, "Running backfill for %q, v=%d, m=%d",
		tableDesc.Name, tableDesc.Version, sc.mutationID)

	needColumnBackfill := false
	for _, m := range tableDesc.Mutations {
		if m.MutationID != sc.mutationID {
			break
		}
		switch m.Direction {
		case sqlbase.DescriptorMutation_ADD:
			switch t := m.Descriptor_.(type) {
			case *sqlbase.DescriptorMutation_Column:
				if sqlbase.ColumnNeedsBackfill(m.GetColumn()) {
					needColumnBackfill = true
				}
			case *sqlbase.DescriptorMutation_Index:
				addedIndexSpans = append(addedIndexSpans, tableDesc.IndexSpan(sc.execCfg.Codec, t.Index.ID))
			case *sqlbase.DescriptorMutation_Constraint:
				switch t.Constraint.ConstraintType {
				case sqlbase.ConstraintToUpdate_CHECK:
					if t.Constraint.Check.Validity == sqlbase.ConstraintValidity_Validating {
						constraintsToAddBeforeValidation = append(constraintsToAddBeforeValidation, *t.Constraint)
						constraintsToValidate = append(constraintsToValidate, *t.Constraint)
					}
				case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
					if t.Constraint.ForeignKey.Validity == sqlbase.ConstraintValidity_Validating {
						constraintsToAddBeforeValidation = append(constraintsToAddBeforeValidation, *t.Constraint)
						constraintsToValidate = append(constraintsToValidate, *t.Constraint)
					}
				case sqlbase.ConstraintToUpdate_NOT_NULL:
					// NOT NULL constraints are always validated before they can be added.
					constraintsToAddBeforeValidation = append(constraintsToAddBeforeValidation, *t.Constraint)
					constraintsToValidate = append(constraintsToValidate, *t.Constraint)
				}
			case *sqlbase.DescriptorMutation_PrimaryKeySwap, *sqlbase.DescriptorMutation_ComputedColumnSwap:
				// The backfiller doesn't need to do anything here.
			default:
				return errors.AssertionFailedf(
					"unsupported mutation: %+v", m)
			}

		case sqlbase.DescriptorMutation_DROP:
			switch t := m.Descriptor_.(type) {
			case *sqlbase.DescriptorMutation_Column:
				needColumnBackfill = true
			case *sqlbase.DescriptorMutation_Index:
				if !canClearRangeForDrop(t.Index) {
					droppedIndexDescs = append(droppedIndexDescs, *t.Index)
				}
			case *sqlbase.DescriptorMutation_Constraint:
				constraintsToDrop = append(constraintsToDrop, *t.Constraint)
			case *sqlbase.DescriptorMutation_PrimaryKeySwap, *sqlbase.DescriptorMutation_ComputedColumnSwap:
				// The backfiller doesn't need to do anything here.
			default:
				return errors.AssertionFailedf(
					"unsupported mutation: %+v", m)
			}
		}
	}

	// First drop constraints and indexes, then add/drop columns, and only then
	// add indexes and constraints.

	// Drop constraints.
	if len(constraintsToDrop) > 0 {
		descs, err := sc.dropConstraints(ctx, constraintsToDrop)
		if err != nil {
			return err
		}
		version = descs[tableDesc.ID].Version
	}

	// Drop indexes not to be removed by `ClearRange`.
	if len(droppedIndexDescs) > 0 {
		if err := sc.truncateIndexes(ctx, version, droppedIndexDescs); err != nil {
			return err
		}
	}

	// Add and drop columns.
	if needColumnBackfill {
		if err := sc.truncateAndBackfillColumns(ctx, version); err != nil {
			return err
		}
	}

	// Add new indexes.
	if len(addedIndexSpans) > 0 {
		// Check if bulk-adding is enabled and supported by indexes (i.e., non-unique).
		if err := sc.backfillIndexes(ctx, version, addedIndexSpans); err != nil {
			return err
		}
	}

	// Add check and foreign key constraints, publish the new version of the table descriptor,
	// and wait until the entire cluster is on the new version. This is basically
	// a state transition for the schema change, which must happen after the
	// columns are backfilled and before constraint validation begins. This
	// ensures that 1) all columns are writable and backfilled when the constraint
	// starts being enforced on insert/update (which is relevant in the case where
	// a constraint references both public and non-public columns), and 2) the
	// validation occurs only when the entire cluster is already enforcing the
	// constraint on insert/update.
	if len(constraintsToAddBeforeValidation) > 0 {
		if err := sc.addConstraints(ctx, constraintsToAddBeforeValidation); err != nil {
			return err
		}
	}

	// Validate check and foreign key constraints.
	if len(constraintsToValidate) > 0 {
		if err := sc.validateConstraints(ctx, constraintsToValidate); err != nil {
			return err
		}
	}

	log.Infof(ctx, "Completed backfill for %q, v=%d, m=%d",
		tableDesc.Name, tableDesc.Version, sc.mutationID)

	if sc.testingKnobs.RunAfterBackfill != nil {
		if err := sc.testingKnobs.RunAfterBackfill(*sc.job.ID()); err != nil {
			return err
		}
	}

	return nil
}

// dropConstraints publishes a new version of the given table descriptor with
// the given constraints removed from it, and waits until the entire cluster is
// on the new version of the table descriptor. It returns the new table descs.
func (sc *SchemaChanger) dropConstraints(
	ctx context.Context, constraints []sqlbase.ConstraintToUpdate,
) (map[sqlbase.ID]*ImmutableTableDescriptor, error) {
	fksByBackrefTable := make(map[sqlbase.ID][]*sqlbase.ConstraintToUpdate)
	for i := range constraints {
		c := &constraints[i]
		if c.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY &&
			c.ForeignKey.ReferencedTableID != sc.tableID {
			fksByBackrefTable[c.ForeignKey.ReferencedTableID] = append(fksByBackrefTable[c.ForeignKey.ReferencedTableID], c)
		}
	}
	tableIDsToUpdate := make([]sqlbase.ID, 0, len(fksByBackrefTable)+1)
	tableIDsToUpdate = append(tableIDsToUpdate, sc.tableID)
	for id := range fksByBackrefTable {
		tableIDsToUpdate = append(tableIDsToUpdate, id)
	}

	// Create the update closure for the table and all other tables with backreferences.
	update := func(_ *kv.Txn, descs map[sqlbase.ID]*sqlbase.MutableTableDescriptor) error {
		scTable, ok := descs[sc.tableID]
		if !ok {
			return errors.AssertionFailedf("required table with ID %d not provided to update closure", sc.tableID)
		}
		for i := range constraints {
			constraint := &constraints[i]
			switch constraint.ConstraintType {
			case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
				found := false
				for j, c := range scTable.Checks {
					if c.Name == constraint.Name {
						scTable.Checks = append(scTable.Checks[:j], scTable.Checks[j+1:]...)
						found = true
						break
					}
				}
				if !found {
					log.VEventf(
						ctx, 2,
						"backfiller tried to drop constraint %+v but it was not found, "+
							"presumably due to a retry or rollback",
						constraint,
					)
				}
			case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
				var foundExisting bool
				for j := range scTable.OutboundFKs {
					def := &scTable.OutboundFKs[j]
					if def.Name == constraint.Name {
						backrefTable, ok := descs[constraint.ForeignKey.ReferencedTableID]
						if !ok {
							return errors.AssertionFailedf("required table with ID %d not provided to update closure",
								constraint.ForeignKey.ReferencedTableID)
						}
						if err := removeFKBackReferenceFromTable(backrefTable, def.Name, scTable.TableDesc()); err != nil {
							return err
						}
						scTable.OutboundFKs = append(scTable.OutboundFKs[:j], scTable.OutboundFKs[j+1:]...)
						foundExisting = true
						break
					}
				}
				if !foundExisting {
					log.VEventf(
						ctx, 2,
						"backfiller tried to drop constraint %+v but it was not found, "+
							"presumably due to a retry or rollback",
						constraint,
					)
				}
			}
		}
		return nil
	}

	descs, err := sc.leaseMgr.PublishMultiple(ctx, tableIDsToUpdate, update, nil)
	if err != nil {
		return nil, err
	}
	if err := sc.waitToUpdateLeases(ctx, sc.tableID); err != nil {
		return nil, err
	}
	for id := range fksByBackrefTable {
		if err := sc.waitToUpdateLeases(ctx, id); err != nil {
			return nil, err
		}
	}
	return descs, nil
}

// addConstraints publishes a new version of the given table descriptor with the
// given constraints added to it, and waits until the entire cluster is on
// the new version of the table descriptor.
func (sc *SchemaChanger) addConstraints(
	ctx context.Context, constraints []sqlbase.ConstraintToUpdate,
) error {
	fksByBackrefTable := make(map[sqlbase.ID][]*sqlbase.ConstraintToUpdate)
	for i := range constraints {
		c := &constraints[i]
		if c.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY &&
			c.ForeignKey.ReferencedTableID != sc.tableID {
			fksByBackrefTable[c.ForeignKey.ReferencedTableID] = append(fksByBackrefTable[c.ForeignKey.ReferencedTableID], c)
		}
	}
	tableIDsToUpdate := make([]sqlbase.ID, 0, len(fksByBackrefTable)+1)
	tableIDsToUpdate = append(tableIDsToUpdate, sc.tableID)
	for id := range fksByBackrefTable {
		tableIDsToUpdate = append(tableIDsToUpdate, id)
	}

	// Create the update closure for the table and all other tables with backreferences.
	update := func(_ *kv.Txn, descs map[sqlbase.ID]*sqlbase.MutableTableDescriptor) error {
		scTable, ok := descs[sc.tableID]
		if !ok {
			return errors.AssertionFailedf("required table with ID %d not provided to update closure", sc.tableID)
		}
		for i := range constraints {
			constraint := &constraints[i]
			switch constraint.ConstraintType {
			case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
				found := false
				for _, c := range scTable.Checks {
					if c.Name == constraint.Name {
						log.VEventf(
							ctx, 2,
							"backfiller tried to add constraint %+v but found existing constraint %+v, "+
								"presumably due to a retry or rollback",
							constraint, c,
						)
						// Ensure the constraint on the descriptor is set to Validating, in
						// case we're in the middle of rolling back DROP CONSTRAINT.
						c.Validity = sqlbase.ConstraintValidity_Validating
						found = true
						break
					}
				}
				if !found {
					scTable.Checks = append(scTable.Checks, &constraints[i].Check)
				}
			case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
				var foundExisting bool
				for j := range scTable.OutboundFKs {
					def := &scTable.OutboundFKs[j]
					if def.Name == constraint.Name {
						if log.V(2) {
							log.VEventf(
								ctx, 2,
								"backfiller tried to add constraint %+v but found existing constraint %+v, "+
									"presumably due to a retry or rollback",
								constraint, def,
							)
						}
						// Ensure the constraint on the descriptor is set to Validating, in
						// case we're in the middle of rolling back DROP CONSTRAINT.
						def.Validity = sqlbase.ConstraintValidity_Validating
						foundExisting = true
						break
					}
				}
				if !foundExisting {
					scTable.OutboundFKs = append(scTable.OutboundFKs, constraint.ForeignKey)
					backrefTable, ok := descs[constraint.ForeignKey.ReferencedTableID]
					if !ok {
						return errors.AssertionFailedf("required table with ID %d not provided to update closure",
							constraint.ForeignKey.ReferencedTableID)
					}
					backrefTable.InboundFKs = append(backrefTable.InboundFKs, constraint.ForeignKey)
				}
			}
		}
		return nil
	}

	if _, err := sc.leaseMgr.PublishMultiple(ctx, tableIDsToUpdate, update, nil); err != nil {
		return err
	}
	if err := sc.waitToUpdateLeases(ctx, sc.tableID); err != nil {
		return err
	}
	for id := range fksByBackrefTable {
		if err := sc.waitToUpdateLeases(ctx, id); err != nil {
			return err
		}
	}
	return nil
}

// validateConstraints checks that the current table data obeys the
// provided constraints.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely, so it makes its own.
func (sc *SchemaChanger) validateConstraints(
	ctx context.Context, constraints []sqlbase.ConstraintToUpdate,
) error {
	if lease.TestingTableLeasesAreDisabled() {
		return nil
	}

	_, err := sc.updateJobRunningStatus(ctx, RunningStatusValidation)
	if err != nil {
		return err
	}

	if fn := sc.testingKnobs.RunBeforeConstraintValidation; fn != nil {
		if err := fn(); err != nil {
			return err
		}
	}

	readAsOf := sc.clock.Now()
	var tableDesc *sqlbase.TableDescriptor

	if err := sc.fixedTimestampTxn(ctx, readAsOf, func(ctx context.Context, txn *kv.Txn) error {
		tableDesc, err = sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		return err
	}); err != nil {
		return err
	}

	grp := ctxgroup.WithContext(ctx)
	// The various checks below operate at a fixed timestamp.
	runHistoricalTxn := sc.makeFixedTimestampRunner(readAsOf)

	for i := range constraints {
		c := constraints[i]
		grp.GoCtx(func(ctx context.Context) error {
			// Make the mutations public in a private copy of the descriptor
			// and add it to the Collection, so that we can use SQL below to perform
			// the validation. We wouldn't have needed to do this if we could have
			// updated the descriptor and run validation in the same transaction. However,
			// our current system is incapable of running long running schema changes
			// (the validation can take many minutes). So we pretend that the schema
			// has been updated and actually update it in a separate transaction that
			// follows this one.
			desc, err := sqlbase.NewImmutableTableDescriptor(*tableDesc).MakeFirstMutationPublic(sqlbase.IgnoreConstraints)
			if err != nil {
				return err
			}
			// Each check operates at the historical timestamp.
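			// Because every goroutine validates at the same fixed readAsOf
			// timestamp, they all observe one consistent snapshot of the table
			// regardless of concurrent writes.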
			return runHistoricalTxn(ctx, func(ctx context.Context, txn *kv.Txn, evalCtx *extendedEvalContext) error {
				switch c.ConstraintType {
				case sqlbase.ConstraintToUpdate_CHECK:
					if err := validateCheckInTxn(ctx, sc.leaseMgr, &evalCtx.EvalContext, desc, txn, c.Check.Name); err != nil {
						return err
					}
				case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
					if err := validateFkInTxn(ctx, sc.leaseMgr, &evalCtx.EvalContext, desc, txn, c.Name); err != nil {
						return err
					}
				case sqlbase.ConstraintToUpdate_NOT_NULL:
					if err := validateCheckInTxn(ctx, sc.leaseMgr, &evalCtx.EvalContext, desc, txn, c.Check.Name); err != nil {
						// TODO (lucy): This should distinguish between constraint
						// validation errors and other types of unexpected errors, and
						// return a different error code in the former case.
						return errors.Wrap(err, "validation of NOT NULL constraint failed")
					}
				default:
					return errors.Errorf("unsupported constraint type: %d", c.ConstraintType)
				}
				return nil
			})
		})
	}
	return grp.Wait()
}

// getTableVersion retrieves the descriptor for the table being
// targeted by the schema changer using the provided txn, and asserts
// that the retrieved descriptor is at the given version. An error is
// returned otherwise.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func (sc *SchemaChanger) getTableVersion(
	ctx context.Context, txn *kv.Txn, tc *descs.Collection, version sqlbase.DescriptorVersion,
) (*sqlbase.ImmutableTableDescriptor, error) {
	tableDesc, err := tc.GetTableVersionByID(ctx, txn, sc.tableID, tree.ObjectLookupFlags{})
	if err != nil {
		return nil, err
	}
	if version != tableDesc.Version {
		return nil, makeErrTableVersionMismatch(tableDesc.Version, version)
	}
	return tableDesc, nil
}

// truncateIndexes truncates the KV ranges corresponding to dropped indexes.
//
// The indexes are dropped chunk by chunk, each chunk being deleted in
// its own txn.
func (sc *SchemaChanger) truncateIndexes(
	ctx context.Context, version sqlbase.DescriptorVersion, dropped []sqlbase.IndexDescriptor,
) error {
	chunkSize := sc.getChunkSize(indexTruncateChunkSize)
	if sc.testingKnobs.BackfillChunkSize > 0 {
		chunkSize = sc.testingKnobs.BackfillChunkSize
	}
	alloc := &sqlbase.DatumAlloc{}
	for _, desc := range dropped {
		var resume roachpb.Span
		for rowIdx, done := int64(0), false; !done; rowIdx += chunkSize {
			resumeAt := resume
			if log.V(2) {
				log.Infof(ctx, "drop index (%d, %d) at row: %d, span: %s",
					sc.tableID, sc.mutationID, rowIdx, resume)
			}

			// Make a new txn just to drop this chunk.
			if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
				if fn := sc.execCfg.DistSQLRunTestingKnobs.RunBeforeBackfillChunk; fn != nil {
					if err := fn(resume); err != nil {
						return err
					}
				}
				if fn := sc.execCfg.DistSQLRunTestingKnobs.RunAfterBackfillChunk; fn != nil {
					defer fn()
				}

				// Retrieve a lease for this table inside the current txn.
				tc := descs.NewCollection(sc.leaseMgr, sc.settings)
				defer tc.ReleaseAll(ctx)
				tableDesc, err := sc.getTableVersion(ctx, txn, tc, version)
				if err != nil {
					return err
				}

				rd, err := row.MakeDeleter(
					ctx,
					txn,
					sc.execCfg.Codec,
					tableDesc,
					nil,
					nil,
					row.SkipFKs,
					nil, /* *tree.EvalContext */
					alloc,
				)
				if err != nil {
					return err
				}
				td := tableDeleter{rd: rd, alloc: alloc}
				if err := td.init(ctx, txn, nil /* *tree.EvalContext */); err != nil {
					return err
				}
				if !canClearRangeForDrop(&desc) {
					resume, err = td.deleteIndex(
						ctx,
						&desc,
						resumeAt,
						chunkSize,
						false, /* traceKV */
					)
					done = resume.Key == nil
					return err
				}
				done = true
				return td.clearIndex(ctx, &desc)
			}); err != nil {
				return err
			}
		}

		// All the data chunks have been removed. Now also remove the
		// zone configs for the dropped indexes, if any.
		if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
			return RemoveIndexZoneConfigs(ctx, txn, sc.execCfg, sc.tableID, dropped)
		}); err != nil {
			return err
		}
	}
	return nil
}

type backfillType int

const (
	_ backfillType = iota
	columnBackfill
	indexBackfill
)

// getJobIDForMutationWithDescriptor returns a job id associated with a mutation given
// a table descriptor. Unlike getJobIDForMutation, this doesn't need a transaction.
// TODO (lucy): This is not a good way to look up all schema change jobs
// associated with a table. We should get rid of MutationJobs and start looking
// up the jobs in the jobs table instead.
func getJobIDForMutationWithDescriptor(
	ctx context.Context, tableDesc *sqlbase.TableDescriptor, mutationID sqlbase.MutationID,
) (int64, error) {
	for _, job := range tableDesc.MutationJobs {
		if job.MutationID == mutationID {
			return job.JobID, nil
		}
	}

	return 0, errors.AssertionFailedf(
		"job not found for table id %d, mutation %d", tableDesc.ID, mutationID)
}

// nRanges returns the number of ranges that cover a set of spans.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func (sc *SchemaChanger) nRanges(
	ctx context.Context, txn *kv.Txn, spans []roachpb.Span,
) (int, error) {
	spanResolver := sc.distSQLPlanner.spanResolver.NewSpanResolverIterator(txn)
	rangeIds := make(map[int64]struct{})
	for _, span := range spans {
		// For each span, iterate the spanResolver until it's exhausted, storing
		// the found range ids in the map to de-duplicate them.
		spanResolver.Seek(ctx, span, kvcoord.Ascending)
		for {
			if !spanResolver.Valid() {
				return 0, spanResolver.Error()
			}
			rangeIds[int64(spanResolver.Desc().RangeID)] = struct{}{}
			if !spanResolver.NeedAnother() {
				break
			}
			spanResolver.Next(ctx)
		}
	}

	return len(rangeIds), nil
}

// distBackfill runs (or continues) a backfill for the first mutation
// enqueued on the SchemaChanger's table descriptor that passes the input
// MutationFilter.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely, so it makes its own.
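// The loop below repeatedly plans a distributed backfill flow over the
// remaining spans, trims todoSpans as processors report completed spans via
// progress metadata, and checkpoints what remains in the job record.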
func (sc *SchemaChanger) distBackfill(
	ctx context.Context,
	version sqlbase.DescriptorVersion,
	backfillType backfillType,
	backfillChunkSize int64,
	filter backfill.MutationFilter,
	targetSpans []roachpb.Span,
) error {
	inMemoryStatusEnabled := sc.execCfg.Settings.Version.IsActive(
		ctx, clusterversion.VersionAtomicChangeReplicasTrigger)
	duration := checkpointInterval
	if sc.testingKnobs.WriteCheckpointInterval > 0 {
		duration = sc.testingKnobs.WriteCheckpointInterval
	}
	chunkSize := sc.getChunkSize(backfillChunkSize)

	origNRanges := -1
	origFractionCompleted := sc.job.FractionCompleted()
	fractionLeft := 1 - origFractionCompleted
	readAsOf := sc.clock.Now()
	// Index backfilling ingests SSTs that don't play nicely with running txns
	// since they just add their keys blindly. Running a Scan of the target
	// spans at the time the SSTs' keys will be written will calcify history up
	// to then since the scan will resolve intents and populate the tscache to
	// keep anything else from sneaking under us. Since these are new indexes,
	// these spans should be essentially empty, so this should be a pretty
	// quick and cheap scan.
	if backfillType == indexBackfill {
		const pageSize = 10000
		noop := func(_ []kv.KeyValue) error { return nil }
		if err := sc.fixedTimestampTxn(ctx, readAsOf, func(ctx context.Context, txn *kv.Txn) error {
			for _, span := range targetSpans {
				// TODO(dt): a Count() request would be nice here if the target isn't
				// empty, since we don't need to drag all the results back just to
				// then ignore them -- we just need the iteration on the far end.
				if err := txn.Iterate(ctx, span.Key, span.EndKey, pageSize, noop); err != nil {
					return err
				}
			}
			return nil
		}); err != nil {
			return err
		}
	}

	// Gather the initial resume spans for the table.
	var todoSpans []roachpb.Span
	var mutationIdx int
	if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		var err error
		todoSpans, _, mutationIdx, err = rowexec.GetResumeSpans(
			ctx, sc.jobRegistry, txn, sc.execCfg.Codec, sc.tableID, sc.mutationID, filter)
		return err
	}); err != nil {
		return err
	}

	for len(todoSpans) > 0 {
		log.VEventf(ctx, 2, "backfill: process %+v spans", todoSpans)
		if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
			// Report schema change progress. We define progress at this point
			// as the fraction of fully-backfilled ranges of the primary index of
			// the table being scanned. Since we may have already modified the
			// fraction completed of our job from the 10% allocated to completing the
			// schema change state machine or from a previous backfill attempt,
			// we scale that fraction of ranges completed by the remaining fraction
			// of the job's progress bar.
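			// For example, if the job was already at 10% and 60 of the
			// original 100 ranges are done, the reported fraction is
			// 0.10 + 0.90*0.60 = 0.64.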
			nRanges, err := sc.nRanges(ctx, txn, todoSpans)
			if err != nil {
				return err
			}
			if origNRanges == -1 {
				origNRanges = nRanges
			}

			if nRanges < origNRanges {
				fractionRangesFinished := float32(origNRanges-nRanges) / float32(origNRanges)
				fractionCompleted := origFractionCompleted + fractionLeft*fractionRangesFinished
				if err := sc.job.FractionProgressed(ctx, jobs.FractionUpdater(fractionCompleted)); err != nil {
					return jobs.SimplifyInvalidStatusError(err)
				}
			}

			tc := descs.NewCollection(sc.leaseMgr, sc.settings)
			// Use a leased table descriptor for the backfill.
			defer tc.ReleaseAll(ctx)
			tableDesc, err := sc.getTableVersion(ctx, txn, tc, version)
			if err != nil {
				return err
			}
			// otherTableDescs contains any other table descriptors required by the
			// backfiller processor.
			var otherTableDescs []sqlbase.TableDescriptor
			if backfillType == columnBackfill {
				fkTables, err := row.MakeFkMetadata(
					ctx,
					tableDesc,
					row.CheckUpdates,
					row.NoLookup,
					row.NoCheckPrivilege,
					nil, /* AnalyzeExprFunction */
					nil, /* CheckHelper */
				)
				if err != nil {
					return err
				}

				for k := range fkTables {
					table, err := tc.GetTableVersionByID(ctx, txn, k, tree.ObjectLookupFlags{})
					if err != nil {
						return err
					}
					otherTableDescs = append(otherTableDescs, *table.TableDesc())
				}
			}
			metaFn := func(_ context.Context, meta *execinfrapb.ProducerMetadata) error {
				if meta.BulkProcessorProgress != nil {
					todoSpans = roachpb.SubtractSpans(todoSpans,
						meta.BulkProcessorProgress.CompletedSpans)
				}
				return nil
			}
			cbw := metadataCallbackWriter{rowResultWriter: &errOnlyResultWriter{}, fn: metaFn}
			evalCtx := createSchemaChangeEvalCtx(ctx, sc.execCfg, txn.ReadTimestamp(), sc.ieFactory)
			recv := MakeDistSQLReceiver(
				ctx,
				&cbw,
				tree.Rows, /* stmtType - doesn't matter here since no results are produced */
				sc.rangeDescriptorCache,
				sc.leaseHolderCache,
				nil, /* txn - the flow does not run wholly in a txn */
				func(ts hlc.Timestamp) {
					sc.clock.Update(ts)
				},
				evalCtx.Tracing,
			)
			defer recv.Release()

			planCtx := sc.distSQLPlanner.NewPlanningCtx(ctx, &evalCtx, txn, true /* distribute */)
			plan, err := sc.distSQLPlanner.createBackfiller(
				planCtx, backfillType, *tableDesc.TableDesc(), duration, chunkSize, todoSpans, otherTableDescs, readAsOf,
			)
			if err != nil {
				return err
			}
			sc.distSQLPlanner.Run(
				planCtx,
				nil, /* txn - the processors manage their own transactions */
				&plan, recv, &evalCtx,
				nil, /* finishedSetupFn */
			)()
			return cbw.Err()
		}); err != nil {
			return err
		}
		if !inMemoryStatusEnabled {
			var resumeSpans []roachpb.Span
			// There may be worker nodes on an older version that communicate
			// their completed work by writing to the jobs table. In that case
			// we intersect todoSpans with what the old node(s) have set in the
			// jobs table so as not to overwrite their completed work.
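			// For example, with todoSpans A = [a,f) and resumeSpans
			// B = {[a,c), [d,f)}: A - B = [c,d), and A - (A - B) =
			// {[a,c), [d,f)}, which is exactly the intersection A ∩ B.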
			if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
				var err error
				resumeSpans, _, _, err = rowexec.GetResumeSpans(
					ctx, sc.jobRegistry, txn, sc.execCfg.Codec, sc.tableID, sc.mutationID, filter)
				return err
			}); err != nil {
				return err
			}
			// A \intersect B = A - (A - B)
			todoSpans = roachpb.SubtractSpans(todoSpans, roachpb.SubtractSpans(todoSpans, resumeSpans))

		}
		// Record what is left to do for the job.
		// TODO(spaskob): Execute this at a regular cadence.
		if err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
			return rowexec.SetResumeSpansInJob(ctx, todoSpans, mutationIdx, txn, sc.job)
		}); err != nil {
			return err
		}
	}
	return nil
}

// updateJobRunningStatus updates the status field in the job entry
// with the given value.
//
// The update is performed in a separate txn at the current logical
// timestamp.
func (sc *SchemaChanger) updateJobRunningStatus(
	ctx context.Context, status jobs.RunningStatus,
) (*sqlbase.TableDescriptor, error) {
	var tableDesc *sqlbase.TableDescriptor
	err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		var err error
		tableDesc, err = sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}
		// Update running status of job.
		updateJobRunningProgress := false
		for _, mutation := range tableDesc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Mutations are applied in a FIFO order. Only apply the first set of
				// mutations if they have the mutation ID we're looking for.
				break
			}

			switch mutation.Direction {
			case sqlbase.DescriptorMutation_ADD:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY:
					updateJobRunningProgress = true
				}

			case sqlbase.DescriptorMutation_DROP:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_ONLY:
					updateJobRunningProgress = true
				}
			}
		}
		if updateJobRunningProgress && !tableDesc.Dropped() {
			if err := sc.job.WithTxn(txn).RunningStatus(ctx, func(
				ctx context.Context, details jobspb.Details) (jobs.RunningStatus, error) {
				return status, nil
			}); err != nil {
				return errors.Wrapf(err, "failed to update running status of job %d", errors.Safe(*sc.job.ID()))
			}
		}
		return nil
	})
	return tableDesc, err
}

// validateIndexes checks that the new indexes have entries for all the rows.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely, so it makes its own.
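// Forward and inverted indexes are validated concurrently: each kind is
// handled by its own helper below, running in a ctxgroup at the same fixed
// timestamp so the resulting counts are mutually consistent.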
func (sc *SchemaChanger) validateIndexes(ctx context.Context) error {
	if lease.TestingTableLeasesAreDisabled() {
		return nil
	}

	_, err := sc.updateJobRunningStatus(ctx, RunningStatusValidation)
	if err != nil {
		return err
	}

	if fn := sc.testingKnobs.RunBeforeIndexValidation; fn != nil {
		if err := fn(); err != nil {
			return err
		}
	}

	readAsOf := sc.clock.Now()
	var tableDesc *sqlbase.TableDescriptor
	if err := sc.fixedTimestampTxn(ctx, readAsOf, func(ctx context.Context, txn *kv.Txn) error {
		tableDesc, err = sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		return err
	}); err != nil {
		return err
	}

	var forwardIndexes []*sqlbase.IndexDescriptor
	var invertedIndexes []*sqlbase.IndexDescriptor

	for _, m := range tableDesc.Mutations {
		if sc.mutationID != m.MutationID {
			break
		}
		idx := m.GetIndex()
		if idx == nil || m.Direction == sqlbase.DescriptorMutation_DROP {
			continue
		}
		switch idx.Type {
		case sqlbase.IndexDescriptor_FORWARD:
			forwardIndexes = append(forwardIndexes, idx)
		case sqlbase.IndexDescriptor_INVERTED:
			invertedIndexes = append(invertedIndexes, idx)
		}
	}
	if len(forwardIndexes) == 0 && len(invertedIndexes) == 0 {
		return nil
	}

	grp := ctxgroup.WithContext(ctx)
	runHistoricalTxn := sc.makeFixedTimestampRunner(readAsOf)

	if len(forwardIndexes) > 0 {
		grp.GoCtx(func(ctx context.Context) error {
			return sc.validateForwardIndexes(ctx, tableDesc, forwardIndexes, runHistoricalTxn)
		})
	}
	if len(invertedIndexes) > 0 {
		grp.GoCtx(func(ctx context.Context) error {
			return sc.validateInvertedIndexes(ctx, tableDesc, invertedIndexes, runHistoricalTxn)
		})
	}
	return grp.Wait()
}

// validateInvertedIndexes checks that the indexes have entries for
// all the items of data in rows.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely.
// Instead it uses the provided runHistoricalTxn which can operate
// at the historical fixed timestamp for checks.
func (sc *SchemaChanger) validateInvertedIndexes(
	ctx context.Context,
	tableDesc *TableDescriptor,
	indexes []*sqlbase.IndexDescriptor,
	runHistoricalTxn historicalTxnRunner,
) error {
	grp := ctxgroup.WithContext(ctx)

	expectedCount := make([]int64, len(indexes))
	countReady := make([]chan struct{}, len(indexes))

	for i, idx := range indexes {
		i, idx := i, idx
		countReady[i] = make(chan struct{})

		grp.GoCtx(func(ctx context.Context) error {
			// Inverted indexes currently can't be interleaved, so a KV scan can be
			// used to get the index length.
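			// For each index, two goroutines run in parallel: this one counts
			// the entries actually present in the index via a raw KV scan,
			// while a second one (below) computes the expected entry count with
			// a SQL aggregation; the countReady channel orders the comparison.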
			// TODO (lucy): Switch to using DistSQL to get the count, so that we get
			// distributed execution and avoid bypassing the SQL decoding
			start := timeutil.Now()
			var idxLen int64
			span := tableDesc.IndexSpan(sc.execCfg.Codec, idx.ID)
			key := span.Key
			endKey := span.EndKey
			if err := runHistoricalTxn(ctx, func(ctx context.Context, txn *kv.Txn, _ *extendedEvalContext) error {
				for {
					kvs, err := txn.Scan(ctx, key, endKey, 1000000)
					if err != nil {
						return err
					}
					if len(kvs) == 0 {
						break
					}
					idxLen += int64(len(kvs))
					key = kvs[len(kvs)-1].Key.PrefixEnd()
				}
				return nil
			}); err != nil {
				return err
			}
			log.Infof(ctx, "inverted index %s/%s count = %d, took %s",
				tableDesc.Name, idx.Name, idxLen, timeutil.Since(start))
			select {
			case <-countReady[i]:
				if idxLen != expectedCount[i] {
					// JSON columns cannot have unique indexes, so if the expected and
					// actual counts do not match, it's always a bug rather than a
					// uniqueness violation.
					return errors.AssertionFailedf(
						"validation of index %s failed: expected %d rows, found %d",
						idx.Name, errors.Safe(expectedCount[i]), errors.Safe(idxLen))
				}
			case <-ctx.Done():
				return ctx.Err()
			}
			return nil
		})

		grp.GoCtx(func(ctx context.Context) error {
			defer close(countReady[i])

			start := timeutil.Now()
			if len(idx.ColumnNames) != 1 {
				panic(fmt.Sprintf("expected inverted index %s to have exactly 1 column, but found columns %+v",
					idx.Name, idx.ColumnNames))
			}
			col := idx.ColumnNames[0]

			if err := runHistoricalTxn(ctx, func(ctx context.Context, txn *kv.Txn, evalCtx *extendedEvalContext) error {
				ie := evalCtx.InternalExecutor.(*InternalExecutor)
				var stmt string
				if geoindex.IsEmptyConfig(&idx.GeoConfig) {
					stmt = fmt.Sprintf(
						`SELECT coalesce(sum_int(crdb_internal.num_inverted_index_entries(%q)), 0) FROM [%d AS t]`,
						col, tableDesc.ID,
					)
				} else {
					stmt = fmt.Sprintf(
						`SELECT coalesce(sum_int(crdb_internal.num_geo_inverted_index_entries(%d, %d, %q)), 0) FROM [%d AS t]`,
						tableDesc.ID, idx.ID, col, tableDesc.ID,
					)
				}
				row, err := ie.QueryRowEx(ctx, "verify-inverted-idx-count", txn,
					sqlbase.InternalExecutorSessionDataOverride{}, stmt)
				if err != nil {
					return err
				}
				expectedCount[i] = int64(tree.MustBeDInt(row[0]))
				return nil
			}); err != nil {
				return err
			}
			log.Infof(ctx, "JSON column %s/%s expected inverted index count = %d, took %s",
				tableDesc.Name, col, expectedCount[i], timeutil.Since(start))
			return nil
		})
	}

	return grp.Wait()
}

// validateForwardIndexes checks that the indexes have entries for all the rows.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely.
// Instead it uses the provided runHistoricalTxn which can operate
// at the historical fixed timestamp for checks.
func (sc *SchemaChanger) validateForwardIndexes(
	ctx context.Context,
	tableDesc *TableDescriptor,
	indexes []*sqlbase.IndexDescriptor,
	runHistoricalTxn historicalTxnRunner,
) error {
	grp := ctxgroup.WithContext(ctx)

	var tableRowCount int64
	// Close when table count is ready.
	tableCountReady := make(chan struct{})
	// Compute the size of each index.
	for _, idx := range indexes {
		idx := idx
		grp.GoCtx(func(ctx context.Context) error {
			start := timeutil.Now()
			// Make the mutations public in a private copy of the descriptor
			// and add it to the Collection, so that we can use SQL below to perform
			// the validation. We wouldn't have needed to do this if we could have
			// updated the descriptor and run validation in the same transaction. However,
			// our current system is incapable of running long running schema changes
			// (the validation can take many minutes). So we pretend that the schema
			// has been updated and actually update it in a separate transaction that
			// follows this one.
			desc, err := sqlbase.NewImmutableTableDescriptor(*tableDesc).MakeFirstMutationPublic(sqlbase.IgnoreConstraints)
			if err != nil {
				return err
			}
			tc := descs.NewCollection(sc.leaseMgr, sc.settings)
			// pretend that the schema has been modified.
			if err := tc.AddUncommittedTable(*desc); err != nil {
				return err
			}

			// Retrieve the row count in the index.
			var idxLen int64
			if err := runHistoricalTxn(ctx, func(ctx context.Context, txn *kv.Txn, evalCtx *extendedEvalContext) error {
				// TODO(vivek): This is not a great API. Leaving #34304 open.
				ie := evalCtx.InternalExecutor.(*InternalExecutor)
				ie.tcModifier = tc
				defer func() {
					ie.tcModifier = nil
				}()

				row, err := ie.QueryRowEx(ctx, "verify-idx-count", txn,
					sqlbase.InternalExecutorSessionDataOverride{},
					fmt.Sprintf(`SELECT count(1) FROM [%d AS t]@[%d]`, tableDesc.ID, idx.ID))
				if err != nil {
					return err
				}
				idxLen = int64(tree.MustBeDInt(row[0]))
				return nil
			}); err != nil {
				return err
			}

			log.Infof(ctx, "validation: index %s/%s row count = %d, time so far %s",
				tableDesc.Name, idx.Name, idxLen, timeutil.Since(start))

			// Now compare with the row count in the table.
			select {
			case <-tableCountReady:
				if idxLen != tableRowCount {
					// TODO(vivek): find the offending row and include it in the error.
					return pgerror.Newf(
						pgcode.UniqueViolation,
						"%d entries, expected %d violates unique constraint %q",
						idxLen, tableRowCount, idx.Name,
					)
				}

			case <-ctx.Done():
				return ctx.Err()
			}

			return nil
		})
	}

	grp.GoCtx(func(ctx context.Context) error {
		defer close(tableCountReady)
		var tableRowCountTime time.Duration
		start := timeutil.Now()

		// Count the number of rows in the table.
		if err := runHistoricalTxn(ctx, func(ctx context.Context, txn *kv.Txn, evalCtx *extendedEvalContext) error {
			ie := evalCtx.InternalExecutor.(*InternalExecutor)
			cnt, err := ie.QueryRowEx(ctx, "VERIFY INDEX", txn,
				sqlbase.InternalExecutorSessionDataOverride{},
				fmt.Sprintf(`SELECT count(1) FROM [%d AS t]`, tableDesc.ID))
			if err != nil {
				return err
			}
			tableRowCount = int64(tree.MustBeDInt(cnt[0]))
			return nil
		}); err != nil {
			return err
		}

		tableRowCountTime = timeutil.Since(start)
		log.Infof(ctx, "validation: table %s row count = %d, took %s",
			tableDesc.Name, tableRowCount, tableRowCountTime)
		return nil
	})

	return grp.Wait()
}

// backfillIndexes fills the missing columns in the indexes of the
// leased tables.
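//
// Before the distributed backfill starts, each new index span is split off
// into its own range so that the bulk ingestion is not bottlenecked on a
// single range.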
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely.
func (sc *SchemaChanger) backfillIndexes(
	ctx context.Context, version sqlbase.DescriptorVersion, addingSpans []roachpb.Span,
) error {
	if fn := sc.testingKnobs.RunBeforeIndexBackfill; fn != nil {
		fn()
	}

	expirationTime := sc.db.Clock().Now().Add(time.Hour.Nanoseconds(), 0)

	for _, span := range addingSpans {
		if err := sc.db.AdminSplit(ctx, span.Key, span.Key, expirationTime); err != nil {
			return err
		}
	}

	chunkSize := indexBulkBackfillChunkSize.Get(&sc.settings.SV)
	if err := sc.distBackfill(
		ctx, version, indexBackfill, chunkSize,
		backfill.IndexMutationFilter, addingSpans); err != nil {
		return err
	}
	return sc.validateIndexes(ctx)
}

// truncateAndBackfillColumns performs the backfill operation on the given leased
// table descriptors.
//
// This operates over multiple goroutines concurrently and is thus not
// able to reuse the original kv.Txn safely.
func (sc *SchemaChanger) truncateAndBackfillColumns(
	ctx context.Context, version sqlbase.DescriptorVersion,
) error {
	return sc.distBackfill(
		ctx, version, columnBackfill, columnTruncateAndBackfillChunkSize,
		backfill.ColumnMutationFilter, nil)
}

// runSchemaChangesInTxn runs all the schema changes immediately in a
// transaction. This is called when a CREATE TABLE is followed by
// schema changes in the same transaction. The CREATE TABLE is
// invisible to the rest of the cluster, so the schema changes
// can be executed immediately on the same version of the table.
//
// It operates entirely on the current goroutine and is thus able to
// reuse the planner's kv.Txn safely.
func runSchemaChangesInTxn(
	ctx context.Context, planner *planner, tableDesc *sqlbase.MutableTableDescriptor, traceKV bool,
) error {
	if len(tableDesc.DrainingNames) > 0 {
		// Reclaim all the old names. Leave the data and descriptor
		// cleanup for later.
		for _, drain := range tableDesc.DrainingNames {
			err := sqlbase.RemoveObjectNamespaceEntry(ctx, planner.Txn(), planner.ExecCfg().Codec,
				drain.ParentID, drain.ParentSchemaID, drain.Name, false /* KVTrace */)
			if err != nil {
				return err
			}
		}
		tableDesc.DrainingNames = nil
	}

	if tableDesc.Dropped() {
		return nil
	}

	// Only needed because columnBackfillInTxn() backfills
	// all column mutations.
	doneColumnBackfill := false
	// Checks are validated after all other mutations have been applied.
	var constraintsToValidate []sqlbase.ConstraintToUpdate

	// We use a range loop here as the processing of some mutations
	// such as the primary key swap mutations result in queueing more
	// mutations that need to be processed.
	for i := 0; i < len(tableDesc.Mutations); i++ {
		m := tableDesc.Mutations[i]
		immutDesc := sqlbase.NewImmutableTableDescriptor(*tableDesc.TableDesc())
		switch m.Direction {
		case sqlbase.DescriptorMutation_ADD:
			switch t := m.Descriptor_.(type) {
			case *sqlbase.DescriptorMutation_PrimaryKeySwap:
				// Don't need to do anything here, as the call to MakeMutationComplete
				// will perform the steps for this operation.
			case *sqlbase.DescriptorMutation_ComputedColumnSwap:
				return AlterColTypeInTxnNotSupportedErr
			case *sqlbase.DescriptorMutation_Column:
				if doneColumnBackfill || !sqlbase.ColumnNeedsBackfill(m.GetColumn()) {
					break
				}
				if err := columnBackfillInTxn(ctx, planner.Txn(), planner.Tables(), planner.EvalContext(), immutDesc, traceKV); err != nil {
					return err
				}
				doneColumnBackfill = true

			case *sqlbase.DescriptorMutation_Index:
				if err := indexBackfillInTxn(ctx, planner.Txn(), planner.EvalContext(), immutDesc, traceKV); err != nil {
					return err
				}

			case *sqlbase.DescriptorMutation_Constraint:
				switch t.Constraint.ConstraintType {
				case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
					tableDesc.Checks = append(tableDesc.Checks, &t.Constraint.Check)
				case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
					fk := t.Constraint.ForeignKey
					var referencedTableDesc *sqlbase.MutableTableDescriptor
					// We don't want to look up or edit a second copy of the same table.
					selfReference := tableDesc.ID == fk.ReferencedTableID
					if selfReference {
						referencedTableDesc = tableDesc
					} else {
						lookup, err := planner.Tables().GetMutableTableVersionByID(ctx, fk.ReferencedTableID, planner.Txn())
						if err != nil {
							return errors.Errorf("error resolving referenced table ID %d: %v", fk.ReferencedTableID, err)
						}
						referencedTableDesc = lookup
					}
					referencedTableDesc.InboundFKs = append(referencedTableDesc.InboundFKs, fk)
					tableDesc.OutboundFKs = append(tableDesc.OutboundFKs, fk)

					// Write the other table descriptor here if it's not the current table
					// we're already modifying.
					if !selfReference {
						// TODO (lucy): Have more consistent/informative names for dependent jobs.
						if err := planner.writeSchemaChange(
							ctx, referencedTableDesc, sqlbase.InvalidMutationID, "updating referenced table",
						); err != nil {
							return err
						}
					}
				default:
					return errors.AssertionFailedf(
						"unsupported constraint type: %d", errors.Safe(t.Constraint.ConstraintType))
				}
				constraintsToValidate = append(constraintsToValidate, *t.Constraint)

			default:
				return errors.AssertionFailedf(
					"unsupported mutation: %+v", m)
			}

		case sqlbase.DescriptorMutation_DROP:
			// Drop the name and drop the associated data later.
			switch t := m.Descriptor_.(type) {
			case *sqlbase.DescriptorMutation_Column:
				if doneColumnBackfill {
					break
				}
				if err := columnBackfillInTxn(
					ctx, planner.Txn(), planner.Tables(), planner.EvalContext(), immutDesc, traceKV,
				); err != nil {
					return err
				}
				doneColumnBackfill = true

			case *sqlbase.DescriptorMutation_Index:
				if err := indexTruncateInTxn(
					ctx, planner.Txn(), planner.ExecCfg(), planner.EvalContext(), immutDesc, t.Index, traceKV,
				); err != nil {
					return err
				}

			case *sqlbase.DescriptorMutation_Constraint:
				switch t.Constraint.ConstraintType {
				case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
					for i := range tableDesc.Checks {
						if tableDesc.Checks[i].Name == t.Constraint.Name {
							tableDesc.Checks = append(tableDesc.Checks[:i], tableDesc.Checks[i+1:]...)
							break
						}
					}
				case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
					for i := range tableDesc.OutboundFKs {
						fk := &tableDesc.OutboundFKs[i]
						if fk.Name == t.Constraint.Name {
							if err := planner.removeFKBackReference(ctx, tableDesc, fk); err != nil {
								return err
							}
							tableDesc.OutboundFKs = append(tableDesc.OutboundFKs[:i], tableDesc.OutboundFKs[i+1:]...)
							break
						}
					}
				default:
					return errors.AssertionFailedf(
						"unsupported constraint type: %d", errors.Safe(t.Constraint.ConstraintType))
				}

			default:
				return errors.AssertionFailedf("unsupported mutation: %+v", m)
			}

		}
		// TODO (lucy): This seems suspicious, since MakeMutationsComplete should
		// add unvalidated foreign keys, but we unconditionally add them above. Do
		// unvalidated FKs get added twice?
		if err := tableDesc.MakeMutationComplete(m); err != nil {
			return err
		}

		// If the mutation we processed was a primary key swap, there is some
		// extra work that needs to be done. Note that we don't need to create
		// a job to clean up the dropped indexes because those mutations can
		// get processed in this txn on the new table.
		if pkSwap := m.GetPrimaryKeySwap(); pkSwap != nil {
			// If any old index had an interleaved parent, remove the
			// backreference from the parent.
			// N.B. This logic needs to be kept up to date with the
			// corresponding piece in (*SchemaChanger).done. It is slightly
			// different because of how it accesses tables and how it needs to
			// write the modified table descriptors explicitly.
			for _, idxID := range append(
				[]sqlbase.IndexID{pkSwap.OldPrimaryIndexId}, pkSwap.OldIndexes...) {
				oldIndex, err := tableDesc.FindIndexByID(idxID)
				if err != nil {
					return err
				}
				if len(oldIndex.Interleave.Ancestors) != 0 {
					ancestorInfo := oldIndex.Interleave.Ancestors[len(oldIndex.Interleave.Ancestors)-1]
					ancestor, err := planner.Tables().GetMutableTableVersionByID(ctx, ancestorInfo.TableID, planner.txn)
					if err != nil {
						return err
					}
					ancestorIdx, err := ancestor.FindIndexByID(ancestorInfo.IndexID)
					if err != nil {
						return err
					}
					foundAncestor := false
					for k, ref := range ancestorIdx.InterleavedBy {
						if ref.Table == tableDesc.ID && ref.Index == oldIndex.ID {
							if foundAncestor {
								return errors.AssertionFailedf(
									"ancestor entry in %s for %s@%s found more than once",
									ancestor.Name, tableDesc.Name, oldIndex.Name)
							}
							ancestorIdx.InterleavedBy = append(
								ancestorIdx.InterleavedBy[:k], ancestorIdx.InterleavedBy[k+1:]...)
							foundAncestor = true
							if err := planner.writeSchemaChange(ctx, ancestor, sqlbase.InvalidMutationID, ""); err != nil {
								return err
							}
						}
					}
				}
			}
		}
	}
	tableDesc.Mutations = nil

	// Now that the table descriptor is in a valid state with all column and index
	// mutations applied, it can be used for validating check constraints.
	for _, c := range constraintsToValidate {
		switch c.ConstraintType {
		case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
			if err := validateCheckInTxn(
				ctx, planner.Tables().LeaseManager(), planner.EvalContext(), tableDesc, planner.txn, c.Check.Name,
			); err != nil {
				return err
			}
		case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
			// We can't support adding a validated foreign key constraint in the same
			// transaction as the CREATE TABLE statement. This would require adding
			// the backreference to the other table and then validating the constraint
			// for whatever rows were inserted into the referencing table in this
			// transaction, which requires multiple schema changer states across
			// multiple transactions.
			// TODO (lucy): Add a validation job that runs after the user transaction.
			// This won't roll back the original transaction if validation fails, but
			// it will at least leave the constraint in the Validated state if
			// validation succeeds.

			// For now, revert the constraint to an unvalidated state.
			for i := range tableDesc.OutboundFKs {
				desc := &tableDesc.OutboundFKs[i]
				if desc.Name == c.ForeignKey.Name {
					desc.Validity = sqlbase.ConstraintValidity_Unvalidated
					break
				}
			}
		default:
			return errors.AssertionFailedf(
				"unsupported constraint type: %d", errors.Safe(c.ConstraintType))
		}
	}
	return nil
}

// validateCheckInTxn validates check constraints within the provided
// transaction. If the provided table descriptor version is newer than the
// cluster version, it will be used in the InternalExecutor that performs the
// validation query.
//
// TODO (lucy): The special case where the table descriptor version is the same
// as the cluster version only happens because the query in VALIDATE CONSTRAINT
// still runs in the user transaction instead of a step in the schema changer.
// When that's no longer true, this function should be updated.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func validateCheckInTxn(
	ctx context.Context,
	leaseMgr *lease.Manager,
	evalCtx *tree.EvalContext,
	tableDesc *MutableTableDescriptor,
	txn *kv.Txn,
	checkName string,
) error {
	ie := evalCtx.InternalExecutor.(*InternalExecutor)
	if tableDesc.Version > tableDesc.ClusterVersion.Version {
		newTc := descs.NewCollection(leaseMgr, evalCtx.Settings)
		// pretend that the schema has been modified.
		if err := newTc.AddUncommittedTable(*tableDesc); err != nil {
			return err
		}

		ie.tcModifier = newTc
		defer func() {
			ie.tcModifier = nil
		}()
	}

	check, err := tableDesc.FindCheckByName(checkName)
	if err != nil {
		return err
	}
	return validateCheckExpr(ctx, check.Expr, tableDesc.TableDesc(), ie, txn)
}

// validateFkInTxn validates foreign key constraints within the provided
// transaction. If the provided table descriptor version is newer than the

// validateFkInTxn validates foreign key constraints within the provided
// transaction. If the provided table descriptor version is newer than the
// cluster version, it will be used in the InternalExecutor that performs the
// validation query.
//
// TODO (lucy): The special case where the table descriptor version is the same
// as the cluster version only happens because the query in VALIDATE CONSTRAINT
// still runs in the user transaction instead of a step in the schema changer.
// When that's no longer true, this function should be updated.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func validateFkInTxn(
	ctx context.Context,
	leaseMgr *lease.Manager,
	evalCtx *tree.EvalContext,
	tableDesc *MutableTableDescriptor,
	txn *kv.Txn,
	fkName string,
) error {
	ie := evalCtx.InternalExecutor.(*InternalExecutor)
	if tableDesc.Version > tableDesc.ClusterVersion.Version {
		newTc := descs.NewCollection(leaseMgr, evalCtx.Settings)
		// Pretend that the schema has been modified.
		if err := newTc.AddUncommittedTable(*tableDesc); err != nil {
			return err
		}

		ie.tcModifier = newTc
		defer func() {
			ie.tcModifier = nil
		}()
	}

	var fk *sqlbase.ForeignKeyConstraint
	for i := range tableDesc.OutboundFKs {
		def := &tableDesc.OutboundFKs[i]
		if def.Name == fkName {
			fk = def
			break
		}
	}
	if fk == nil {
		return errors.AssertionFailedf("foreign key %s does not exist", fkName)
	}

	return validateForeignKey(ctx, tableDesc.TableDesc(), fk, ie, txn, evalCtx.Codec)
}
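
// exampleFkValidationQuery sketches the anti-join shape that foreign key
// validation conceptually runs: find one referencing row with no match in the
// referenced table. It is illustrative only; the query actually built by
// validateForeignKey handles composite keys, NULL semantics, and MATCH
// options, and the table/column parameters here are assumptions.
func exampleFkValidationQuery(child, parent, fkCol, refCol string) string {
	// A returned row is an orphaned reference, so validation fails if the
	// query produces any result.
	return fmt.Sprintf(
		`SELECT s.%[3]s FROM %[1]s AS s LEFT OUTER JOIN %[2]s AS t ON s.%[3]s = t.%[4]s `+
			`WHERE t.%[4]s IS NULL AND s.%[3]s IS NOT NULL LIMIT 1`,
		child, parent, fkCol, refCol,
	)
}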

// columnBackfillInTxn backfills columns for all mutation columns in
// the mutation list.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func columnBackfillInTxn(
	ctx context.Context,
	txn *kv.Txn,
	tc *descs.Collection,
	evalCtx *tree.EvalContext,
	tableDesc *sqlbase.ImmutableTableDescriptor,
	traceKV bool,
) error {
	// A column backfill in the ADD state is a noop.
	if tableDesc.Adding() {
		return nil
	}
	var backfiller backfill.ColumnBackfiller
	if err := backfiller.Init(ctx, evalCtx, tableDesc); err != nil {
		return err
	}
	// otherTableDescs contains any other table descriptors required by the
	// backfiller processor.
	var otherTableDescs []*sqlbase.ImmutableTableDescriptor
	fkTables, err := row.MakeFkMetadata(
		ctx,
		tableDesc,
		row.CheckUpdates,
		row.NoLookup,
		row.NoCheckPrivilege,
		nil, /* AnalyzeExprFunction */
		nil, /* CheckHelper */
	)
	if err != nil {
		return err
	}
	// All the FKs here are guaranteed to be created in the same transaction
	// or else this table would be created in the ADD state.
	for k := range fkTables {
		t := tc.GetUncommittedTableByID(k)
		if (descs.UncommittedTable{}) == t || !t.IsNewTable() {
			return errors.AssertionFailedf(
				"table %s not created in the same transaction as id = %d", tableDesc.Name, k)
		}
		otherTableDescs = append(otherTableDescs, t.ImmutableTableDescriptor)
	}
	sp := tableDesc.PrimaryIndexSpan(evalCtx.Codec)
	for sp.Key != nil {
		var err error
		sp.Key, err = backfiller.RunColumnBackfillChunk(ctx,
			txn, tableDesc, otherTableDescs, sp, columnTruncateAndBackfillChunkSize,
			false /* alsoCommit */, traceKV)
		if err != nil {
			return err
		}
	}
	return nil
}

// indexBackfillInTxn runs the index backfill over the table's primary
// index span, one chunk at a time.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func indexBackfillInTxn(
	ctx context.Context,
	txn *kv.Txn,
	evalCtx *tree.EvalContext,
	tableDesc *sqlbase.ImmutableTableDescriptor,
	traceKV bool,
) error {
	var backfiller backfill.IndexBackfiller
	if err := backfiller.Init(evalCtx, tableDesc); err != nil {
		return err
	}
	sp := tableDesc.PrimaryIndexSpan(evalCtx.Codec)
	for sp.Key != nil {
		var err error
		sp.Key, err = backfiller.RunIndexBackfillChunk(ctx,
			txn, tableDesc, sp, indexTxnBackfillChunkSize, false /* alsoCommit */, traceKV)
		if err != nil {
			return err
		}
	}
	return nil
}

// indexTruncateInTxn deletes an index from a table.
//
// It operates entirely on the current goroutine and is thus able to
// reuse an existing kv.Txn safely.
func indexTruncateInTxn(
	ctx context.Context,
	txn *kv.Txn,
	execCfg *ExecutorConfig,
	evalCtx *tree.EvalContext,
	tableDesc *sqlbase.ImmutableTableDescriptor,
	idx *sqlbase.IndexDescriptor,
	traceKV bool,
) error {
	alloc := &sqlbase.DatumAlloc{}
	var sp roachpb.Span
	for done := false; !done; done = sp.Key == nil {
		rd, err := row.MakeDeleter(
			ctx, txn, execCfg.Codec, tableDesc, nil, nil, row.SkipFKs, evalCtx, alloc,
		)
		if err != nil {
			return err
		}
		td := tableDeleter{rd: rd, alloc: alloc}
		if err := td.init(ctx, txn, evalCtx); err != nil {
			return err
		}
		sp, err = td.deleteIndex(
			ctx, idx, sp, indexTruncateChunkSize, traceKV,
		)
		if err != nil {
			return err
		}
	}
	// Remove index zone configs.
	return RemoveIndexZoneConfigs(ctx, txn, execCfg, tableDesc.ID, []sqlbase.IndexDescriptor{*idx})
}
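
// exampleChunkedSpanLoop demonstrates the resume-span pattern shared by
// columnBackfillInTxn, indexBackfillInTxn, and indexTruncateInTxn: each chunk
// call returns the key at which to resume, and the loop terminates once that
// key is nil. processChunk stands in for RunColumnBackfillChunk and friends;
// the function is an illustrative sketch, not part of the schema changer.
func exampleChunkedSpanLoop(
	ctx context.Context,
	sp roachpb.Span,
	processChunk func(ctx context.Context, sp roachpb.Span, chunkSize int64) (roachpb.Key, error),
) error {
	const chunkSize = 100
	for sp.Key != nil {
		var err error
		// Process at most chunkSize rows starting at sp.Key; the returned
		// resume key becomes the new start of the span.
		sp.Key, err = processChunk(ctx, sp, chunkSize)
		if err != nil {
			return err
		}
	}
	return nil
}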