github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/schema_changer.go

// Copyright 2015 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
	"context"
	"fmt"
	"math"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/security"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/parser"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/grpcutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
)

const (
	// RunningStatusDrainingNames used to indicate that the job was draining names
	// for dropped descriptors. This constant is now deprecated and only exists
	// to be used for migrating old jobs.
	RunningStatusDrainingNames jobs.RunningStatus = "draining names"
	// RunningStatusWaitingGC is for jobs that are currently in progress and
	// are waiting for the GC interval to expire.
	RunningStatusWaitingGC jobs.RunningStatus = "waiting for GC TTL"
	// RunningStatusDeleteOnly is for jobs that are currently waiting on
	// the cluster to converge to seeing the schema element in the DELETE_ONLY
	// state.
	RunningStatusDeleteOnly jobs.RunningStatus = "waiting in DELETE-ONLY"
	// RunningStatusDeleteAndWriteOnly is for jobs that are currently waiting on
	// the cluster to converge to seeing the schema element in the
	// DELETE_AND_WRITE_ONLY state.
	RunningStatusDeleteAndWriteOnly jobs.RunningStatus = "waiting in DELETE-AND-WRITE_ONLY"
	// RunningStatusBackfill is for jobs that are currently running a backfill
	// for a schema element.
	RunningStatusBackfill jobs.RunningStatus = "populating schema"
	// RunningStatusValidation is for jobs that are currently validating
	// a schema element.
	RunningStatusValidation jobs.RunningStatus = "validating schema"
)
// SchemaChanger is used to change the schema on a table.
type SchemaChanger struct {
	tableID           sqlbase.ID
	mutationID        sqlbase.MutationID
	droppedDatabaseID sqlbase.ID
	sqlInstanceID     base.SQLInstanceID
	db                *kv.DB
	leaseMgr          *lease.Manager

	testingKnobs   *SchemaChangerTestingKnobs
	distSQLPlanner *DistSQLPlanner
	jobRegistry    *jobs.Registry
	// Keep a reference to the job related to this schema change
	// so that we don't need to read the job again while updating
	// the status of the job.
	job *jobs.Job
	// Caches updated by DistSQL.
	rangeDescriptorCache *kvcoord.RangeDescriptorCache
	leaseHolderCache     *kvcoord.LeaseHolderCache
	clock                *hlc.Clock
	settings             *cluster.Settings
	execCfg              *ExecutorConfig
	ieFactory            sqlutil.SessionBoundInternalExecutorFactory
}

// NewSchemaChangerForTesting only for tests.
func NewSchemaChangerForTesting(
	tableID sqlbase.ID,
	mutationID sqlbase.MutationID,
	sqlInstanceID base.SQLInstanceID,
	db kv.DB,
	leaseMgr *lease.Manager,
	jobRegistry *jobs.Registry,
	execCfg *ExecutorConfig,
	settings *cluster.Settings,
) SchemaChanger {
	return SchemaChanger{
		tableID:       tableID,
		mutationID:    mutationID,
		sqlInstanceID: sqlInstanceID,
		db:            &db,
		leaseMgr:      leaseMgr,
		jobRegistry:   jobRegistry,
		settings:      settings,
		execCfg:       execCfg,
	}
}

// isPermanentSchemaChangeError returns true if the error results in
// a permanent failure of a schema change. This function is a whitelist
// instead of a blacklist: only known safe errors are confirmed to not be
// permanent errors. Anything unknown is assumed to be permanent.
func isPermanentSchemaChangeError(err error) bool {
	if err == nil {
		return false
	}

	if grpcutil.IsClosedConnection(err) {
		return false
	}

	// Ignore the error thrown because of a read at a very old timestamp.
	// The backfill will grab a new timestamp to read at for the rest
	// of the backfill.
	// TODO(knz): this should really use errors.Is(). However, until/unless
	// we no longer receive errors from 19.1 nodes, the string
	// comparison must remain.
	if strings.Contains(err.Error(), "must be after replica GC threshold") {
		return false
	}

	if pgerror.IsSQLRetryableError(err) {
		return false
	}

	if errors.IsAny(err,
		context.Canceled,
		context.DeadlineExceeded,
		errExistingSchemaChangeLease,
		errExpiredSchemaChangeLease,
		errNotHitGCTTLDeadline,
		errSchemaChangeDuringDrain,
		errSchemaChangeNotFirstInLine,
		errTableVersionMismatchSentinel,
	) {
		return false
	}

	switch pgerror.GetPGCode(err) {
	case pgcode.SerializationFailure, pgcode.InternalConnectionFailure, pgcode.DeprecatedInternalConnectionFailure:
		return false

	case pgcode.Internal, pgcode.RangeUnavailable, pgcode.DeprecatedRangeUnavailable:
		if strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
			return false
		}
	}

	return true
}
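// Illustrative sketch (not part of the original file): how a caller can use
// isPermanentSchemaChangeError to decide between retrying and failing, in the
// style of the retry loop in (schemaChangeResumer).Resume further below.
// retryUntilPermanent and retryableOp are hypothetical names.
func retryUntilPermanent(ctx context.Context, retryableOp func(context.Context) error) error {
	opts := retry.Options{
		InitialBackoff: 100 * time.Millisecond,
		MaxBackoff:     20 * time.Second,
		Multiplier:     1.5,
	}
	var err error
	for r := retry.StartWithCtx(ctx, opts); r.Next(); {
		if err = retryableOp(ctx); err == nil || isPermanentSchemaChangeError(err) {
			return err // success, or an error that must not be retried
		}
		// Transient error: back off and try again.
	}
	return err // the context was canceled; return the last error as-is
}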
var (
	errExistingSchemaChangeLease  = errors.Newf("an outstanding schema change lease exists")
	errExpiredSchemaChangeLease   = errors.Newf("the schema change lease has expired")
	errSchemaChangeNotFirstInLine = errors.Newf("schema change not first in line")
	errNotHitGCTTLDeadline        = errors.Newf("not hit gc ttl deadline")
	errSchemaChangeDuringDrain    = errors.Newf("a schema change ran during the drain phase, re-increment")
)

type errTableVersionMismatch struct {
	version  sqlbase.DescriptorVersion
	expected sqlbase.DescriptorVersion
}

var errTableVersionMismatchSentinel = errTableVersionMismatch{}

func makeErrTableVersionMismatch(version, expected sqlbase.DescriptorVersion) error {
	return errors.Mark(errors.WithStack(errTableVersionMismatch{
		version:  version,
		expected: expected,
	}), errTableVersionMismatchSentinel)
}

func (e errTableVersionMismatch) Error() string {
	return fmt.Sprintf("table version mismatch: %d, expected: %d", e.version, e.expected)
}
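// Illustrative sketch (not part of the original file): errors.Mark makes any
// error produced by makeErrTableVersionMismatch match the sentinel under
// errors.Is, regardless of the embedded version numbers; this is what lets
// isPermanentSchemaChangeError above treat all mismatches as retryable.
// exampleIsMismatch is a hypothetical helper.
func exampleIsMismatch(err error) bool {
	// True for any error created via makeErrTableVersionMismatch(...).
	return errors.Is(err, errTableVersionMismatchSentinel)
}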
// maybe backfill a created table by executing the AS query. Return nil if
// successfully backfilled.
//
// Note that this does not connect to the tracing settings of the
// surrounding SQL transaction. This should be OK as (at the time of
// this writing) this code path is only used for standalone CREATE
// TABLE AS statements, which cannot be traced.
func (sc *SchemaChanger) maybeBackfillCreateTableAs(
	ctx context.Context, table *sqlbase.TableDescriptor,
) error {
	if !(table.Adding() && table.IsAs()) {
		return nil
	}

	return sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		txn.SetFixedTimestamp(ctx, table.CreateAsOfTime)

		// Create an internal planner as the planner used to serve the user query
		// would have committed by this point.
		p, cleanup := NewInternalPlanner("ctasBackfill", txn, security.RootUser, &MemoryMetrics{}, sc.execCfg)
		defer cleanup()
		localPlanner := p.(*planner)
		stmt, err := parser.ParseOne(table.CreateQuery)
		if err != nil {
			return err
		}

		// Construct an optimized logical plan of the AS source stmt.
		localPlanner.stmt = &Statement{Statement: stmt}
		localPlanner.optPlanningCtx.init(localPlanner)

		localPlanner.runWithOptions(resolveFlags{skipCache: true}, func() {
			err = localPlanner.makeOptimizerPlan(ctx)
		})

		if err != nil {
			return err
		}
		defer localPlanner.curPlan.close(ctx)

		res := roachpb.BulkOpSummary{}
		rw := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error {
			// TODO(adityamaru): Use the BulkOpSummary for either telemetry or to
			// return to user.
			var counts roachpb.BulkOpSummary
			if err := protoutil.Unmarshal([]byte(*row[0].(*tree.DBytes)), &counts); err != nil {
				return err
			}
			res.Add(counts)
			return nil
		})
		recv := MakeDistSQLReceiver(
			ctx,
			rw,
			tree.Rows,
			sc.execCfg.RangeDescriptorCache,
			sc.execCfg.LeaseHolderCache,
			txn,
			func(ts hlc.Timestamp) {
				sc.clock.Update(ts)
			},
			// Make a session tracing object on-the-fly. This is OK
			// because it sets "enabled: false" and thus none of the
			// other fields are used.
			&SessionTracing{},
		)
		defer recv.Release()

		willDistribute := willDistributePlan(
			ctx, localPlanner.execCfg.NodeID,
			localPlanner.extendedEvalCtx.SessionData.DistSQLMode,
			localPlanner.curPlan.main,
		)
		var planAndRunErr error
		localPlanner.runWithOptions(resolveFlags{skipCache: true}, func() {
			// Resolve subqueries before running the queries' physical plan.
			if len(localPlanner.curPlan.subqueryPlans) != 0 {
				if !sc.distSQLPlanner.PlanAndRunSubqueries(
					ctx, localPlanner, localPlanner.ExtendedEvalContextCopy,
					localPlanner.curPlan.subqueryPlans, recv, willDistribute,
				) {
					if planAndRunErr = rw.Err(); planAndRunErr != nil {
						return
					}
					if planAndRunErr = recv.commErr; planAndRunErr != nil {
						return
					}
				}
			}

			isLocal := !willDistribute
			out := execinfrapb.ProcessorCoreUnion{BulkRowWriter: &execinfrapb.BulkRowWriterSpec{
				Table: *table,
			}}

			PlanAndRunCTAS(ctx, sc.distSQLPlanner, localPlanner,
				txn, isLocal, localPlanner.curPlan.main, out, recv)
			if planAndRunErr = rw.Err(); planAndRunErr != nil {
				return
			}
			if planAndRunErr = recv.commErr; planAndRunErr != nil {
				return
			}
		})

		return planAndRunErr
	})
}

// maybe make a table PUBLIC if it's in the ADD state.
func (sc *SchemaChanger) maybeMakeAddTablePublic(
	ctx context.Context, table *sqlbase.TableDescriptor,
) error {
	if table.Adding() {
		fks := table.AllActiveAndInactiveForeignKeys()
		for _, fk := range fks {
			if err := sc.waitToUpdateLeases(ctx, fk.ReferencedTableID); err != nil {
				return err
			}
		}

		if _, err := sc.leaseMgr.Publish(
			ctx,
			table.ID,
			func(tbl *sqlbase.MutableTableDescriptor) error {
				if !tbl.Adding() {
					return lease.ErrDidntUpdateDescriptor
				}
				tbl.State = sqlbase.TableDescriptor_PUBLIC
				return nil
			},
			func(txn *kv.Txn) error { return nil },
		); err != nil {
			return err
		}
	}

	return nil
}
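// Illustrative sketch (not part of the original file): the Publish pattern
// used throughout this file. The update closure mutates the descriptor; to
// leave the descriptor (and its version) unchanged it returns
// lease.ErrDidntUpdateDescriptor, as maybeMakeAddTablePublic does above.
// examplePublishPublic is a hypothetical helper.
func examplePublishPublic(ctx context.Context, leaseMgr *lease.Manager, id sqlbase.ID) error {
	_, err := leaseMgr.Publish(
		ctx,
		id,
		func(tbl *sqlbase.MutableTableDescriptor) error {
			if !tbl.Adding() {
				// No-op: don't bump the descriptor version.
				return lease.ErrDidntUpdateDescriptor
			}
			tbl.State = sqlbase.TableDescriptor_PUBLIC
			return nil
		},
		func(txn *kv.Txn) error { return nil }, // no extra work in the txn
	)
	return err
}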
// Drain old names from the cluster.
func (sc *SchemaChanger) drainNames(ctx context.Context) error {
	// Publish a new version with all the names drained after everyone
	// has seen the version with the new name. All the draining names
	// can be reused henceforth.
	var namesToReclaim []sqlbase.TableDescriptor_NameInfo
	_, err := sc.leaseMgr.Publish(
		ctx,
		sc.tableID,
		func(desc *sqlbase.MutableTableDescriptor) error {
			if sc.testingKnobs.OldNamesDrainedNotification != nil {
				sc.testingKnobs.OldNamesDrainedNotification()
			}
			// Free up the old name(s) for reuse.
			namesToReclaim = desc.DrainingNames
			desc.DrainingNames = nil
			return nil
		},
		// Reclaim all the old names.
		func(txn *kv.Txn) error {
			b := txn.NewBatch()
			for _, drain := range namesToReclaim {
				err := sqlbase.RemoveObjectNamespaceEntry(
					ctx, txn, sc.execCfg.Codec, drain.ParentID, drain.ParentSchemaID, drain.Name, false, /* KVTrace */
				)
				if err != nil {
					return err
				}
			}
			return txn.Run(ctx, b)
		},
	)
	return err
}

func startGCJob(
	ctx context.Context,
	db *kv.DB,
	jobRegistry *jobs.Registry,
	username string,
	schemaChangeDescription string,
	details jobspb.SchemaChangeGCDetails,
) error {
	var sj *jobs.StartableJob
	jobRecord := CreateGCJobRecord(schemaChangeDescription, username, details)
	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		var err error
		if sj, err = jobRegistry.CreateStartableJobWithTxn(ctx, jobRecord, txn, nil /* resultCh */); err != nil {
			return err
		}
		return nil
	}); err != nil {
		return err
	}
	if _, err := sj.Start(ctx); err != nil {
		return err
	}
	return nil
}
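// Illustrative sketch (not part of the original file): kicking off a GC job
// for a single dropped table, mirroring the call in exec() below.
// exampleStartTableGC and its arguments are hypothetical.
func exampleStartTableGC(
	ctx context.Context, db *kv.DB, registry *jobs.Registry, tableID sqlbase.ID,
) error {
	details := jobspb.SchemaChangeGCDetails{
		Tables: []jobspb.SchemaChangeGCDetails_DroppedID{
			{ID: tableID, DropTime: timeutil.Now().UnixNano()},
		},
	}
	// The job is created and started outside the caller's transaction.
	return startGCJob(ctx, db, registry, security.RootUser, "DROP TABLE ...", details)
}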
// Execute the entire schema change in steps.
//
// If the txn that queued the schema changer did not commit, this will be a
// no-op, as we'll fail to find the job for our mutation in the jobs registry.
func (sc *SchemaChanger) exec(ctx context.Context) error {
	ctx = logtags.AddTag(ctx, "scExec", nil)

	// TODO (lucy): Now that marking a schema change job as succeeded doesn't
	// happen in the same transaction as removing mutations from a table
	// descriptor, it seems possible for a job to be resumed after the mutation
	// has already been removed. If there's a mutation provided, we should check
	// whether it actually exists on the table descriptor and exit the job if not.
	tableDesc, notFirst, err := sc.notFirstInLine(ctx)
	if err != nil {
		return err
	}
	if notFirst {
		log.Infof(ctx,
			"schema change on %s (%d v%d) mutation %d: another change is still in progress",
			tableDesc.Name, sc.tableID, tableDesc.Version, sc.mutationID,
		)
		return errSchemaChangeNotFirstInLine
	}

	log.Infof(ctx,
		"schema change on %s (%d v%d) mutation %d starting execution...",
		tableDesc.Name, sc.tableID, tableDesc.Version, sc.mutationID,
	)

	if tableDesc.HasDrainingNames() {
		if err := sc.drainNames(ctx); err != nil {
			return err
		}
	}

	if tableDesc.Dropped() && sc.droppedDatabaseID == sqlbase.InvalidID {
		// We've dropped this table; let's kick off a GC job.
		dropTime := timeutil.Now().UnixNano()
		if tableDesc.DropTime > 0 {
			dropTime = tableDesc.DropTime
		}
		gcDetails := jobspb.SchemaChangeGCDetails{
			Tables: []jobspb.SchemaChangeGCDetails_DroppedID{
				{
					ID:       tableDesc.ID,
					DropTime: dropTime,
				},
			},
		}
		if err := startGCJob(
			ctx, sc.db, sc.jobRegistry, sc.job.Payload().Username, sc.job.Payload().Description, gcDetails,
		); err != nil {
			return err
		}
	}

	if err := sc.maybeBackfillCreateTableAs(ctx, tableDesc); err != nil {
		return err
	}

	if err := sc.maybeMakeAddTablePublic(ctx, tableDesc); err != nil {
		return err
	}

	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	waitToUpdateLeases := func(refreshStats bool) error {
		if err := sc.waitToUpdateLeases(ctx, sc.tableID); err != nil {
			if errors.Is(err, sqlbase.ErrDescriptorNotFound) {
				return err
			}
			log.Warningf(ctx, "waiting to update leases: %+v", err)
			// As we are dismissing the error, go through the recording motions.
			// This ensures that any important error gets reported to Sentry, etc.
			sqltelemetry.RecordError(ctx, err, &sc.settings.SV)
		}
		// We wait to trigger a stats refresh until we know the leases have been
		// updated.
		if refreshStats {
			sc.refreshStats()
		}
		return nil
	}

	if sc.mutationID == sqlbase.InvalidMutationID {
		// Nothing more to do.
		isCreateTableAs := tableDesc.Adding() && tableDesc.IsAs()
		return waitToUpdateLeases(isCreateTableAs /* refreshStats */)
	}

	if err := sc.initJobRunningStatus(ctx); err != nil {
		if log.V(2) {
			log.Infof(ctx, "failed to update job status: %+v", err)
		}
		// Go through the recording motions. See the comment above.
		sqltelemetry.RecordError(ctx, err, &sc.settings.SV)
	}

	// Run through the mutation state machine and backfill.
	err = sc.runStateMachineAndBackfill(ctx)
	if err != nil {
		return err
	}

	defer func() {
		if err := waitToUpdateLeases(err == nil /* refreshStats */); err != nil && !errors.Is(err, sqlbase.ErrDescriptorNotFound) {
			// We only expect ErrDescriptorNotFound to be returned. This happens
			// when the table descriptor was deleted. We can ignore this error.
			log.Warningf(ctx, "unexpected error while waiting for leases to update: %+v", err)
			// As we are dismissing the error, go through the recording motions.
			// This ensures that any important error gets reported to Sentry, etc.
			sqltelemetry.RecordError(ctx, err, &sc.settings.SV)
		}
	}()

	return err
}
// handlePermanentSchemaChangeError cleans up schema changes that cannot
// be completed successfully. For schema changes with mutations, it reverses the
// direction of the mutations so that we can step through the state machine
// backwards. Note that schema changes which don't have mutations are meant to
// run quickly and aren't truly cancellable in the small window they require to
// complete. In that case, cleanup consists of simply resuming the same schema
// change.
// TODO (lucy): This is how "rolling back" has always worked for non-mutation
// schema change jobs, but it's unnatural for the job API and we should rethink
// it.
func (sc *SchemaChanger) handlePermanentSchemaChangeError(
	ctx context.Context, err error, evalCtx *extendedEvalContext,
) error {
	if rollbackErr := sc.rollbackSchemaChange(ctx, err); rollbackErr != nil {
		// From now on, the original error will only surface as a secondary error
		// of the returned rollback error, so we record the original error now.
		secondary := errors.Wrap(err, "original error when rolling back mutations")
		sqltelemetry.RecordError(ctx, secondary, &sc.settings.SV)
		return errors.WithSecondaryError(rollbackErr, secondary)
	}

	// TODO (lucy): This is almost the same as in exec(), maybe refactor.
	// Wait for the schema change to propagate to all nodes after this function
	// returns, so that the new schema is live everywhere. This is not needed for
	// correctness but is done to make the UI experience/tests predictable.
	waitToUpdateLeases := func(refreshStats bool) error {
		if err := sc.waitToUpdateLeases(ctx, sc.tableID); err != nil {
			if errors.Is(err, sqlbase.ErrDescriptorNotFound) {
				return err
			}
			log.Warningf(ctx, "waiting to update leases: %+v", err)
			// As we are dismissing the error, go through the recording motions.
			// This ensures that any important error gets reported to Sentry, etc.
			sqltelemetry.RecordError(ctx, err, &sc.settings.SV)
		}
		// We wait to trigger a stats refresh until we know the leases have been
		// updated.
		if refreshStats {
			sc.refreshStats()
		}
		return nil
	}

	defer func() {
		if err := waitToUpdateLeases(false /* refreshStats */); err != nil && !errors.Is(err, sqlbase.ErrDescriptorNotFound) {
			// We only expect ErrDescriptorNotFound to be returned. This happens
			// when the table descriptor was deleted. We can ignore this error.
			log.Warningf(ctx, "unexpected error while waiting for leases to update: %+v", err)
			// As we are dismissing the error, go through the recording motions.
			// This ensures that any important error gets reported to Sentry, etc.
			sqltelemetry.RecordError(ctx, err, &sc.settings.SV)
		}
	}()

	return nil
}
// initialize the job running status.
func (sc *SchemaChanger) initJobRunningStatus(ctx context.Context) error {
	return sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		desc, err := sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}

		var runStatus jobs.RunningStatus
		for _, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Mutations are applied in a FIFO order. Only apply the first set of
				// mutations if they have the mutation ID we're looking for.
				break
			}

			switch mutation.Direction {
			case sqlbase.DescriptorMutation_ADD:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_ONLY:
					runStatus = RunningStatusDeleteOnly
				}

			case sqlbase.DescriptorMutation_DROP:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY:
					runStatus = RunningStatusDeleteAndWriteOnly
				}
			}
		}
		if runStatus != "" && !desc.Dropped() {
			if err := sc.job.WithTxn(txn).RunningStatus(
				ctx, func(ctx context.Context, details jobspb.Details) (jobs.RunningStatus, error) {
					return runStatus, nil
				}); err != nil {
				return errors.Wrapf(err, "failed to update job status")
			}
		}
		return nil
	})
}

func (sc *SchemaChanger) rollbackSchemaChange(ctx context.Context, err error) error {
	log.Warningf(ctx, "reversing schema change %d due to irrecoverable error: %s", *sc.job.ID(), err)
	if errReverse := sc.maybeReverseMutations(ctx, err); errReverse != nil {
		return errReverse
	}

	if fn := sc.testingKnobs.RunAfterMutationReversal; fn != nil {
		if err := fn(*sc.job.ID()); err != nil {
			return err
		}
	}

	// After this point the schema change has been reversed and any retry
	// of the schema change will act upon the reversed schema change.
	return sc.runStateMachineAndBackfill(ctx)
}
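// Illustrative sketch (not part of the original file): mutations on a table
// descriptor are ordered FIFO by MutationID, which is why loops in this file
// stop at the first mutation whose ID differs from the one being processed.
// firstMutationGroup is a hypothetical helper isolating that pattern.
func firstMutationGroup(
	desc *sqlbase.TableDescriptor, id sqlbase.MutationID,
) []sqlbase.DescriptorMutation {
	var group []sqlbase.DescriptorMutation
	for _, m := range desc.Mutations {
		if m.MutationID != id {
			// Later mutation groups are handled by their own schema changes.
			break
		}
		group = append(group, m)
	}
	return group
}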
// RunStateMachineBeforeBackfill moves the state machine forward
// and waits to ensure that all nodes are seeing the latest version
// of the table.
func (sc *SchemaChanger) RunStateMachineBeforeBackfill(ctx context.Context) error {
	var runStatus jobs.RunningStatus
	if _, err := sc.leaseMgr.Publish(ctx, sc.tableID, func(desc *sqlbase.MutableTableDescriptor) error {
		runStatus = ""
		// Apply mutations belonging to the same version.
		for i, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Mutations are applied in a FIFO order. Only apply the first set of
				// mutations if they have the mutation ID we're looking for.
				break
			}
			switch mutation.Direction {
			case sqlbase.DescriptorMutation_ADD:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_ONLY:
					// TODO(vivek): while moving up the state is appropriate,
					// it will be better to run the backfill of a unique index
					// twice: once in the DELETE_ONLY state to confirm that
					// the index can indeed be created, and subsequently in the
					// DELETE_AND_WRITE_ONLY state to fill in the missing elements of the
					// index (INSERT and UPDATE that happened in the interim).
					desc.Mutations[i].State = sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY
					runStatus = RunningStatusDeleteAndWriteOnly

				case sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY:
					// The state change has already moved forward.
				}

			case sqlbase.DescriptorMutation_DROP:
				switch mutation.State {
				case sqlbase.DescriptorMutation_DELETE_ONLY:
					// The state change has already moved forward.

				case sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY:
					desc.Mutations[i].State = sqlbase.DescriptorMutation_DELETE_ONLY
					runStatus = RunningStatusDeleteOnly
				}
			}
		}
		if doNothing := runStatus == "" || desc.Dropped(); doNothing {
			// Return an error so that Publish() doesn't increment the version.
			return lease.ErrDidntUpdateDescriptor
		}
		return nil
	}, func(txn *kv.Txn) error {
		if sc.job != nil {
			if err := sc.job.WithTxn(txn).RunningStatus(ctx, func(ctx context.Context, details jobspb.Details) (jobs.RunningStatus, error) {
				return runStatus, nil
			}); err != nil {
				return errors.Wrap(err, "failed to update job status")
			}
		}
		return nil
	}); err != nil {
		return err
	}

	// Wait for the state change to propagate to all leases.
	return sc.waitToUpdateLeases(ctx, sc.tableID)
}

// Wait until the entire cluster has been updated to the latest version
// of the table descriptor.
func (sc *SchemaChanger) waitToUpdateLeases(ctx context.Context, tableID sqlbase.ID) error {
	// Aggressively retry because there might be a user waiting for the
	// schema change to complete.
	retryOpts := retry.Options{
		InitialBackoff: 20 * time.Millisecond,
		MaxBackoff:     200 * time.Millisecond,
		Multiplier:     2,
	}
	log.Infof(ctx, "waiting for a single version of table %d...", tableID)
	version, err := sc.leaseMgr.WaitForOneVersion(ctx, tableID, retryOpts)
	log.Infof(ctx, "waiting for a single version of table %d... done (at v %d)", tableID, version)
	return err
}
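// Illustrative sketch (not part of the original file): the forward state
// transitions applied in RunStateMachineBeforeBackfill above, isolated as a
// pure function. nextState is a hypothetical name; the enum type names follow
// the usual gogoproto convention and are assumptions here.
func nextState(
	dir sqlbase.DescriptorMutation_Direction, s sqlbase.DescriptorMutation_State,
) (sqlbase.DescriptorMutation_State, bool) {
	switch dir {
	case sqlbase.DescriptorMutation_ADD:
		if s == sqlbase.DescriptorMutation_DELETE_ONLY {
			return sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY, true
		}
	case sqlbase.DescriptorMutation_DROP:
		if s == sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY {
			return sqlbase.DescriptorMutation_DELETE_ONLY, true
		}
	}
	// Already moved forward; nothing to do.
	return s, false
}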
// done finalizes the mutations (adds new cols/indexes to the table).
// It ensures that all nodes are on the current (pre-update) version of the
// schema.
// It also kicks off GC jobs as needed.
// Returns the updated descriptor.
func (sc *SchemaChanger) done(ctx context.Context) (*sqlbase.ImmutableTableDescriptor, error) {
	isRollback := false

	// Get the other tables whose foreign key backreferences need to be removed.
	// We make a call to PublishMultiple to handle adding the foreign key
	// backreferences to the referenced tables.
	var fksByBackrefTable map[sqlbase.ID][]*sqlbase.ConstraintToUpdate
	var interleaveParents map[sqlbase.ID]struct{}
	err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		fksByBackrefTable = make(map[sqlbase.ID][]*sqlbase.ConstraintToUpdate)
		interleaveParents = make(map[sqlbase.ID]struct{})

		desc, err := sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}
		for _, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				break
			}
			if constraint := mutation.GetConstraint(); constraint != nil &&
				constraint.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY &&
				mutation.Direction == sqlbase.DescriptorMutation_ADD &&
				constraint.ForeignKey.Validity == sqlbase.ConstraintValidity_Unvalidated {
				// Add the backref table to the referenced table with an unvalidated
				// foreign key constraint.
				fk := &constraint.ForeignKey
				if fk.ReferencedTableID != desc.ID {
					fksByBackrefTable[constraint.ForeignKey.ReferencedTableID] = append(fksByBackrefTable[constraint.ForeignKey.ReferencedTableID], constraint)
				}
			} else if swap := mutation.GetPrimaryKeySwap(); swap != nil {
				// If any old indexes (including the old primary index) being rewritten are interleaved
				// children, we will have to update their parents as well.
				for _, idxID := range append([]sqlbase.IndexID{swap.OldPrimaryIndexId}, swap.OldIndexes...) {
					oldIndex, err := desc.FindIndexByID(idxID)
					if err != nil {
						return err
					}
					if len(oldIndex.Interleave.Ancestors) != 0 {
						ancestor := oldIndex.Interleave.Ancestors[len(oldIndex.Interleave.Ancestors)-1]
						if ancestor.TableID != desc.ID {
							interleaveParents[ancestor.TableID] = struct{}{}
						}
					}
				}
				// Because we are not currently supporting primary key changes on tables/indexes
				// that are interleaved parents, we don't check oldPrimaryIndex.InterleavedBy.
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	tableIDsToUpdate := make([]sqlbase.ID, 0, len(fksByBackrefTable)+1)
	tableIDsToUpdate = append(tableIDsToUpdate, sc.tableID)
	for id := range fksByBackrefTable {
		tableIDsToUpdate = append(tableIDsToUpdate, id)
	}
	for id := range interleaveParents {
		if _, ok := fksByBackrefTable[id]; !ok {
			tableIDsToUpdate = append(tableIDsToUpdate, id)
		}
	}

	// Jobs (for GC, etc.) that need to be started immediately after the table
	// descriptor updates are published.
	var childJobs []*jobs.StartableJob
	update := func(txn *kv.Txn, descs map[sqlbase.ID]*sqlbase.MutableTableDescriptor) error {
		// Reset the vars here because the update function can be called multiple
		// times in a retry.
		isRollback = false
		childJobs = nil

		i := 0
		scDesc, ok := descs[sc.tableID]
		if !ok {
			return errors.AssertionFailedf("required table with ID %d not provided to update closure", sc.tableID)
		}

		for _, mutation := range scDesc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Mutations are applied in a FIFO order. Only apply the first set of
				// mutations if they have the mutation ID we're looking for.
				break
			}
			isRollback = mutation.Rollback
			if indexDesc := mutation.GetIndex(); mutation.Direction == sqlbase.DescriptorMutation_DROP &&
				indexDesc != nil {
				if canClearRangeForDrop(indexDesc) {
					// Appending to GCMutations is how we keep track of dropped index
					// names (for, e.g., zone config lookups), even though in the
					// absence of a GC job there's nothing to clean them up.
					scDesc.GCMutations = append(
						scDesc.GCMutations,
						sqlbase.TableDescriptor_GCDescriptorMutation{
							IndexID: indexDesc.ID,
						})

					dropTime := timeutil.Now().UnixNano()
					indexGCDetails := jobspb.SchemaChangeGCDetails{
						Indexes: []jobspb.SchemaChangeGCDetails_DroppedIndex{
							{
								IndexID:  indexDesc.ID,
								DropTime: dropTime,
							},
						},
						ParentID: sc.tableID,
					}

					description := sc.job.Payload().Description
					if isRollback {
						description = "ROLLBACK of " + description
					}
					gcJobRecord := CreateGCJobRecord(description, sc.job.Payload().Username, indexGCDetails)
					indexGCJob, err := sc.jobRegistry.CreateStartableJobWithTxn(ctx, gcJobRecord, txn, nil /* resultsCh */)
					if err != nil {
						return err
					}
					log.VEventf(ctx, 2, "created index GC job %d", *indexGCJob.ID())
					childJobs = append(childJobs, indexGCJob)
				}
			}
			if constraint := mutation.GetConstraint(); constraint != nil &&
				constraint.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY &&
				mutation.Direction == sqlbase.DescriptorMutation_ADD &&
				constraint.ForeignKey.Validity == sqlbase.ConstraintValidity_Unvalidated {
				// Add the backreference on the referenced table (which could be the
				// same table).
				backrefTable, ok := descs[constraint.ForeignKey.ReferencedTableID]
				if !ok {
					return errors.AssertionFailedf("required table with ID %d not provided to update closure", constraint.ForeignKey.ReferencedTableID)
				}
				backrefTable.InboundFKs = append(backrefTable.InboundFKs, constraint.ForeignKey)
			}

			// Some primary key change specific operations need to happen before
			// and after the index swap occurs.
			if pkSwap := mutation.GetPrimaryKeySwap(); pkSwap != nil {
				// We might have to update some zone configs for indexes that are
				// being rewritten. It is important that this is done _before_ the
				// index swap occurs. The logic that generates spans for subzone
				// configurations removes spans for indexes in the dropping state,
				// which we don't want. So, set up the zone configs before we swap.
				if err := sc.maybeUpdateZoneConfigsForPKChange(
					ctx, txn, sc.execCfg, scDesc.TableDesc(), pkSwap); err != nil {
					return err
				}
			}

			if err := scDesc.MakeMutationComplete(mutation); err != nil {
				return err
			}

			if pkSwap := mutation.GetPrimaryKeySwap(); pkSwap != nil {
				if fn := sc.testingKnobs.RunBeforePrimaryKeySwap; fn != nil {
					fn()
				}
				// If any old index had an interleaved parent, remove the
				// backreference from the parent.
				// N.B. This logic needs to be kept up to date with the
				// corresponding piece in runSchemaChangesInTxn.
				for _, idxID := range append(
					[]sqlbase.IndexID{pkSwap.OldPrimaryIndexId}, pkSwap.OldIndexes...) {
					oldIndex, err := scDesc.FindIndexByID(idxID)
					if err != nil {
						return err
					}
					if len(oldIndex.Interleave.Ancestors) != 0 {
						ancestorInfo := oldIndex.Interleave.Ancestors[len(oldIndex.Interleave.Ancestors)-1]
						ancestor := descs[ancestorInfo.TableID]
						ancestorIdx, err := ancestor.FindIndexByID(ancestorInfo.IndexID)
						if err != nil {
							return err
						}
						foundAncestor := false
						for k, ref := range ancestorIdx.InterleavedBy {
							if ref.Table == scDesc.ID && ref.Index == oldIndex.ID {
								if foundAncestor {
									return errors.AssertionFailedf(
										"ancestor entry in %s for %s@%s found more than once",
										ancestor.Name, scDesc.Name, oldIndex.Name)
								}
								ancestorIdx.InterleavedBy = append(
									ancestorIdx.InterleavedBy[:k], ancestorIdx.InterleavedBy[k+1:]...)
								foundAncestor = true
							}
						}
					}
				}
				// If we performed MakeMutationComplete on a PrimaryKeySwap mutation, then we need to start
				// a job for the index deletion mutations that the primary key swap mutation added, if any.
				if childJobs, err = sc.queueCleanupJobs(ctx, scDesc, txn, childJobs); err != nil {
					return err
				}
			}

			if computedColumnSwap := mutation.GetComputedColumnSwap(); computedColumnSwap != nil {
				if fn := sc.testingKnobs.RunBeforeComputedColumnSwap; fn != nil {
					fn()
				}

				// If we performed MakeMutationComplete on a computed column swap, then
				// we need to start a job for the column deletion that the swap mutation
				// added, if any.
				if childJobs, err = sc.queueCleanupJobs(ctx, scDesc, txn, childJobs); err != nil {
					return err
				}
			}
			i++
		}
		if i == 0 {
			// The table descriptor is unchanged. Don't let Publish() increment
			// the version.
			return lease.ErrDidntUpdateDescriptor
		}
		// Trim the executed mutations from the descriptor.
		scDesc.Mutations = scDesc.Mutations[i:]

		for i, g := range scDesc.MutationJobs {
			if g.MutationID == sc.mutationID {
				// Trim the executed mutation group from the descriptor.
				scDesc.MutationJobs = append(scDesc.MutationJobs[:i], scDesc.MutationJobs[i+1:]...)
				break
			}
		}
		return nil
	}

	descs, err := sc.leaseMgr.PublishMultiple(ctx, tableIDsToUpdate, update, func(txn *kv.Txn) error {
		schemaChangeEventType := EventLogFinishSchemaChange
		if isRollback {
			schemaChangeEventType = EventLogFinishSchemaRollback
		}

		// Log "Finish Schema Change" or "Finish Schema Change Rollback"
		// event. Only the table ID and mutation ID are logged; this can
		// be correlated with the DDL statement that initiated the change
		// using the mutation id.
		return MakeEventLogger(sc.execCfg).InsertEventRecord(
			ctx,
			txn,
			schemaChangeEventType,
			int32(sc.tableID),
			int32(sc.sqlInstanceID),
			struct {
				MutationID uint32
			}{uint32(sc.mutationID)},
		)
	})
	if fn := sc.testingKnobs.RunBeforeChildJobs; fn != nil {
		if len(childJobs) != 0 {
			fn()
		}
	}
	if err != nil {
		for _, job := range childJobs {
			if rollbackErr := job.CleanupOnRollback(ctx); rollbackErr != nil {
				log.Warningf(ctx, "failed to clean up job: %v", rollbackErr)
			}
		}
		return nil, err
	}
	for _, job := range childJobs {
		if _, err := job.Start(ctx); err != nil {
			log.Warningf(ctx, "starting job %d failed with error: %v", *job.ID(), err)
		}
		log.VEventf(ctx, 2, "started job %d", *job.ID())
	}
	return descs[sc.tableID], nil
}

// maybeUpdateZoneConfigsForPKChange moves zone configs for any rewritten
// indexes from the old index over to the new index.
func (sc *SchemaChanger) maybeUpdateZoneConfigsForPKChange(
	ctx context.Context,
	txn *kv.Txn,
	execCfg *ExecutorConfig,
	table *sqlbase.TableDescriptor,
	swapInfo *sqlbase.PrimaryKeySwap,
) error {
	zone, err := getZoneConfigRaw(ctx, txn, table.ID)
	if err != nil {
		return err
	}

	// If this table doesn't have a zone attached to it, don't do anything.
	if zone == nil {
		return nil
	}

	// For each rewritten index, point its subzones for the old index at the
	// new index.
	for i, oldID := range swapInfo.OldIndexes {
		for j := range zone.Subzones {
			subzone := &zone.Subzones[j]
			if subzone.IndexID == uint32(oldID) {
				// If we find a subzone matching an old index, copy its subzone
				// into a new subzone with the new index's ID.
				subzoneCopy := *subzone
				subzoneCopy.IndexID = uint32(swapInfo.NewIndexes[i])
				zone.SetSubzone(subzoneCopy)
			}
		}
	}

	// Write the zone back. This call regenerates the index spans that apply
	// to each partition in the index.
	_, err = writeZoneConfig(ctx, txn, table.ID, table, zone, execCfg, false)
	if err != nil && !sqlbase.IsCCLRequiredError(err) {
		return err
	}

	return nil
}
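// Illustrative sketch (not part of the original file): the subzone remapping
// performed in maybeUpdateZoneConfigsForPKChange above, shown on a minimal
// stand-in type so the transformation is easy to see. indexSubzone and
// remapSubzones are hypothetical and do not reflect the real zonepb API.
type indexSubzone struct{ IndexID uint32 }

func remapSubzones(subzones []indexSubzone, oldIDs, newIDs []sqlbase.IndexID) []indexSubzone {
	for i, oldID := range oldIDs {
		for _, sz := range subzones {
			if sz.IndexID == uint32(oldID) {
				// Keep the old entry (it still applies until the swap is
				// published) and add a copy pointing at the rewritten index.
				subzones = append(subzones, indexSubzone{IndexID: uint32(newIDs[i])})
			}
		}
	}
	return subzones
}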
// notFirstInLine returns true whenever the schema change has been queued
// up for execution after another schema change.
func (sc *SchemaChanger) notFirstInLine(
	ctx context.Context,
) (*sqlbase.TableDescriptor, bool, error) {
	var notFirst bool
	var desc *sqlbase.TableDescriptor
	err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		notFirst = false
		var err error
		desc, err = sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}
		for i, mutation := range desc.Mutations {
			if mutation.MutationID == sc.mutationID {
				notFirst = i != 0
				break
			}
		}
		return nil
	})
	return desc, notFirst, err
}

// runStateMachineAndBackfill runs the schema change state machine followed by
// the backfill.
func (sc *SchemaChanger) runStateMachineAndBackfill(ctx context.Context) error {
	if fn := sc.testingKnobs.RunBeforePublishWriteAndDelete; fn != nil {
		fn()
	}
	// Run through the mutation state machine before the backfill.
	if err := sc.RunStateMachineBeforeBackfill(ctx); err != nil {
		return err
	}

	// Run backfill(s).
	if err := sc.runBackfill(ctx); err != nil {
		return err
	}

	// Mark the mutations as completed.
	_, err := sc.done(ctx)
	return err
}

func (sc *SchemaChanger) refreshStats() {
	// Initiate an asynchronous run of CREATE STATISTICS. We use a large number
	// for rowsAffected because we want to make sure that stats always get
	// created/refreshed here.
	sc.execCfg.StatsRefresher.NotifyMutation(sc.tableID, math.MaxInt32 /* rowsAffected */)
}
// maybeReverseMutations reverses the direction of all the mutations with the
// mutationID. This is called after hitting an irrecoverable error while
// applying a schema change. If a column being added is reversed and dropped,
// all new indexes referencing the column will also be dropped.
func (sc *SchemaChanger) maybeReverseMutations(ctx context.Context, causingError error) error {
	if fn := sc.testingKnobs.RunBeforeMutationReversal; fn != nil {
		if err := fn(*sc.job.ID()); err != nil {
			return err
		}
	}

	// Get the other tables whose foreign key backreferences need to be removed.
	var fksByBackrefTable map[sqlbase.ID][]*sqlbase.ConstraintToUpdate
	alreadyReversed := false
	err := sc.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		fksByBackrefTable = make(map[sqlbase.ID][]*sqlbase.ConstraintToUpdate)
		var err error
		desc, err := sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}
		for _, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				break
			}
			if mutation.Rollback {
				// The mutation is already reversed, so we don't need to do any more
				// work. This can happen if the mutations were already reversed, but
				// the job was adopted before the rollback completed.
				alreadyReversed = true
				return nil
			}
			if constraint := mutation.GetConstraint(); constraint != nil &&
				constraint.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY &&
				mutation.Direction == sqlbase.DescriptorMutation_ADD &&
				constraint.ForeignKey.Validity == sqlbase.ConstraintValidity_Validating {
				fk := &constraint.ForeignKey
				if fk.ReferencedTableID != desc.ID {
					fksByBackrefTable[constraint.ForeignKey.ReferencedTableID] = append(fksByBackrefTable[constraint.ForeignKey.ReferencedTableID], constraint)
				}
			}
		}
		return nil
	})
	if err != nil {
		return err
	}
	if alreadyReversed {
		return nil
	}
	tableIDsToUpdate := make([]sqlbase.ID, 0, len(fksByBackrefTable)+1)
	tableIDsToUpdate = append(tableIDsToUpdate, sc.tableID)
	for id := range fksByBackrefTable {
		tableIDsToUpdate = append(tableIDsToUpdate, id)
	}

	// Create the update closure for the table and all other tables with
	// backreferences.
	var droppedMutations map[sqlbase.MutationID]struct{}
	update := func(_ *kv.Txn, descs map[sqlbase.ID]*sqlbase.MutableTableDescriptor) error {
		scDesc, ok := descs[sc.tableID]
		if !ok {
			return errors.AssertionFailedf("required table with ID %d not provided to update closure", sc.tableID)
		}
		// Keep track of the column mutations being reversed so that indexes
		// referencing them can be dropped.
		columns := make(map[string]struct{})
		droppedMutations = nil

		for i, mutation := range scDesc.Mutations {
			if mutation.MutationID != sc.mutationID {
				// Only reverse the first set of mutations if they have the
				// mutation ID we're looking for.
				if i == 0 {
					return lease.ErrDidntUpdateDescriptor
				}
				break
			}

			if mutation.Rollback {
				// This can never actually happen, since we checked for this case
				// above.
				return errors.AssertionFailedf("mutation already rolled back: %v", mutation)
			}

			log.Warningf(ctx, "reverse schema change mutation: %+v", mutation)
			scDesc.Mutations[i], columns = sc.reverseMutation(mutation, false /*notStarted*/, columns)

			// If the mutation is for validating a constraint that is being added,
			// drop the constraint because validation has failed.
			if constraint := mutation.GetConstraint(); constraint != nil &&
				mutation.Direction == sqlbase.DescriptorMutation_ADD {
				log.Warningf(ctx, "dropping constraint %+v", constraint)
				if err := sc.maybeDropValidatingConstraint(ctx, scDesc, constraint); err != nil {
					return err
				}
				// Get the foreign key backreferences to remove.
				if constraint.ConstraintType == sqlbase.ConstraintToUpdate_FOREIGN_KEY {
					fk := &constraint.ForeignKey
					backrefTable, ok := descs[fk.ReferencedTableID]
					if !ok {
						return errors.AssertionFailedf("required table with ID %d not provided to update closure", fk.ReferencedTableID)
					}
					if err := removeFKBackReferenceFromTable(backrefTable, fk.Name, scDesc.TableDesc()); err != nil {
						return err
					}
				}
			}
			scDesc.Mutations[i].Rollback = true
		}

		// Delete all mutations that reference any of the reversed columns
		// by running a graph traversal of the mutations.
		if len(columns) > 0 {
			var err error
			droppedMutations, err = sc.deleteIndexMutationsWithReversedColumns(ctx, scDesc, columns)
			if err != nil {
				return err
			}
		}

		// PublishMultiple() will increment the version.
		return nil
	}

	_, err = sc.leaseMgr.PublishMultiple(ctx, tableIDsToUpdate, update, func(txn *kv.Txn) error {
		// Read the table descriptor from the store. The Version of the
		// descriptor has already been incremented in the transaction and
		// this descriptor can be modified without incrementing the version.
		tableDesc, err := sqlbase.GetTableDescFromID(ctx, txn, sc.execCfg.Codec, sc.tableID)
		if err != nil {
			return err
		}

		// Mark the schema change job as failed and create a rollback job.
		err = sc.updateJobForRollback(ctx, txn, tableDesc)
		if err != nil {
			return err
		}

		// Mark other reversed mutation jobs as failed.
		for m := range droppedMutations {
			jobID, err := getJobIDForMutationWithDescriptor(ctx, tableDesc, m)
			if err != nil {
				return err
			}
			if err := sc.jobRegistry.Failed(ctx, txn, jobID, causingError); err != nil {
				return err
			}
		}

		// Log "Reverse Schema Change" event. Only the causing error and the
		// mutation ID are logged; this can be correlated with the DDL statement
		// that initiated the change using the mutation id.
		return MakeEventLogger(sc.execCfg).InsertEventRecord(
			ctx,
			txn,
			EventLogReverseSchemaChange,
			int32(sc.tableID),
			int32(sc.sqlInstanceID),
			struct {
				Error      string
				MutationID uint32
			}{fmt.Sprintf("%+v", causingError), uint32(sc.mutationID)},
		)
	})
	if err != nil {
		return err
	}

	if err := sc.waitToUpdateLeases(ctx, sc.tableID); err != nil {
		return err
	}
	for id := range fksByBackrefTable {
		if err := sc.waitToUpdateLeases(ctx, id); err != nil {
			return err
		}
	}

	return nil
}

// updateJobForRollback updates the schema change job in the case of a rollback.
func (sc *SchemaChanger) updateJobForRollback(
	ctx context.Context, txn *kv.Txn, tableDesc *sqlbase.TableDescriptor,
) error {
	// Initialize the refresh spans to scan the entire table.
	span := tableDesc.PrimaryIndexSpan(sc.execCfg.Codec)
	var spanList []jobspb.ResumeSpanList
	for _, m := range tableDesc.Mutations {
		if m.MutationID == sc.mutationID {
			spanList = append(spanList,
				jobspb.ResumeSpanList{
					ResumeSpans: []roachpb.Span{span},
				},
			)
		}
	}
	if err := sc.job.WithTxn(txn).SetDetails(
		ctx, jobspb.SchemaChangeDetails{
			TableID:        sc.tableID,
			MutationID:     sc.mutationID,
			ResumeSpanList: spanList,
			FormatVersion:  jobspb.JobResumerFormatVersion,
		},
	); err != nil {
		return err
	}
	if err := sc.job.WithTxn(txn).SetProgress(ctx, jobspb.SchemaChangeProgress{}); err != nil {
		return err
	}
	// Set the transaction back to nil so that this job can be used in other
	// transactions.
	sc.job.WithTxn(nil)

	return nil
}

func (sc *SchemaChanger) maybeDropValidatingConstraint(
	ctx context.Context, desc *MutableTableDescriptor, constraint *sqlbase.ConstraintToUpdate,
) error {
	switch constraint.ConstraintType {
	case sqlbase.ConstraintToUpdate_CHECK, sqlbase.ConstraintToUpdate_NOT_NULL:
		if constraint.Check.Validity == sqlbase.ConstraintValidity_Unvalidated {
			return nil
		}
		for j, c := range desc.Checks {
			if c.Name == constraint.Check.Name {
				desc.Checks = append(desc.Checks[:j], desc.Checks[j+1:]...)
				return nil
			}
		}
		if log.V(2) {
			log.Infof(
				ctx,
				"attempted to drop constraint %s, but it hadn't been added to the table descriptor yet",
				constraint.Check.Name,
			)
		}
	case sqlbase.ConstraintToUpdate_FOREIGN_KEY:
		for i, fk := range desc.OutboundFKs {
			if fk.Name == constraint.ForeignKey.Name {
				desc.OutboundFKs = append(desc.OutboundFKs[:i], desc.OutboundFKs[i+1:]...)
				return nil
			}
		}
		if log.V(2) {
			log.Infof(
				ctx,
				"attempted to drop constraint %s, but it hadn't been added to the table descriptor yet",
				constraint.ForeignKey.Name,
			)
		}
	default:
		return errors.AssertionFailedf("unsupported constraint type: %d", errors.Safe(constraint.ConstraintType))
	}
	return nil
}
// deleteIndexMutationsWithReversedColumns deletes mutations with a
// different mutationID than the schema changer and with an index that
// references one of the reversed columns. Execute this as a breadth-first
// search graph traversal.
func (sc *SchemaChanger) deleteIndexMutationsWithReversedColumns(
	ctx context.Context, desc *sqlbase.MutableTableDescriptor, columns map[string]struct{},
) (map[sqlbase.MutationID]struct{}, error) {
	dropMutations := make(map[sqlbase.MutationID]struct{})
	// Run a breadth-first search traversal that reverses mutations.
	for {
		start := len(dropMutations)
		for _, mutation := range desc.Mutations {
			if mutation.MutationID != sc.mutationID {
				if idx := mutation.GetIndex(); idx != nil {
					for _, name := range idx.ColumnNames {
						if _, ok := columns[name]; ok {
							// Such an index mutation has to be with direction ADD and
							// in the DELETE_ONLY state. Live indexes referencing live
							// columns cannot be deleted and thus never have direction
							// DROP. All mutations with the ADD direction start off in
							// the DELETE_ONLY state.
							if mutation.Direction != sqlbase.DescriptorMutation_ADD ||
								mutation.State != sqlbase.DescriptorMutation_DELETE_ONLY {
								panic(fmt.Sprintf("mutation in bad state: %+v", mutation))
							}
							log.Warningf(ctx, "drop schema change mutation: %+v", mutation)
							dropMutations[mutation.MutationID] = struct{}{}
							break
						}
					}
				}
			}
		}

		if len(dropMutations) == start {
			// No more mutations to drop.
			break
		}
		// Drop the mutations.
		newMutations := make([]sqlbase.DescriptorMutation, 0, len(desc.Mutations))
		for _, mutation := range desc.Mutations {
			if _, ok := dropMutations[mutation.MutationID]; ok {
				// Reverse the mutation and update columns to reflect the additional
				// columns that have been purged. This mutation doesn't need
				// a rollback because it was not started.
				mutation, columns = sc.reverseMutation(mutation, true /*notStarted*/, columns)
				// Mark as complete because this mutation needs no backfill.
				if err := desc.MakeMutationComplete(mutation); err != nil {
					return nil, err
				}
			} else {
				newMutations = append(newMutations, mutation)
			}
		}
		// Reset the mutations.
		desc.Mutations = newMutations
	}
	return dropMutations, nil
}
// Reverse a mutation. Returns the updated mutation and updated columns.
// notStarted is set to true only if the schema change state machine
// was not started for the mutation.
func (sc *SchemaChanger) reverseMutation(
	mutation sqlbase.DescriptorMutation, notStarted bool, columns map[string]struct{},
) (sqlbase.DescriptorMutation, map[string]struct{}) {
	switch mutation.Direction {
	case sqlbase.DescriptorMutation_ADD:
		mutation.Direction = sqlbase.DescriptorMutation_DROP
		// A column ADD being reversed gets placed in the map.
		if col := mutation.GetColumn(); col != nil {
			columns[col.Name] = struct{}{}
		}
		// PrimaryKeySwap and ComputedColumnSwap don't have a concept of the state machine.
		if pkSwap, computedColumnsSwap :=
			mutation.GetPrimaryKeySwap(), mutation.GetComputedColumnSwap(); pkSwap != nil || computedColumnsSwap != nil {
			return mutation, columns
		}

		if notStarted && mutation.State != sqlbase.DescriptorMutation_DELETE_ONLY {
			panic(fmt.Sprintf("mutation in bad state: %+v", mutation))
		}

	case sqlbase.DescriptorMutation_DROP:
		mutation.Direction = sqlbase.DescriptorMutation_ADD
		if notStarted && mutation.State != sqlbase.DescriptorMutation_DELETE_AND_WRITE_ONLY {
			panic(fmt.Sprintf("mutation in bad state: %+v", mutation))
		}
	}
	return mutation, columns
}

// CreateGCJobRecord creates the job record for a GC job, setting some
// properties which are common for all GC jobs.
func CreateGCJobRecord(
	originalDescription string, username string, details jobspb.SchemaChangeGCDetails,
) jobs.Record {
	descriptorIDs := make([]sqlbase.ID, 0)
	if len(details.Indexes) > 0 {
		if len(descriptorIDs) == 0 {
			descriptorIDs = []sqlbase.ID{details.ParentID}
		}
	} else {
		for _, table := range details.Tables {
			descriptorIDs = append(descriptorIDs, table.ID)
		}
	}
	return jobs.Record{
		Description:   fmt.Sprintf("GC for %s", originalDescription),
		Username:      username,
		DescriptorIDs: descriptorIDs,
		Details:       details,
		Progress:      jobspb.SchemaChangeGCProgress{},
		NonCancelable: true,
	}
}
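// Illustrative sketch (not part of the original file): building the GC job
// record for a single dropped index, mirroring the call in done() above.
// exampleIndexGCRecord is a hypothetical helper.
func exampleIndexGCRecord(
	parentTableID sqlbase.ID, indexID sqlbase.IndexID, description, username string,
) jobs.Record {
	details := jobspb.SchemaChangeGCDetails{
		Indexes: []jobspb.SchemaChangeGCDetails_DroppedIndex{
			{IndexID: indexID, DropTime: timeutil.Now().UnixNano()},
		},
		ParentID: parentTableID,
	}
	// For index GC, DescriptorIDs contains just the parent table's ID, and
	// the record is marked NonCancelable.
	return CreateGCJobRecord(description, username, details)
}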
// GCJobTestingKnobs is for testing the Schema Changer GC job.
// Note that this is defined here for testing purposes to avoid cyclic
// dependencies.
type GCJobTestingKnobs struct {
	RunBeforeResume func(jobID int64) error
}

// ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface.
func (*GCJobTestingKnobs) ModuleTestingKnobs() {}

// SchemaChangerTestingKnobs for testing the schema change execution path
// through both the synchronous and asynchronous paths.
type SchemaChangerTestingKnobs struct {
	// SchemaChangeJobNoOp returning true will cause the job to be a no-op.
	SchemaChangeJobNoOp func() bool

	// RunBeforePublishWriteAndDelete is called just before publishing the
	// write+delete state for the schema change.
	RunBeforePublishWriteAndDelete func()

	// RunBeforeBackfill is called just before starting the backfill.
	RunBeforeBackfill func() error

	// RunAfterBackfill is called after completing a backfill.
	RunAfterBackfill func(jobID int64) error

	// RunBeforeIndexBackfill is called just before starting the index backfill, after
	// fixing the index backfill scan timestamp.
	RunBeforeIndexBackfill func()

	// RunBeforePrimaryKeySwap is called just before the primary key swap is committed.
	RunBeforePrimaryKeySwap func()

	// RunBeforeComputedColumnSwap is called just before the computed column swap is committed.
	RunBeforeComputedColumnSwap func()

	// RunBeforeChildJobs is called just before child jobs are run to clean up
	// dropped schema elements after a mutation.
	RunBeforeChildJobs func()

	// RunBeforeIndexValidation is called just before starting the index validation,
	// after setting the job status to validating.
	RunBeforeIndexValidation func() error

	// RunBeforeConstraintValidation is called just before starting the checks validation,
	// after setting the job status to validating.
	RunBeforeConstraintValidation func() error

	// RunBeforeMutationReversal runs at the beginning of maybeReverseMutations.
	RunBeforeMutationReversal func(jobID int64) error

	// RunAfterMutationReversal runs in OnFailOrCancel after the mutations have
	// been reversed.
	RunAfterMutationReversal func(jobID int64) error

	// RunBeforeOnFailOrCancel runs at the start of the OnFailOrCancel hook.
	RunBeforeOnFailOrCancel func(jobID int64) error

	// RunAfterOnFailOrCancel runs after the OnFailOrCancel hook.
	RunAfterOnFailOrCancel func(jobID int64) error

	// RunBeforeResume runs at the start of the Resume hook.
	RunBeforeResume func(jobID int64) error

	// OldNamesDrainedNotification is called during a schema change,
	// after all leases on the version of the descriptor with the old
	// names are gone, and just before the mapping of the old names to the
	// descriptor id are about to be deleted.
	OldNamesDrainedNotification func()

	// WriteCheckpointInterval is the interval after which a checkpoint is
	// written.
	WriteCheckpointInterval time.Duration

	// BackfillChunkSize is to be used for all backfill chunked operations.
	BackfillChunkSize int64

	// TwoVersionLeaseViolation is called whenever a schema change
	// transaction is unable to commit because it is violating the two
	// version lease invariant.
	TwoVersionLeaseViolation func()
}

// ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface.
func (*SchemaChangerTestingKnobs) ModuleTestingKnobs() {}
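// Illustrative sketch (not part of the original file): how a test might hook
// into the schema changer. The base.TestingKnobs field name is assumed from
// the usual test-server wiring and may differ.
var exampleTestingKnobs = base.TestingKnobs{
	SQLSchemaChanger: &SchemaChangerTestingKnobs{
		// Inject a pause or an error just before the backfill starts.
		RunBeforeBackfill: func() error { return nil },
		// Exercise checkpointing with small chunks.
		BackfillChunkSize: 128,
	},
}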
1594 Context: ctx,
1595 Planner: &sqlbase.DummyEvalPlanner{},
1596 PrivilegedAccessor: &sqlbase.DummyPrivilegedAccessor{},
1597 SessionAccessor: &sqlbase.DummySessionAccessor{},
1598 ClientNoticeSender: &sqlbase.DummyClientNoticeSender{},
1599 Sequence: &sqlbase.DummySequenceOperators{},
1600 Tenant: &sqlbase.DummyTenantOperator{},
1601 Settings: execCfg.Settings,
1602 TestingKnobs: execCfg.EvalContextTestingKnobs,
1603 ClusterID: execCfg.ClusterID(),
1604 ClusterName: execCfg.RPCContext.ClusterName(),
1605 NodeID: execCfg.NodeID,
1606 Codec: execCfg.Codec,
1607 Locality: execCfg.Locality,
1608 },
1609 }
1610 // The backfill is going to use the current timestamp for the various
1611 // functions, like now(), that need it. It's possible that the backfill has
1612 // already been partially performed by another SchemaChangeManager with
1613 // another timestamp.
1614 //
1615 // TODO(andrei): Figure out if this is what we want, and whether the
1616 // timestamp from the session that enqueued the schema change
1617 // is/should be used for impure functions like now().
1618 evalCtx.SetTxnTimestamp(timeutil.Unix(0 /* sec */, ts.WallTime))
1619 evalCtx.SetStmtTimestamp(timeutil.Unix(0 /* sec */, ts.WallTime))
1620
1621 return evalCtx
1622 }
1623
1624 type schemaChangeResumer struct {
1625 job *jobs.Job
1626 }
1627
1628 func (r schemaChangeResumer) Resume(
1629 ctx context.Context, phs interface{}, resultsCh chan<- tree.Datums,
1630 ) error {
1631 p := phs.(PlanHookState)
1632 details := r.job.Details().(jobspb.SchemaChangeDetails)
1633 if p.ExecCfg().SchemaChangerTestingKnobs.SchemaChangeJobNoOp != nil &&
1634 p.ExecCfg().SchemaChangerTestingKnobs.SchemaChangeJobNoOp() {
1635 return nil
1636 }
1637 if fn := p.ExecCfg().SchemaChangerTestingKnobs.RunBeforeResume; fn != nil {
1638 if err := fn(*r.job.ID()); err != nil {
1639 return err
1640 }
1641 }
1642
1643 execSchemaChange := func(tableID sqlbase.ID, mutationID sqlbase.MutationID, droppedDatabaseID sqlbase.ID) error {
1644 sc := SchemaChanger{
1645 tableID: tableID,
1646 mutationID: mutationID,
1647 droppedDatabaseID: droppedDatabaseID,
1648 sqlInstanceID: p.ExecCfg().NodeID.SQLInstanceID(),
1649 db: p.ExecCfg().DB,
1650 leaseMgr: p.ExecCfg().LeaseManager,
1651 testingKnobs: p.ExecCfg().SchemaChangerTestingKnobs,
1652 distSQLPlanner: p.DistSQLPlanner(),
1653 jobRegistry: p.ExecCfg().JobRegistry,
1654 job: r.job,
1655 rangeDescriptorCache: p.ExecCfg().RangeDescriptorCache,
1656 leaseHolderCache: p.ExecCfg().LeaseHolderCache,
1657 clock: p.ExecCfg().Clock,
1658 settings: p.ExecCfg().Settings,
1659 execCfg: p.ExecCfg(),
1660 ieFactory: func(ctx context.Context, sd *sessiondata.SessionData) sqlutil.InternalExecutor {
1661 return r.job.MakeSessionBoundInternalExecutor(ctx, sd)
1662 },
1663 }
1664 opts := retry.Options{
1665 InitialBackoff: 100 * time.Millisecond,
1666 MaxBackoff: 20 * time.Second,
1667 Multiplier: 1.5,
1668 }
1669
1670 // The schema change may have to be retried if it is not first in line, or
1671 // for other retriable reasons, so we run it in an exponential backoff retry
1672 // loop; the loop itself ends only when the context is canceled.
1673 var scErr error
1674 for r := retry.StartWithCtx(ctx, opts); r.Next(); {
1675 // Note that r.Next always returns true on first run, so exec will be
1676 // called at least once before there is a chance for this loop to exit.
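// (With these options, the delay between attempts grows roughly as
// 100ms, 150ms, 225ms, ..., capped at 20s, modulo whatever jitter the
// retry package applies.)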
1677 scErr = sc.exec(ctx)
1678 switch {
1679 case scErr == nil:
1680 return nil
1681 case errors.Is(scErr, sqlbase.ErrDescriptorNotFound):
1682 // If the table descriptor for the ID can't be found, we assume that
1683 // another job to drop the table got to it first, and consider this job
1684 // finished.
1685 log.Infof(
1686 ctx,
1687 "descriptor %d not found for schema change processing mutation %d; "+
1688 "assuming it was dropped, and exiting",
1689 tableID, mutationID,
1690 )
1691 return nil
1692 case !isPermanentSchemaChangeError(scErr):
1693 // The error is on the whitelist of errors we retry on, including the
1694 // schema change not having the first mutation in line; stay in the loop.
1695 default:
1696 // All other errors lead to a failed job.
1697 return scErr
1698 }
1699 }
1700 // If the context was canceled, the job registry will retry the job. We can
1701 // just return the error without wrapping it in a retry error.
1702 return scErr
1703 }
1704
1705 // For an empty dropped database, its zone config was already GC'ed and
1706 // there's nothing left to do.
1707 if details.DroppedDatabaseID != sqlbase.InvalidID && len(details.DroppedTables) == 0 {
1708 return nil
1709 }
1710
1711 // If a database is being dropped, handle this separately by draining names
1712 // for all the tables.
1713 //
1714 // This also covers other cases where we have a leftover 19.2 job that drops
1715 // multiple tables in a single job (e.g., TRUNCATE on multiple tables), so
1716 // it's possible for DroppedDatabaseID to be unset.
1717 if details.DroppedDatabaseID != sqlbase.InvalidID || len(details.DroppedTables) > 1 {
1718 for i := range details.DroppedTables {
1719 droppedTable := &details.DroppedTables[i]
1720 if err := execSchemaChange(droppedTable.ID, sqlbase.InvalidMutationID, details.DroppedDatabaseID); err != nil {
1721 return err
1722 }
1723 }
1724 dropTime := timeutil.Now().UnixNano()
1725 tablesToGC := make([]jobspb.SchemaChangeGCDetails_DroppedID, len(details.DroppedTables))
1726 for i, table := range details.DroppedTables {
1727 tablesToGC[i] = jobspb.SchemaChangeGCDetails_DroppedID{ID: table.ID, DropTime: dropTime}
1728 }
1729 multiTableGCDetails := jobspb.SchemaChangeGCDetails{
1730 Tables: tablesToGC,
1731 ParentID: details.DroppedDatabaseID,
1732 }
1733
1734 return startGCJob(
1735 ctx,
1736 p.ExecCfg().DB,
1737 p.ExecCfg().JobRegistry,
1738 r.job.Payload().Username,
1739 r.job.Payload().Description,
1740 multiTableGCDetails,
1741 )
1742 }
1743 if details.TableID == sqlbase.InvalidID {
1744 return errors.AssertionFailedf("schema change has no specified database or table(s)")
1745 }
1746
1747 return execSchemaChange(details.TableID, details.MutationID, details.DroppedDatabaseID)
1748 }
1749
1750 // OnFailOrCancel is part of the jobs.Resumer interface.
1751 func (r schemaChangeResumer) OnFailOrCancel(ctx context.Context, phs interface{}) error {
1752 p := phs.(PlanHookState)
1753 details := r.job.Details().(jobspb.SchemaChangeDetails)
1754
1755 if details.DroppedDatabaseID != sqlbase.InvalidID {
1756 // TODO (lucy): Do we need to do anything here?
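// As currently written this is a no-op: for a dropped database, Resume
// drains the dropped tables and hands cleanup off to a GC job, so there
// is no rollback work to perform here.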
1757 return nil
1758 }
1759 if details.TableID == sqlbase.InvalidID {
1760 return errors.AssertionFailedf("job has no database ID or table ID")
1761 }
1762 sc := SchemaChanger{
1763 tableID: details.TableID,
1764 mutationID: details.MutationID,
1765 sqlInstanceID: p.ExecCfg().NodeID.SQLInstanceID(),
1766 db: p.ExecCfg().DB,
1767 leaseMgr: p.ExecCfg().LeaseManager,
1768 testingKnobs: p.ExecCfg().SchemaChangerTestingKnobs,
1769 distSQLPlanner: p.DistSQLPlanner(),
1770 jobRegistry: p.ExecCfg().JobRegistry,
1771 job: r.job,
1772 rangeDescriptorCache: p.ExecCfg().RangeDescriptorCache,
1773 leaseHolderCache: p.ExecCfg().LeaseHolderCache,
1774 clock: p.ExecCfg().Clock,
1775 settings: p.ExecCfg().Settings,
1776 execCfg: p.ExecCfg(),
1777 ieFactory: func(ctx context.Context, sd *sessiondata.SessionData) sqlutil.InternalExecutor {
1778 return r.job.MakeSessionBoundInternalExecutor(ctx, sd)
1779 },
1780 }
1781
1782 if fn := sc.testingKnobs.RunBeforeOnFailOrCancel; fn != nil {
1783 if err := fn(*r.job.ID()); err != nil {
1784 return err
1785 }
1786 }
1787
1788 if r.job.Payload().FinalResumeError == nil {
1789 return errors.AssertionFailedf("job failed but had no recorded error")
1790 }
1791 scErr := errors.DecodeError(ctx, *r.job.Payload().FinalResumeError)
1792
1793 if rollbackErr := sc.handlePermanentSchemaChangeError(ctx, scErr, p.ExtendedEvalContext()); rollbackErr != nil {
1794 switch {
1795 case errors.Is(rollbackErr, sqlbase.ErrDescriptorNotFound):
1796 // If the table descriptor for the ID can't be found, we assume that
1797 // another job to drop the table got to it first, and consider this job
1798 // finished.
1799 log.Infof(
1800 ctx,
1801 "descriptor %d not found for rollback of schema change processing mutation %d; "+
1802 "assuming it was dropped, and exiting",
1803 details.TableID, details.MutationID,
1804 )
1805 case ctx.Err() != nil:
1806 // If the context was canceled, the job registry will retry the job.
1807 // We check for this case so that we can just return the error without
1808 // wrapping it in a retry error.
1809 return rollbackErr
1810 case !isPermanentSchemaChangeError(rollbackErr):
1811 // The error is on the whitelist of errors we retry on, so have the
1812 // job registry retry.
1813 return jobs.NewRetryJobError(rollbackErr.Error())
1814 default:
1815 // All other errors lead to a failed job.
1816 //
1817 // TODO (lucy): We have a problem where some schema change rollbacks will
1818 // never succeed because the backfiller can't handle rolling back schema
1819 // changes that involve dropping a column; see #46541. (This is probably
1820 // not the only bug that could cause rollbacks to fail.) For historical
1821 // context: This was the case in 19.2 and probably earlier versions as
1822 // well, and in those earlier versions, the old async schema changer would
1823 // keep retrying the rollback and failing in the background because the
1824 // mutation would still be left on the table descriptor. In the present
1825 // schema change job, we return an error immediately and put the job in a
1826 // terminal state instead of retrying indefinitely, basically to make the
1827 // behavior similar to 19.2: If the rollback fails, we end up returning
1828 // immediately (instead of retrying and blocking indefinitely), and the
1829 // table descriptor is left in a bad state with some mutations that we
1830 // can't clean up.
1831 //
1832 // Ultimately, this is untenable, and we should figure out some better way
1833 // of dealing with failed rollbacks. Part of the solution is just making
1834 // rollbacks (especially of dropped columns) more robust, but part of it
1835 // will likely involve some sort of medium-term solution for cleaning up
1836 // mutations that we can't make any progress on (see #47456). In the long
1837 // term we'll hopefully be rethinking what it even means to "roll back" a
1838 // (transactional) schema change.
1839 return rollbackErr
1840 }
1841 }
1842
1843 if fn := sc.testingKnobs.RunAfterOnFailOrCancel; fn != nil {
1844 if err := fn(*r.job.ID()); err != nil {
1845 return err
1846 }
1847 }
1848 return nil
1849 }
1850
1851 func init() {
1852 createResumerFn := func(job *jobs.Job, settings *cluster.Settings) jobs.Resumer {
1853 return &schemaChangeResumer{job: job}
1854 }
1855 jobs.RegisterConstructor(jobspb.TypeSchemaChange, createResumerFn)
1856 }
1857
1858 // queueCleanupJobs checks if the completed schema change needs to start a
1859 // child job to clean up dropped schema elements.
1860 func (sc *SchemaChanger) queueCleanupJobs(
1861 ctx context.Context, scDesc *MutableTableDescriptor, txn *kv.Txn, childJobs []*jobs.StartableJob,
1862 ) ([]*jobs.StartableJob, error) {
1863 // Create jobs for dropped columns / indexes to be deleted.
1864 mutationID := scDesc.ClusterVersion.NextMutationID
1865 span := scDesc.PrimaryIndexSpan(sc.execCfg.Codec)
1866 var spanList []jobspb.ResumeSpanList
1867 for j := len(scDesc.ClusterVersion.Mutations); j < len(scDesc.Mutations); j++ {
1868 spanList = append(spanList,
1869 jobspb.ResumeSpanList{
1870 ResumeSpans: roachpb.Spans{span},
1871 },
1872 )
1873 }
1874 // Only start a job if spanList has any spans. If len(spanList) == 0, then
1875 // no mutations were enqueued by the primary key change.
1876 if len(spanList) > 0 {
1877 jobRecord := jobs.Record{
1878 Description: fmt.Sprintf("CLEANUP JOB for '%s'", sc.job.Payload().Description),
1879 Username: sc.job.Payload().Username,
1880 DescriptorIDs: sqlbase.IDs{scDesc.GetID()},
1881 Details: jobspb.SchemaChangeDetails{
1882 TableID: sc.tableID,
1883 MutationID: mutationID,
1884 ResumeSpanList: spanList,
1885 FormatVersion: jobspb.JobResumerFormatVersion,
1886 },
1887 Progress: jobspb.SchemaChangeProgress{},
1888 NonCancelable: true,
1889 }
1890 job, err := sc.jobRegistry.CreateStartableJobWithTxn(ctx, jobRecord, txn, nil /* resultsCh */)
1891 if err != nil {
1892 return nil, err
1893 }
1894 log.VEventf(ctx, 2, "created job %d to drop previous columns "+
1895 "and indexes.", *job.ID())
1896 childJobs = append(childJobs, job)
1897 scDesc.MutationJobs = append(scDesc.MutationJobs, sqlbase.TableDescriptor_MutationJob{
1898 MutationID: mutationID,
1899 JobID: *job.ID(),
1900 })
1901 }
1902 return childJobs, nil
1903 }
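// exampleGCJobRecordForDroppedTables is an illustrative sketch showing how
// a caller might use CreateGCJobRecord to build the record for a GC job
// covering two dropped tables. The function name, the descriptor IDs (52
// and 53), and the statement description are invented for the example;
// only CreateGCJobRecord and the jobspb/sqlbase types come from this
// package.
func exampleGCJobRecordForDroppedTables() jobs.Record {
	// Dropped tables are GC'ed based on their drop time, so record it.
	dropTime := timeutil.Now().UnixNano()
	details := jobspb.SchemaChangeGCDetails{
		Tables: []jobspb.SchemaChangeGCDetails_DroppedID{
			{ID: sqlbase.ID(52), DropTime: dropTime},
			{ID: sqlbase.ID(53), DropTime: dropTime},
		},
	}
	// Because details.Indexes is empty, the returned record is keyed on the
	// dropped tables' descriptor IDs and is marked non-cancelable.
	return CreateGCJobRecord("DROP TABLE db.t1, db.t2", security.RootUser, details)
}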