github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sqlmigrations/migrations.go

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sqlmigrations
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"sort"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	"github.com/cockroachdb/cockroach/pkg/clusterversion"
    21  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    22  	"github.com/cockroachdb/cockroach/pkg/jobs"
    23  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    24  	"github.com/cockroachdb/cockroach/pkg/keys"
    25  	"github.com/cockroachdb/cockroach/pkg/kv"
    26  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    27  	"github.com/cockroachdb/cockroach/pkg/security"
    28  	"github.com/cockroachdb/cockroach/pkg/settings"
    29  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    30  	"github.com/cockroachdb/cockroach/pkg/sql"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    35  	"github.com/cockroachdb/cockroach/pkg/sqlmigrations/leasemanager"
    36  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    37  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    38  	"github.com/cockroachdb/cockroach/pkg/util/log"
    39  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    40  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    41  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    42  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    43  	"github.com/cockroachdb/errors"
    44  	"github.com/cockroachdb/logtags"
    45  )
    46  
    47  var (
    48  	leaseDuration        = time.Minute
    49  	leaseRefreshInterval = leaseDuration / 5
    50  )
    51  
    52  // MigrationManagerTestingKnobs contains testing knobs.
    53  type MigrationManagerTestingKnobs struct {
    54  	// DisableBackfillMigrations stops applying migrations once
    55  	// a migration with 'doesBackfill == true' is encountered.
    56  	// TODO(mberhault): we could skip only backfill migrations and dependencies
    57  	// if we had some concept of migration dependencies.
    58  	DisableBackfillMigrations bool
    59  	AfterJobMigration         func()
    60  	// AlwaysRunJobMigration controls whether to always run the schema change job
    61  	// migration regardless of whether it has been marked as complete.
    62  	AlwaysRunJobMigration bool
    63  }
    64  
    65  // ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface.
    66  func (*MigrationManagerTestingKnobs) ModuleTestingKnobs() {}
    67  
    68  var _ base.ModuleTestingKnobs = &MigrationManagerTestingKnobs{}
    69  
    70  // backwardCompatibleMigrations is a hard-coded list of migrations to be run on
    71  // startup. They will always be run from top-to-bottom, and because they are
    72  // assumed to be backward-compatible, they will be run regardless of what other
    73  // node versions are currently running within the cluster.
    74  // Migrations must be idempotent: a migration may run successfully but not be
    75  // recorded as completed, causing a second run.
    76  //
    77  // Attention: If a migration is creating new tables, it should also be added to
    78  // the metadata schema written by bootstrap (see addSystemDatabaseToSchema())
    79  // and it should have the includedInBootstrap field set (see comments on that
    80  // field too).
    81  var backwardCompatibleMigrations = []migrationDescriptor{
    82  	{
    83  		// Introduced in v1.0. Baked into v2.0.
    84  		name: "default UniqueID to uuid_v4 in system.eventlog",
    85  	},
    86  	{
    87  		// Introduced in v1.0. Baked into v2.0.
    88  		name: "create system.jobs table",
    89  	},
    90  	{
    91  		// Introduced in v1.0. Baked into v2.0.
    92  		name: "create system.settings table",
    93  	},
    94  	{
    95  		// Introduced in v1.0. Permanent migration.
    96  		name:        "enable diagnostics reporting",
    97  		workFn:      optInToDiagnosticsStatReporting,
    98  		clusterWide: true,
    99  	},
   100  	{
   101  		// Introduced in v1.1. Baked into v2.0.
   102  		name: "establish conservative dependencies for views #17280 #17269 #17306",
   103  	},
   104  	{
   105  		// Introduced in v1.1. Baked into v2.0.
   106  		name: "create system.sessions table",
   107  	},
   108  	{
   109  		// Introduced in v1.1. Permanent migration.
   110  		name:        "populate initial version cluster setting table entry",
   111  		workFn:      populateVersionSetting,
   112  		clusterWide: true,
   113  	},
   114  	{
   115  		// Introduced in v2.0. Baked into v2.1.
   116  		name:             "create system.table_statistics table",
   117  		newDescriptorIDs: staticIDs(keys.TableStatisticsTableID),
   118  	},
   119  	{
   120  		// Introduced in v2.0. Permanent migration.
   121  		name:   "add root user",
   122  		workFn: addRootUser,
   123  	},
   124  	{
   125  		// Introduced in v2.0. Baked into v2.1.
   126  		name:             "create system.locations table",
   127  		newDescriptorIDs: staticIDs(keys.LocationsTableID),
   128  	},
   129  	{
   130  		// Introduced in v2.0. Baked into v2.1.
   131  		name: "add default .meta and .liveness zone configs",
   132  	},
   133  	{
   134  		// Introduced in v2.0. Baked into v2.1.
   135  		name:             "create system.role_members table",
   136  		newDescriptorIDs: staticIDs(keys.RoleMembersTableID),
   137  	},
   138  	{
   139  		// Introduced in v2.0. Permanent migration.
   140  		name:   "add system.users isRole column and create admin role",
   141  		workFn: addAdminRole,
   142  	},
   143  	{
   144  		// Introduced in v2.0, replaced by "ensure admin role privileges in all descriptors"
   145  		name: "grant superuser privileges on all objects to the admin role",
   146  	},
   147  	{
   148  		// Introduced in v2.0. Permanent migration.
   149  		name:   "make root a member of the admin role",
   150  		workFn: addRootToAdminRole,
   151  	},
   152  	{
   153  		// Introduced in v2.0. Baked into v2.1.
   154  		name: "upgrade table descs to interleaved format version",
   155  	},
   156  	{
   157  		// Introduced in v2.0 alphas then folded into `retiredSettings`.
   158  		name: "remove cluster setting `kv.gc.batch_size`",
   159  	},
   160  	{
   161  		// Introduced in v2.0 alphas then folded into `retiredSettings`.
   162  		name: "remove cluster setting `kv.transaction.max_intents`",
   163  	},
   164  	{
   165  		// Introduced in v2.0. Baked into v2.1.
   166  		name: "add default system.jobs zone config",
   167  	},
   168  	{
   169  		// Introduced in v2.0. Permanent migration.
   170  		name:        "initialize cluster.secret",
   171  		workFn:      initializeClusterSecret,
   172  		clusterWide: true,
   173  	},
   174  	{
   175  		// Introduced in v2.0. Repeated in v2.1 below.
   176  		name: "ensure admin role privileges in all descriptors",
   177  	},
   178  	{
   179  		// Introduced in v2.1, repeat of 2.0 migration to catch mixed-version issues.
   180  		// TODO(mberhault): bake into v19.1.
   181  		name: "repeat: ensure admin role privileges in all descriptors",
   182  	},
   183  	{
   184  		// Introduced in v2.1.
   185  		// TODO(mberhault): bake into v19.1.
   186  		name:   "disallow public user or role name",
   187  		workFn: disallowPublicUserOrRole,
   188  	},
   189  	{
   190  		// Introduced in v2.1.
   191  		// TODO(knz): bake this migration into v19.1.
   192  		name:             "create default databases",
   193  		workFn:           createDefaultDbs,
   194  		newDescriptorIDs: databaseIDs(sqlbase.DefaultDatabaseName, sqlbase.PgDatabaseName),
   195  	},
   196  	{
   197  		// Introduced in v2.1. Baked into 20.1.
   198  		name: "add progress to system.jobs",
   199  	},
   200  	{
   201  		// Introduced in v19.1.
   202  		// TODO(knz): bake this migration into v19.2
   203  		name:   "create system.comment table",
   204  		workFn: createCommentTable,
   205  		// This migration has been introduced some time before 19.2.
   206  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   207  		newDescriptorIDs:    staticIDs(keys.CommentsTableID),
   208  	},
   209  	{
   210  		name:   "create system.replication_constraint_stats table",
   211  		workFn: createReplicationConstraintStatsTable,
   212  		// This migration has been introduced some time before 19.2.
   213  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   214  		newDescriptorIDs:    staticIDs(keys.ReplicationConstraintStatsTableID),
   215  	},
   216  	{
   217  		name:   "create system.replication_critical_localities table",
   218  		workFn: createReplicationCriticalLocalitiesTable,
   219  		// This migration has been introduced some time before 19.2.
   220  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   221  		newDescriptorIDs:    staticIDs(keys.ReplicationCriticalLocalitiesTableID),
   222  	},
   223  	{
   224  		name:   "create system.reports_meta table",
   225  		workFn: createReportsMetaTable,
   226  		// This migration has been introduced some time before 19.2.
   227  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   228  		newDescriptorIDs:    staticIDs(keys.ReportsMetaTableID),
   229  	},
   230  	{
   231  		name:   "create system.replication_stats table",
   232  		workFn: createReplicationStatsTable,
   233  		// This migration has been introduced some time before 19.2.
   234  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   235  		newDescriptorIDs:    staticIDs(keys.ReplicationStatsTableID),
   236  	},
   237  	{
   238  		// Introduced in v19.1.
   239  		// TODO(knz): bake this migration into v19.2.
   240  		name:        "propagate the ts purge interval to the new setting names",
   241  		workFn:      retireOldTsPurgeIntervalSettings,
   242  		clusterWide: true,
   243  	},
   244  	{
   245  		// Introduced in v19.2.
   246  		name:   "update system.locations with default location data",
   247  		workFn: updateSystemLocationData,
   248  	},
   249  	{
   250  		// Introduced in v19.2, baked into v20.1.
   251  		name:                "change reports fields from timestamp to timestamptz",
   252  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.Version19_2),
   253  	},
   254  	{
   255  		// Introduced in v20.1.
   256  		// TODO(ajwerner): Bake this migration into v20.2.
   257  		name:                "create system.protected_ts_meta table",
   258  		workFn:              createProtectedTimestampsMetaTable,
   259  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionProtectedTimestamps),
   260  		newDescriptorIDs:    staticIDs(keys.ProtectedTimestampsMetaTableID),
   261  	},
   262  	{
   263  		// Introduced in v20.1.
   264  		// TODO(ajwerner): Bake this migration into v20.2.
   265  		name:                "create system.protected_ts_records table",
   266  		workFn:              createProtectedTimestampsRecordsTable,
   267  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionProtectedTimestamps),
   268  		newDescriptorIDs:    staticIDs(keys.ProtectedTimestampsRecordsTableID),
   269  	},
   270  	{
   271  		// Introduced in v20.1. Note that this migration
   272  		// has v2 appended to it because in 20.1 betas, the migration edited the old
   273  		// system.namespace descriptor to change its Name. This wrought havoc,
    274  		// causing #47167, which left 19.2 nodes unable to read system.namespace
    275  		// from SQL queries. However, without changing the old
   276  		// descriptor's Name, backup would fail, since backup requires that no two
   277  		// descriptors have the same Name. So, in v2 of this migration, we edit
   278  		// the name of the new table's Descriptor, calling it
   279  		// namespace2, and re-edit the old descriptor's Name to
   280  		// be just "namespace" again, to try to help clusters that might have
   281  		// upgraded to the 20.1 betas with the problem.
   282  		name:                "create new system.namespace table v2",
   283  		workFn:              createNewSystemNamespaceDescriptor,
   284  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionNamespaceTableWithSchemas),
   285  		newDescriptorIDs:    staticIDs(keys.NamespaceTableID),
   286  	},
   287  	{
    288  		// Introduced in v20.1. Replaced in v20.1.1 and v20.2 by the
   289  		// StartSystemNamespaceMigration post-finalization-style migration.
   290  		name: "migrate system.namespace_deprecated entries into system.namespace",
   291  		// workFn:              migrateSystemNamespace,
   292  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionNamespaceTableWithSchemas),
   293  	},
   294  	{
   295  		// Introduced in v20.1.
   296  		name:                "create system.role_options table",
   297  		workFn:              createRoleOptionsTable,
   298  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionCreateRolePrivilege),
   299  		newDescriptorIDs:    staticIDs(keys.RoleOptionsTableID),
   300  	},
   301  	{
   302  		// Introduced in v20.1.
   303  		// TODO(andrei): Bake this migration into v20.2.
   304  		name: "create statement_diagnostics_requests, statement_diagnostics and " +
   305  			"system.statement_bundle_chunks tables",
   306  		workFn:              createStatementInfoSystemTables,
   307  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionStatementDiagnosticsSystemTables),
   308  		newDescriptorIDs: staticIDs(keys.StatementBundleChunksTableID,
   309  			keys.StatementDiagnosticsRequestsTableID, keys.StatementDiagnosticsTableID),
   310  	},
   311  	{
   312  		// Introduced in v20.1.
   313  		name:                "remove public permissions on system.comments",
   314  		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionSchemaChangeJob),
   315  		workFn:              depublicizeSystemComments,
   316  	},
   317  	{
   318  		// Introduced in v20.1.
   319  		name:   "add CREATEROLE privilege to admin/root",
   320  		workFn: addCreateRoleToAdminAndRoot,
   321  	},
   322  	{
   323  		// Introduced in v20.2.
   324  		name:   "add created_by columns to system.jobs",
   325  		workFn: alterSystemJobsAddCreatedByColumns,
   326  		includedInBootstrap: clusterversion.VersionByKey(
   327  			clusterversion.VersionAlterSystemJobsAddCreatedByColumns),
   328  	},
   329  }
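
         // As an illustrative sketch only (none of these identifiers exist in this
         // file): a new migration that adds a system table would follow the same
         // pattern as the entries above, e.g.
         //
         //	{
         //		// Introduced in v20.2.
         //		name:                "create system.example table",
         //		workFn:              createExampleTable,
         //		includedInBootstrap: clusterversion.VersionByKey(clusterversion.VersionExampleTable),
         //		newDescriptorIDs:    staticIDs(keys.ExampleTableID),
         //	}
         //
         // and, per the comment on backwardCompatibleMigrations, the new table would
         // also need to be added to the bootstrap metadata schema.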
   330  
   331  func staticIDs(
   332  	ids ...sqlbase.ID,
   333  ) func(ctx context.Context, db db, codec keys.SQLCodec) ([]sqlbase.ID, error) {
   334  	return func(ctx context.Context, db db, codec keys.SQLCodec) ([]sqlbase.ID, error) { return ids, nil }
   335  }
   336  
   337  func databaseIDs(
   338  	names ...string,
   339  ) func(ctx context.Context, db db, codec keys.SQLCodec) ([]sqlbase.ID, error) {
   340  	return func(ctx context.Context, db db, codec keys.SQLCodec) ([]sqlbase.ID, error) {
   341  		var ids []sqlbase.ID
   342  		for _, name := range names {
   343  			// This runs as part of an older migration (introduced in 2.1). We use
   344  			// the DeprecatedDatabaseKey, and let the 20.1 migration handle moving
   345  			// from the old namespace table into the new one.
   346  			kv, err := db.Get(ctx, sqlbase.NewDeprecatedDatabaseKey(name).Key(codec))
   347  			if err != nil {
   348  				return nil, err
   349  			}
   350  			ids = append(ids, sqlbase.ID(kv.ValueInt()))
   351  		}
   352  		return ids, nil
   353  	}
   354  }
   355  
   356  // migrationDescriptor describes a single migration hook that's used to modify
   357  // some part of the cluster state when the CockroachDB version is upgraded.
   358  // See docs/RFCs/cluster_upgrade_tool.md for details.
   359  type migrationDescriptor struct {
   360  	// name must be unique amongst all hard-coded migrations.
   361  	// ATTENTION: A migration's name can never be changed. It is included in a key
   362  	// marking a migration as completed.
   363  	name string
   364  	// workFn must be idempotent so that we can safely re-run it if a node failed
   365  	// while running it. nil if the migration has been "backed in" and is no
    366  	// while running it. nil if the migration has been "baked in" and is no
   367  	workFn func(context.Context, runner) error
   368  	// includedInBootstrap is set for migrations that need to be performed for
   369  	// updating old clusters, but are also covered by the MetadataSchema that gets
   370  	// created by hand for a new cluster when it bootstraps itself. This kind of
   371  	// duplication between a migration and the MetadataSchema is useful for
    372  	// migrations that create system descriptors - for new clusters (particularly
   373  	// for tests) we want to create these tables by hand so that a corresponding
   374  	// range is created at bootstrap time. Otherwise, we'd have the split queue
   375  	// asynchronously creating some ranges which is annoying for tests.
   376  	//
   377  	// Generally when setting this field you'll want to introduce a new cluster
   378  	// version.
   379  	includedInBootstrap roachpb.Version
   380  	// doesBackfill should be set to true if the migration triggers a backfill.
   381  	doesBackfill bool
   382  	// clusterWide migrations are only run by the system tenant. All other
   383  	// migrations are run by each individual tenant. clusterWide migrations
   384  	// typically have to do with cluster settings, which is a cluster-wide
   385  	// concept.
   386  	clusterWide bool
   387  	// newDescriptorIDs is a function that returns the IDs of any additional
   388  	// descriptors that were added by this migration. This is needed to automate
   389  	// certain tests, which check the number of ranges/descriptors present on
   390  	// server bootup.
   391  	newDescriptorIDs func(ctx context.Context, db db, codec keys.SQLCodec) ([]sqlbase.ID, error)
   392  }
   393  
   394  func init() {
   395  	// Ensure that all migrations have unique names.
   396  	names := make(map[string]struct{}, len(backwardCompatibleMigrations))
   397  	for _, migration := range backwardCompatibleMigrations {
   398  		name := migration.name
   399  		if _, ok := names[name]; ok {
   400  			log.Fatalf(context.Background(), "duplicate sql migration %q", name)
   401  		}
   402  		names[name] = struct{}{}
   403  	}
   404  }
   405  
   406  type runner struct {
   407  	db          db
   408  	codec       keys.SQLCodec
   409  	sqlExecutor *sql.InternalExecutor
   410  	settings    *cluster.Settings
   411  }
   412  
   413  func (r runner) execAsRoot(ctx context.Context, opName, stmt string, qargs ...interface{}) error {
   414  	_, err := r.sqlExecutor.ExecEx(ctx, opName, nil, /* txn */
   415  		sqlbase.InternalExecutorSessionDataOverride{
   416  			User: security.RootUser,
   417  		},
   418  		stmt, qargs...)
   419  	return err
   420  }
   421  
   422  func (r runner) execAsRootWithRetry(
   423  	ctx context.Context, opName string, stmt string, qargs ...interface{},
   424  ) error {
   425  	// Retry a limited number of times because returning an error and letting
   426  	// the node kill itself is better than holding the migration lease for an
   427  	// arbitrarily long time.
   428  	var err error
   429  	for retry := retry.Start(retry.Options{MaxRetries: 5}); retry.Next(); {
    430  		err = r.execAsRoot(ctx, opName, stmt, qargs...)
   431  		if err == nil {
   432  			break
   433  		}
   434  		log.Warningf(ctx, "failed to run %s: %v", stmt, err)
   435  	}
   436  	return err
   437  }
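
         // For illustration, a typical workFn built on the helpers above might look
         // like the following sketch (the setting name and function are assumptions,
         // not part of this package):
         //
         //	func enableExampleSetting(ctx context.Context, r runner) error {
         //		return r.execAsRootWithRetry(
         //			ctx, "enable-example-setting",
         //			"SET CLUSTER SETTING example.setting.enabled = true")
         //	}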
   438  
    439  // leaseManager is defined just to allow us to use a fake
    440  // leasemanager.LeaseManager when testing this package.
   441  type leaseManager interface {
   442  	AcquireLease(ctx context.Context, key roachpb.Key) (*leasemanager.Lease, error)
   443  	ExtendLease(ctx context.Context, l *leasemanager.Lease) error
   444  	ReleaseLease(ctx context.Context, l *leasemanager.Lease) error
   445  	TimeRemaining(l *leasemanager.Lease) time.Duration
   446  }
   447  
    448  // db is defined just to allow us to use a fake kv.DB when testing this
   449  // package.
   450  type db interface {
   451  	Scan(ctx context.Context, begin, end interface{}, maxRows int64) ([]kv.KeyValue, error)
   452  	Get(ctx context.Context, key interface{}) (kv.KeyValue, error)
   453  	Put(ctx context.Context, key, value interface{}) error
   454  	Txn(ctx context.Context, retryable func(ctx context.Context, txn *kv.Txn) error) error
   455  }
   456  
   457  // Manager encapsulates the necessary functionality for handling migrations
   458  // of data in the cluster.
   459  type Manager struct {
   460  	stopper      *stop.Stopper
   461  	leaseManager leaseManager
   462  	db           db
   463  	codec        keys.SQLCodec
   464  	sqlExecutor  *sql.InternalExecutor
   465  	testingKnobs MigrationManagerTestingKnobs
   466  	settings     *cluster.Settings
   467  	jobRegistry  *jobs.Registry
   468  }
   469  
   470  // NewManager initializes and returns a new Manager object.
   471  func NewManager(
   472  	stopper *stop.Stopper,
   473  	db *kv.DB,
   474  	codec keys.SQLCodec,
   475  	executor *sql.InternalExecutor,
   476  	clock *hlc.Clock,
   477  	testingKnobs MigrationManagerTestingKnobs,
   478  	clientID string,
   479  	settings *cluster.Settings,
   480  	registry *jobs.Registry,
   481  ) *Manager {
   482  	opts := leasemanager.Options{
   483  		ClientID:      clientID,
   484  		LeaseDuration: leaseDuration,
   485  	}
   486  	return &Manager{
   487  		stopper:      stopper,
   488  		leaseManager: leasemanager.New(db, clock, opts),
   489  		db:           db,
   490  		codec:        codec,
   491  		sqlExecutor:  executor,
   492  		testingKnobs: testingKnobs,
   493  		settings:     settings,
   494  		jobRegistry:  registry,
   495  	}
   496  }
   497  
   498  // ExpectedDescriptorIDs returns the list of all expected system descriptor IDs,
   499  // including those added by completed migrations. This is needed for certain
   500  // tests, which check the number of ranges and system tables at node startup.
   501  //
   502  // NOTE: This value may be out-of-date if another node is actively running
   503  // migrations, and so should only be used in test code where the migration
   504  // lifecycle is tightly controlled.
   505  func ExpectedDescriptorIDs(
   506  	ctx context.Context,
   507  	db db,
   508  	codec keys.SQLCodec,
   509  	defaultZoneConfig *zonepb.ZoneConfig,
   510  	defaultSystemZoneConfig *zonepb.ZoneConfig,
   511  ) (sqlbase.IDs, error) {
   512  	completedMigrations, err := getCompletedMigrations(ctx, db, codec)
   513  	if err != nil {
   514  		return nil, err
   515  	}
   516  	descriptorIDs := sqlbase.MakeMetadataSchema(codec, defaultZoneConfig, defaultSystemZoneConfig).DescriptorIDs()
   517  	for _, migration := range backwardCompatibleMigrations {
   518  		// Is the migration not creating descriptors?
   519  		if migration.newDescriptorIDs == nil ||
   520  			// Is the migration included in the metadata schema considered above?
   521  			(migration.includedInBootstrap != roachpb.Version{}) {
   522  			continue
   523  		}
   524  		if _, ok := completedMigrations[string(migrationKey(codec, migration))]; ok {
   525  			newIDs, err := migration.newDescriptorIDs(ctx, db, codec)
   526  			if err != nil {
   527  				return nil, err
   528  			}
   529  			descriptorIDs = append(descriptorIDs, newIDs...)
   530  		}
   531  	}
   532  	sort.Sort(descriptorIDs)
   533  	return descriptorIDs, nil
   534  }
   535  
   536  // EnsureMigrations should be run during node startup to ensure that all
    537  // required migrations have been run (running any that are definitely safe
    538  // to run).
   539  func (m *Manager) EnsureMigrations(ctx context.Context, bootstrapVersion roachpb.Version) error {
   540  	// First, check whether there are any migrations that need to be run.
   541  	completedMigrations, err := getCompletedMigrations(ctx, m.db, m.codec)
   542  	if err != nil {
   543  		return err
   544  	}
   545  	allMigrationsCompleted := true
   546  	for _, migration := range backwardCompatibleMigrations {
   547  		if !m.shouldRunMigration(migration, bootstrapVersion) {
   548  			continue
   549  		}
   550  		if m.testingKnobs.DisableBackfillMigrations && migration.doesBackfill {
   551  			log.Infof(ctx, "ignoring migrations after (and including) %s due to testing knob",
   552  				migration.name)
   553  			break
   554  		}
   555  		key := migrationKey(m.codec, migration)
   556  		if _, ok := completedMigrations[string(key)]; !ok {
   557  			allMigrationsCompleted = false
   558  		}
   559  	}
   560  	if allMigrationsCompleted {
   561  		return nil
   562  	}
   563  
   564  	// If there are any, grab the migration lease to ensure that only one
   565  	// node is ever doing migrations at a time.
   566  	// Note that we shouldn't ever let client.LeaseNotAvailableErrors cause us
   567  	// to stop trying, because if we return an error the server will be shut down,
   568  	// and this server being down may prevent the leaseholder from finishing.
   569  	var lease *leasemanager.Lease
   570  	if log.V(1) {
   571  		log.Info(ctx, "trying to acquire lease")
   572  	}
   573  	for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
   574  		lease, err = m.leaseManager.AcquireLease(ctx, m.codec.MigrationLeaseKey())
   575  		if err == nil {
   576  			break
   577  		}
   578  		log.Infof(ctx, "failed attempt to acquire migration lease: %s", err)
   579  	}
   580  	if err != nil {
   581  		return errors.Wrapf(err, "failed to acquire lease for running necessary migrations")
   582  	}
   583  
   584  	// Ensure that we hold the lease throughout the migration process and release
   585  	// it when we're done.
   586  	done := make(chan interface{}, 1)
   587  	defer func() {
   588  		done <- nil
   589  		if log.V(1) {
   590  			log.Info(ctx, "trying to release the lease")
   591  		}
   592  		if err := m.leaseManager.ReleaseLease(ctx, lease); err != nil {
   593  			log.Errorf(ctx, "failed to release migration lease: %s", err)
   594  		}
   595  	}()
   596  	if err := m.stopper.RunAsyncTask(ctx, "migrations.Manager: lease watcher",
   597  		func(ctx context.Context) {
   598  			select {
   599  			case <-done:
   600  				return
   601  			case <-time.After(leaseRefreshInterval):
   602  				if err := m.leaseManager.ExtendLease(ctx, lease); err != nil {
   603  					log.Warningf(ctx, "unable to extend ownership of expiration lease: %s", err)
   604  				}
   605  				if m.leaseManager.TimeRemaining(lease) < leaseRefreshInterval {
   606  					// Do one last final check of whether we're done - it's possible that
   607  					// ReleaseLease can sneak in and execute ahead of ExtendLease even if
   608  					// the ExtendLease started first (making for an unexpected value error),
   609  					// and doing this final check can avoid unintended shutdowns.
   610  					select {
   611  					case <-done:
   612  						return
   613  					default:
   614  						// Note that we may be able to do better than this by influencing the
   615  						// deadline of migrations' transactions based on the lease expiration
   616  						// time, but simply kill the process for now for the sake of simplicity.
   617  						log.Fatal(ctx, "not enough time left on migration lease, terminating for safety")
   618  					}
   619  				}
   620  			}
   621  		}); err != nil {
   622  		return err
   623  	}
   624  
   625  	// Re-get the list of migrations in case any of them were completed between
   626  	// our initial check and our grabbing of the lease.
   627  	completedMigrations, err = getCompletedMigrations(ctx, m.db, m.codec)
   628  	if err != nil {
   629  		return err
   630  	}
   631  
   632  	startTime := timeutil.Now().String()
   633  	r := runner{
   634  		db:          m.db,
   635  		codec:       m.codec,
   636  		sqlExecutor: m.sqlExecutor,
   637  		settings:    m.settings,
   638  	}
   639  	for _, migration := range backwardCompatibleMigrations {
   640  		if !m.shouldRunMigration(migration, bootstrapVersion) {
   641  			continue
   642  		}
   643  
   644  		key := migrationKey(m.codec, migration)
   645  		if _, ok := completedMigrations[string(key)]; ok {
   646  			continue
   647  		}
   648  
   649  		if m.testingKnobs.DisableBackfillMigrations && migration.doesBackfill {
   650  			log.Infof(ctx, "ignoring migrations after (and including) %s due to testing knob",
   651  				migration.name)
   652  			break
   653  		}
   654  
   655  		if log.V(1) {
   656  			log.Infof(ctx, "running migration %q", migration.name)
   657  		}
   658  		if err := migration.workFn(ctx, r); err != nil {
   659  			return errors.Wrapf(err, "failed to run migration %q", migration.name)
   660  		}
   661  
   662  		log.VEventf(ctx, 1, "persisting record of completing migration %s", migration.name)
   663  		if err := m.db.Put(ctx, key, startTime); err != nil {
   664  			return errors.Wrapf(err, "failed to persist record of completing migration %q",
   665  				migration.name)
   666  		}
   667  	}
   668  
   669  	return nil
   670  }
   671  
   672  func (m *Manager) shouldRunMigration(
   673  	migration migrationDescriptor, bootstrapVersion roachpb.Version,
   674  ) bool {
   675  	if migration.workFn == nil {
   676  		// The migration has been baked in.
   677  		return false
   678  	}
   679  	minVersion := migration.includedInBootstrap
   680  	if minVersion != (roachpb.Version{}) && !bootstrapVersion.Less(minVersion) {
   681  		// The migration is unnecessary.
   682  		return false
   683  	}
   684  	if migration.clusterWide && !m.codec.ForSystemTenant() {
   685  		// The migration is a cluster-wide migration and we are not the
   686  		// system tenant.
   687  		return false
   688  	}
   689  	return true
   690  }
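
         // For example, "create system.comment table" above sets includedInBootstrap
         // to Version19_2, so clusters bootstrapped at 19.2 or later skip it, while a
         // clusterWide migration such as "initialize cluster.secret" is only run when
         // the codec belongs to the system tenant.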
   691  
   692  var schemaChangeJobMigrationName = "upgrade schema change job format"
   693  
   694  func schemaChangeJobMigrationKey(codec keys.SQLCodec) roachpb.Key {
   695  	return append(codec.MigrationKeyPrefix(), roachpb.RKey(schemaChangeJobMigrationName)...)
   696  }
   697  
   698  var systemNamespaceMigrationName = "upgrade system.namespace post-20.1-finalization"
   699  
   700  func systemNamespaceMigrationKey(codec keys.SQLCodec) roachpb.Key {
   701  	return append(codec.MigrationKeyPrefix(), roachpb.RKey(systemNamespaceMigrationName)...)
   702  }
   703  
   704  // schemaChangeJobMigrationKeyForTable returns a key prefixed with
   705  // schemaChangeJobMigrationKey for a specific table, to store the completion
   706  // status for adding a new job if the table was being added or needed to drain
   707  // names.
   708  func schemaChangeJobMigrationKeyForTable(codec keys.SQLCodec, tableID sqlbase.ID) roachpb.Key {
   709  	return encoding.EncodeUint32Ascending(schemaChangeJobMigrationKey(codec), uint32(tableID))
   710  }
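
         // Schematically, the per-table key produced above is just the base migration
         // key with the table ID appended:
         //
         //	codec.MigrationKeyPrefix() + "upgrade schema change job format" + uint32(tableID)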
   711  
   712  // StartSchemaChangeJobMigration starts an async task to run the migration that
   713  // upgrades 19.2-style jobs to the 20.1 job format, so that the jobs can be
   714  // adopted by the job registry. The task first waits until the upgrade to 20.1
   715  // is finalized before running the migration. The migration is retried until
   716  // it succeeds (on any node).
   717  func (m *Manager) StartSchemaChangeJobMigration(ctx context.Context) error {
   718  	migrationKey := schemaChangeJobMigrationKey(m.codec)
   719  	return m.stopper.RunAsyncTask(ctx, "run-schema-change-job-migration", func(ctx context.Context) {
   720  		log.Info(ctx, "starting wait for upgrade finalization before schema change job migration")
   721  		// First wait for the cluster to finalize the upgrade to 20.1. These values
   722  		// were chosen to be similar to the retry loop for finalizing the cluster
   723  		// upgrade.
   724  		waitRetryOpts := retry.Options{
   725  			InitialBackoff: 10 * time.Second,
   726  			MaxBackoff:     10 * time.Second,
   727  			Closer:         m.stopper.ShouldQuiesce(),
   728  		}
   729  		for retry := retry.StartWithCtx(ctx, waitRetryOpts); retry.Next(); {
   730  			if m.settings.Version.IsActive(ctx, clusterversion.VersionSchemaChangeJob) {
   731  				break
   732  			}
   733  		}
   734  		select {
   735  		case <-m.stopper.ShouldQuiesce():
   736  			return
   737  		default:
   738  		}
   739  		log.VEventf(ctx, 2, "detected upgrade finalization")
   740  
   741  		if !m.testingKnobs.AlwaysRunJobMigration {
   742  			// Check whether this migration has already been completed.
   743  			if kv, err := m.db.Get(ctx, migrationKey); err != nil {
   744  				log.Infof(ctx, "error getting record of schema change job migration: %s", err.Error())
   745  			} else if kv.Exists() {
   746  				log.Infof(ctx, "schema change job migration already complete")
   747  				return
   748  			}
   749  		}
   750  
   751  		// Now run the migration. This is retried indefinitely until it finishes.
   752  		log.Infof(ctx, "starting schema change job migration")
   753  		r := runner{
   754  			db:          m.db,
   755  			codec:       m.codec,
   756  			sqlExecutor: m.sqlExecutor,
   757  			settings:    m.settings,
   758  		}
   759  		migrationRetryOpts := retry.Options{
   760  			InitialBackoff: 1 * time.Minute,
   761  			MaxBackoff:     10 * time.Minute,
   762  			Closer:         m.stopper.ShouldQuiesce(),
   763  		}
   764  		startTime := timeutil.Now().String()
   765  		for migRetry := retry.Start(migrationRetryOpts); migRetry.Next(); {
   766  			migrateCtx, _ := m.stopper.WithCancelOnQuiesce(context.Background())
   767  			migrateCtx = logtags.AddTag(migrateCtx, "schema-change-job-migration", nil)
   768  			if err := migrateSchemaChangeJobs(migrateCtx, r, m.jobRegistry); err != nil {
    769  				log.Errorf(ctx, "error attempting to run schema change job migration, will retry: %s %s", err.Error(), startTime)
   770  				continue
   771  			}
   772  			log.Infof(ctx, "schema change job migration completed")
   773  			if err := m.db.Put(ctx, migrationKey, startTime); err != nil {
   774  				log.Warningf(ctx, "error persisting record of schema change job migration, will retry: %s", err.Error())
   775  			}
   776  			break
   777  		}
   778  		if fn := m.testingKnobs.AfterJobMigration; fn != nil {
   779  			fn()
   780  		}
   781  	})
   782  }
   783  
   784  var systemNamespaceMigrationEnabled = settings.RegisterBoolSetting(
   785  	"testing.system_namespace_migration.enabled",
    786  	"internal testing only: whether to run the system namespace migration",
   787  	true,
   788  )
   789  
   790  // StartSystemNamespaceMigration starts an async task to run the migration that
   791  // migrates entries from system.namespace (descriptor 2) to system.namespace2
   792  // (descriptor 30). The task first waits until the upgrade to 20.1 is finalized
   793  // before running the migration. The migration is retried until it succeeds (on
   794  // any node).
   795  func (m *Manager) StartSystemNamespaceMigration(
   796  	ctx context.Context, bootstrapVersion roachpb.Version,
   797  ) error {
   798  	if !bootstrapVersion.Less(clusterversion.VersionByKey(clusterversion.VersionNamespaceTableWithSchemas)) {
   799  		// Our bootstrap version is equal to or greater than 20.1, where no old
   800  		// namespace table is created: we can skip this migration.
   801  		return nil
   802  	}
   803  	return m.stopper.RunAsyncTask(ctx, "run-system-namespace-migration", func(ctx context.Context) {
   804  		log.Info(ctx, "starting wait for upgrade finalization before system.namespace migration")
   805  		// First wait for the cluster to finalize the upgrade to 20.1. These values
   806  		// were chosen to be similar to the retry loop for finalizing the cluster
   807  		// upgrade.
   808  		waitRetryOpts := retry.Options{
   809  			InitialBackoff: 10 * time.Second,
   810  			MaxBackoff:     10 * time.Second,
   811  			Closer:         m.stopper.ShouldQuiesce(),
   812  		}
   813  		for retry := retry.StartWithCtx(ctx, waitRetryOpts); retry.Next(); {
   814  			if !systemNamespaceMigrationEnabled.Get(&m.settings.SV) {
   815  				continue
   816  			}
   817  			if m.settings.Version.IsActive(ctx, clusterversion.VersionNamespaceTableWithSchemas) {
   818  				break
   819  			}
   820  		}
   821  		select {
   822  		case <-m.stopper.ShouldQuiesce():
   823  			return
   824  		default:
   825  		}
   826  		log.VEventf(ctx, 2, "detected upgrade finalization for system.namespace migration")
   827  
   828  		migrationKey := systemNamespaceMigrationKey(m.codec)
   829  		// Check whether this migration has already been completed.
   830  		if kv, err := m.db.Get(ctx, migrationKey); err != nil {
   831  			log.Infof(ctx, "error getting record of system.namespace migration: %s", err.Error())
   832  		} else if kv.Exists() {
   833  			log.Infof(ctx, "system.namespace migration already complete")
   834  			return
   835  		}
   836  
   837  		// Now run the migration. This is retried indefinitely until it finishes.
   838  		log.Infof(ctx, "starting system.namespace migration")
   839  		r := runner{
   840  			db:          m.db,
   841  			codec:       m.codec,
   842  			sqlExecutor: m.sqlExecutor,
   843  			settings:    m.settings,
   844  		}
   845  		migrationRetryOpts := retry.Options{
   846  			InitialBackoff: 1 * time.Minute,
   847  			MaxBackoff:     10 * time.Minute,
   848  			Closer:         m.stopper.ShouldQuiesce(),
   849  		}
   850  		startTime := timeutil.Now().String()
   851  		for migRetry := retry.Start(migrationRetryOpts); migRetry.Next(); {
   852  			if err := m.migrateSystemNamespace(ctx, migrationKey, r, startTime); err != nil {
    853  				log.Errorf(ctx, "error attempting to run system.namespace migration, will retry: %s %s", err.Error(),
   854  					startTime)
   855  				continue
   856  			}
   857  			break
   858  		}
   859  	})
   860  }
   861  
   862  // migrateSystemNamespace migrates entries from the deprecated system.namespace
   863  // table to the new one, which includes a parentSchemaID column. Each database
   864  // entry is copied to the new table along with a corresponding entry for the
   865  // 'public' schema. Each table entry is copied over with the public schema as
   866  // as its parentSchemaID.
    867  // its parentSchemaID.
   868  // Only entries that do not exist in the new table are copied.
   869  //
   870  // New database and table entries continue to be written to the deprecated
   871  // namespace table until VersionNamespaceTableWithSchemas is active. This means
   872  // that an additional migration will be necessary in 20.2 to catch any new
   873  // entries which may have been missed by this one. In the meantime, namespace
   874  // lookups fall back to the deprecated table if a name is not found in the new
   875  // one.
   876  func (m *Manager) migrateSystemNamespace(
   877  	ctx context.Context, migrationKey roachpb.Key, r runner, startTime string,
   878  ) error {
   879  	migrateCtx, cancel := m.stopper.WithCancelOnQuiesce(ctx)
   880  	defer cancel()
   881  	migrateCtx = logtags.AddTag(migrateCtx, "system-namespace-migration", nil)
   882  	// Loop until there's no more work to be done.
   883  	workLeft := true
   884  	for workLeft {
   885  		if err := m.db.Txn(migrateCtx, func(ctx context.Context, txn *kv.Txn) error {
   886  			// Check again to see if someone else wrote the migration key.
   887  			if kv, err := txn.Get(ctx, migrationKey); err != nil {
   888  				log.Infof(ctx, "error getting record of system.namespace migration: %s", err.Error())
   889  				// Retry the migration.
   890  				return err
   891  			} else if kv.Exists() {
   892  				// Give up, no work to be done.
   893  				log.Infof(ctx, "system.namespace migration already complete")
   894  				return nil
   895  			}
   896  			// Fetch all entries that are not present in the new namespace table. Each
   897  			// of these entries will be copied to the new table.
   898  			//
   899  			// Note that we are very careful to always delete from both namespace tables
   900  			// in 20.1, so there's no possibility that we'll be overwriting a deleted
   901  			// table that existed in the old table and the new table but was deleted
   902  			// from only the new table.
   903  			const batchSize = 1000
   904  			q := fmt.Sprintf(
   905  				`SELECT "parentID", name, id FROM [%d AS namespace_deprecated]
   906                WHERE id NOT IN (SELECT id FROM [%d AS namespace]) LIMIT %d`,
   907  				sqlbase.DeprecatedNamespaceTable.ID, sqlbase.NamespaceTable.ID, batchSize+1)
   908  			rows, err := r.sqlExecutor.QueryEx(
   909  				ctx, "read-deprecated-namespace-table", txn,
   910  				sqlbase.InternalExecutorSessionDataOverride{
   911  					User: security.RootUser,
   912  				},
   913  				q)
   914  			if err != nil {
   915  				return err
   916  			}
   917  			log.Infof(ctx, "Migrating system.namespace chunk with %d rows", len(rows))
   918  			for i, row := range rows {
   919  				workLeft = false
   920  				// We found some rows from the query, which means that we can't quit
   921  				// just yet.
   922  				if i >= batchSize {
   923  					workLeft = true
   924  					// Just process 1000 rows at a time.
   925  					break
   926  				}
   927  				parentID := sqlbase.ID(tree.MustBeDInt(row[0]))
   928  				name := string(tree.MustBeDString(row[1]))
   929  				id := sqlbase.ID(tree.MustBeDInt(row[2]))
   930  				if parentID == keys.RootNamespaceID {
   931  					// This row represents a database. Add it to the new namespace table.
   932  					databaseKey := sqlbase.NewDatabaseKey(name)
   933  					if err := txn.Put(ctx, databaseKey.Key(r.codec), id); err != nil {
   934  						return err
   935  					}
   936  					// Also create a 'public' schema for this database.
   937  					schemaKey := sqlbase.NewSchemaKey(id, "public")
   938  					log.VEventf(ctx, 2, "Migrating system.namespace entry for database %s", name)
   939  					if err := txn.Put(ctx, schemaKey.Key(r.codec), keys.PublicSchemaID); err != nil {
   940  						return err
   941  					}
   942  				} else {
   943  					// This row represents a table. Add it to the new namespace table with the
   944  					// schema set to 'public'.
   945  					if id == keys.DeprecatedNamespaceTableID {
   946  						// The namespace table itself was already handled in
   947  						// createNewSystemNamespaceDescriptor. Do not overwrite it with the
   948  						// deprecated ID.
   949  						continue
   950  					}
   951  					tableKey := sqlbase.NewTableKey(parentID, keys.PublicSchemaID, name)
   952  					log.VEventf(ctx, 2, "Migrating system.namespace entry for table %s", name)
   953  					if err := txn.Put(ctx, tableKey.Key(r.codec), id); err != nil {
   954  						return err
   955  					}
   956  				}
   957  			}
   958  			return nil
   959  		}); err != nil {
   960  			return err
   961  		}
   962  	}
   963  	// No more work to be done.
   964  	log.Infof(migrateCtx, "system.namespace migration completed")
   965  	if err := m.db.Put(migrateCtx, migrationKey, startTime); err != nil {
   966  		log.Warningf(migrateCtx, "error persisting record of system.namespace migration, will retry: %s", err.Error())
   967  		return err
   968  	}
   969  	return nil
   970  }
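
         // Schematically, each row copied above is written to the new namespace table
         // as follows (shown via the key constructors used in the loop, not the raw
         // encoded keys):
         //
         //	database row (parentID == 0):  NewDatabaseKey(name) -> id   and   NewSchemaKey(id, "public") -> PublicSchemaID
         //	table row    (parentID != 0):  NewTableKey(parentID, PublicSchemaID, name) -> id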
   971  
   972  // migrateSchemaChangeJobs runs the schema change job migration. The migration
   973  // has two steps. In the first step, we scan the jobs table for all
    974  // non-Succeeded jobs; for each job, we look up the associated table and use
    975  // the table descriptor state to update the job payload appropriately. For jobs
   976  // that are waiting for GC for dropped tables, indexes, etc., we mark the
   977  // existing job as completed and create a new GC job. In the second step, we
   978  // get all the descriptors and all running jobs, and create a new job for all
   979  // tables that are either in the ADD state or have draining names but which
   980  // have no running jobs, since tables in those states in 19.2 would have been
   981  // processed by the schema changer.
   982  func migrateSchemaChangeJobs(ctx context.Context, r runner, registry *jobs.Registry) error {
   983  	// Get all jobs that aren't Succeeded and evaluate whether they need a
   984  	// migration. (Jobs that are canceled in 19.2 could still have in-progress
   985  	// schema changes.)
   986  	rows, err := r.sqlExecutor.QueryEx(
   987  		ctx, "jobs-for-migration", nil, /* txn */
   988  		sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
   989  		"SELECT id, payload FROM system.jobs WHERE status != $1", jobs.StatusSucceeded,
   990  	)
   991  	if err != nil {
   992  		return err
   993  	}
   994  	for _, row := range rows {
   995  		jobID := int64(tree.MustBeDInt(row[0]))
   996  		log.VEventf(ctx, 2, "job %d: evaluating for schema change job migration", jobID)
   997  
   998  		payload, err := jobs.UnmarshalPayload(row[1])
   999  		if err != nil {
  1000  			return err
  1001  		}
  1002  		if details := payload.GetSchemaChange(); details == nil ||
  1003  			details.FormatVersion > jobspb.BaseFormatVersion {
  1004  			continue
  1005  		}
  1006  
  1007  		log.Infof(ctx, "job %d: undergoing schema change job migration", jobID)
  1008  
  1009  		if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1010  			// Read the job again inside the transaction. If the job was already
  1011  			// upgraded, we don't have to do anything else.
  1012  			job, err := registry.LoadJobWithTxn(ctx, jobID, txn)
  1013  			if err != nil {
  1014  				// The job could have been GC'ed in the meantime.
  1015  				if jobs.HasJobNotFoundError(err) {
  1016  					return nil
  1017  				}
  1018  				return err
  1019  			}
  1020  			payload := job.Payload()
  1021  			details := payload.GetSchemaChange()
  1022  			if details.FormatVersion > jobspb.BaseFormatVersion {
  1023  				return nil
  1024  			}
  1025  
  1026  			// Determine whether the job is for dropping a database/table. Note that
  1027  			// DroppedTables is always populated in 19.2 for all jobs that drop
  1028  			// tables.
  1029  			if len(details.DroppedTables) > 0 {
  1030  				return migrateDropTablesOrDatabaseJob(ctx, txn, r.codec, registry, job)
  1031  			}
  1032  
  1033  			descIDs := job.Payload().DescriptorIDs
  1034  			// All other jobs have exactly 1 associated descriptor ID (for a table),
  1035  			// and correspond to a schema change with a mutation.
  1036  			if len(descIDs) != 1 {
  1037  				return errors.AssertionFailedf(
  1038  					"job %d: could not be migrated due to unexpected descriptor IDs %v", *job.ID(), descIDs)
  1039  			}
  1040  			descID := descIDs[0]
  1041  			tableDesc, err := sqlbase.GetTableDescFromID(ctx, txn, r.codec, descID)
  1042  			if err != nil {
  1043  				return err
  1044  			}
  1045  			return migrateMutationJobForTable(ctx, txn, registry, job, tableDesc)
  1046  		}); err != nil {
  1047  			return err
  1048  		}
  1049  		log.Infof(ctx, "job %d: completed schema change job migration", jobID)
  1050  	}
  1051  
  1052  	// Finally, we iterate through all table descriptors and jobs, and create jobs
  1053  	// for any tables in the ADD state or that have draining names that don't
  1054  	// already have jobs. We also create a GC job for all tables in the DROP state
  1055  	// with no associated schema change or GC job, which can result from failed
  1056  	// IMPORT and RESTORE jobs whose table data wasn't fully GC'ed.
  1057  	//
  1058  	// We start by getting all descriptors and all running jobs in a single
  1059  	// transaction. Each eligible table then gets a job created for it, each in a
  1060  	// separate transaction; in each of those transactions, we write a table-
  1061  	// specific KV with a key prefixed by schemaChangeJobMigrationKey to try to
  1062  	// prevent more than one such job from being created for the table.
  1063  	//
  1064  	// This process ensures that every table that entered into one of these
  1065  	// intermediate states (being added/dropped, or having draining names) in 19.2
  1066  	// will have a job created for it in 20.1, so that the table can finish being
  1067  	// processed. It's not essential for only one job to be created for each
  1068  	// table, since a redundant schema change job is a no-op, but we make an
  1069  	// effort to do that anyway.
  1070  	//
  1071  	// There are probably more efficient ways to do this part of the migration,
  1072  	// but the current approach seemed like the most straightforward.
  1073  	var allDescs []sqlbase.DescriptorProto
  1074  	schemaChangeJobsForDesc := make(map[sqlbase.ID][]int64)
  1075  	gcJobsForDesc := make(map[sqlbase.ID][]int64)
  1076  	if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1077  		descs, err := catalogkv.GetAllDescriptors(ctx, txn, r.codec)
  1078  		if err != nil {
  1079  			return err
  1080  		}
  1081  		allDescs = descs
  1082  
  1083  		// Get all running schema change jobs.
  1084  		rows, err := r.sqlExecutor.QueryEx(
  1085  			ctx, "preexisting-jobs", txn,
  1086  			sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
  1087  			"SELECT id, payload FROM system.jobs WHERE status = $1", jobs.StatusRunning,
  1088  		)
  1089  		if err != nil {
  1090  			return err
  1091  		}
  1092  		for _, row := range rows {
  1093  			jobID := int64(tree.MustBeDInt(row[0]))
  1094  			payload, err := jobs.UnmarshalPayload(row[1])
  1095  			if err != nil {
  1096  				return err
  1097  			}
  1098  			if details := payload.GetSchemaChange(); details != nil {
  1099  				if details.FormatVersion < jobspb.JobResumerFormatVersion {
  1100  					continue
  1101  				}
  1102  				if details.TableID != sqlbase.InvalidID {
  1103  					schemaChangeJobsForDesc[details.TableID] = append(schemaChangeJobsForDesc[details.TableID], jobID)
  1104  				} else {
  1105  					for _, t := range details.DroppedTables {
  1106  						schemaChangeJobsForDesc[t.ID] = append(schemaChangeJobsForDesc[t.ID], jobID)
  1107  					}
  1108  				}
  1109  			} else if details := payload.GetSchemaChangeGC(); details != nil {
  1110  				for _, t := range details.Tables {
  1111  					gcJobsForDesc[t.ID] = append(gcJobsForDesc[t.ID], jobID)
  1112  				}
  1113  			}
  1114  		}
  1115  		return nil
  1116  	}); err != nil {
  1117  		return err
  1118  	}
  1119  
  1120  	createSchemaChangeJobForTable := func(txn *kv.Txn, desc *sqlbase.TableDescriptor) error {
  1121  		var description string
  1122  		if desc.Adding() {
  1123  			description = fmt.Sprintf("adding table %d", desc.ID)
  1124  		} else if desc.HasDrainingNames() {
  1125  			description = fmt.Sprintf("draining names for table %d", desc.ID)
  1126  		} else {
  1127  			// This shouldn't be possible, but if it happens, it would be
  1128  			// appropriate to do nothing without returning an error.
  1129  			log.Warningf(
  1130  				ctx,
  1131  				"tried to add schema change job for table %d which is neither being added nor has draining names",
  1132  				desc.ID,
  1133  			)
  1134  			return nil
  1135  		}
  1136  		record := jobs.Record{
  1137  			Description:   description,
  1138  			Username:      security.NodeUser,
  1139  			DescriptorIDs: sqlbase.IDs{desc.ID},
  1140  			Details: jobspb.SchemaChangeDetails{
  1141  				TableID:       desc.ID,
  1142  				FormatVersion: jobspb.JobResumerFormatVersion,
  1143  			},
  1144  			Progress:      jobspb.SchemaChangeProgress{},
  1145  			NonCancelable: true,
  1146  		}
  1147  		job, err := registry.CreateJobWithTxn(ctx, record, txn)
  1148  		if err != nil {
  1149  			return err
  1150  		}
  1151  		log.Infof(ctx, "migration created new schema change job %d: %s", *job.ID(), description)
  1152  		return nil
  1153  	}
  1154  
  1155  	createGCJobForTable := func(txn *kv.Txn, desc *sqlbase.TableDescriptor) error {
  1156  		record := sql.CreateGCJobRecord(
  1157  			fmt.Sprintf("table %d", desc.ID),
  1158  			security.NodeUser,
  1159  			jobspb.SchemaChangeGCDetails{
  1160  				Tables: []jobspb.SchemaChangeGCDetails_DroppedID{{ID: desc.ID, DropTime: desc.DropTime}},
  1161  			})
  1162  		job, err := registry.CreateJobWithTxn(ctx, record, txn)
  1163  		if err != nil {
  1164  			return err
  1165  		}
  1166  		log.Infof(ctx, "migration created new GC job %d for table %d", *job.ID(), desc.ID)
  1167  		return nil
  1168  	}
  1169  
  1170  	log.Infof(ctx, "evaluating tables for creating jobs")
  1171  	for _, desc := range allDescs {
  1172  		switch desc := desc.(type) {
  1173  		case *sqlbase.TableDescriptor:
  1174  			if scJobs := schemaChangeJobsForDesc[desc.ID]; len(scJobs) > 0 {
  1175  				log.VEventf(ctx, 3, "table %d has running schema change jobs %v, skipping", desc.ID, scJobs)
  1176  				continue
  1177  			} else if gcJobs := gcJobsForDesc[desc.ID]; len(gcJobs) > 0 {
  1178  				log.VEventf(ctx, 3, "table %d has running GC jobs %v, skipping", desc.ID, gcJobs)
  1179  				continue
  1180  			}
  1181  			if !desc.Adding() && !desc.Dropped() && !desc.HasDrainingNames() {
  1182  				log.VEventf(ctx, 3,
  1183  					"table %d is not being added or dropped and does not have draining names, skipping",
  1184  					desc.ID,
  1185  				)
  1186  				continue
  1187  			}
  1188  
  1189  			if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1190  				key := schemaChangeJobMigrationKeyForTable(r.codec, desc.ID)
  1191  				startTime := timeutil.Now().String()
  1192  				if kv, err := txn.Get(ctx, key); err != nil {
  1193  					return err
  1194  				} else if kv.Exists() {
  1195  					log.VEventf(ctx, 3, "table %d already processed in migration", desc.ID)
  1196  					return nil
  1197  				}
  1198  				if desc.Adding() || desc.HasDrainingNames() {
  1199  					if err := createSchemaChangeJobForTable(txn, desc); err != nil {
  1200  						return err
  1201  					}
  1202  				} else if desc.Dropped() {
  1203  					// Note that a table can be both in the DROP state and have draining
  1204  					// names. In that case it was enough to just create a schema change
  1205  					// job, as in the case above, because that job will itself create a
  1206  					// GC job.
  1207  					if err := createGCJobForTable(txn, desc); err != nil {
  1208  						return err
  1209  					}
  1210  				}
  1211  				if err := txn.Put(ctx, key, startTime); err != nil {
  1212  					return err
  1213  				}
  1214  				return nil
  1215  			}); err != nil {
  1216  				return err
  1217  			}
  1218  		case *sqlbase.DatabaseDescriptor:
  1219  			// Do nothing.
  1220  		}
  1221  	}
  1222  
  1223  	return nil
  1224  }
  1225  
  1226  // migrateMutationJobForTable handles migrating jobs associated with mutations
  1227  // on a table, each of which is stored in MutationJobs. (This includes adding
  1228  // and dropping columns, indexes, and constraints, as well as primary key
  1229  // changes.) This function also handles jobs for indexes waiting for GC,
  1230  // stored in GCMutations.
  1231  func migrateMutationJobForTable(
  1232  	ctx context.Context,
  1233  	txn *kv.Txn,
  1234  	registry *jobs.Registry,
  1235  	job *jobs.Job,
   1236  	tableDesc *sqlbase.TableDescriptor,
  1237  ) error {
  1238  	log.VEventf(ctx, 2, "job %d: undergoing migration as mutation job for table %d", *job.ID(), tableDesc.ID)
  1239  
  1240  	// Check whether the job is for a mutation. There can be multiple mutations
  1241  	// with the same ID that correspond to the same job, but we just need to
  1242  	// look at one, since all the mutations with the same ID get state updates
  1243  	// in the same transaction.
  1244  	for i := range tableDesc.MutationJobs {
  1245  		mutationJob := &tableDesc.MutationJobs[i]
  1246  		if mutationJob.JobID != *job.ID() {
  1247  			continue
  1248  		}
  1249  		log.VEventf(
  1250  			ctx, 2, "job %d: found corresponding MutationJob %d on table %d",
  1251  			*job.ID(), mutationJob.MutationID, tableDesc.ID,
  1252  		)
  1253  		var mutation *sqlbase.DescriptorMutation
  1254  		for i := range tableDesc.Mutations {
  1255  			if tableDesc.Mutations[i].MutationID == mutationJob.MutationID {
  1256  				mutation = &tableDesc.Mutations[i]
  1257  				break
  1258  			}
  1259  		}
  1260  		if mutation == nil {
  1261  			// In theory, MutationJobs[i] corresponds to Mutations[i] in 19.2 and
  1262  			// earlier versions, so this should never happen. However, we've seen this
  1263  			// happen (#48786), so we have to be defensive.
  1264  			mutationNotFoundError := errors.AssertionFailedf("mutation %d not found for MutationJob %d",
  1265  				mutationJob.MutationID, mutationJob.JobID)
  1266  			log.Errorf(ctx, "%v", mutationNotFoundError)
  1267  			return registry.Failed(ctx, txn, *job.ID(), mutationNotFoundError)
  1268  		}
  1269  
  1270  		// Update the job details and status based on the table descriptor
  1271  		// state.
  1272  		if err := job.WithTxn(txn).Update(ctx, func(txn *kv.Txn, md jobs.JobMetadata, ju *jobs.JobUpdater) error {
  1273  			// Update the job details with the table and mutation IDs.
  1274  			details := md.Payload.GetSchemaChange()
  1275  			details.TableID = tableDesc.ID
  1276  			details.MutationID = mutationJob.MutationID
  1277  			details.FormatVersion = jobspb.JobResumerFormatVersion
  1278  			md.Payload.Details = jobspb.WrapPayloadDetails(*details)
  1279  
  1280  			log.VEventf(ctx, 2, "job %d: updating details to %+v", *job.ID(), details)
  1281  
  1282  			// Also give the job a non-nil expired lease to indicate that the job
  1283  			// is adoptable.
  1284  			md.Payload.Lease = &jobspb.Lease{}
  1285  			ju.UpdatePayload(md.Payload)
  1286  
  1287  			// If the mutation exists on the table descriptor, then the schema
  1288  			// change isn't actually in a terminal state, regardless of what the
  1289  			// job status is. So we force the status to Reverting if there's any
  1290  			// indication that it failed or was canceled, and otherwise force the
  1291  			// state to Running.
  1292  			shouldRevert := md.Status == jobs.StatusFailed || md.Status == jobs.StatusCanceled ||
  1293  				md.Status == jobs.StatusReverting || md.Status == jobs.StatusCancelRequested
  1294  			previousStatus := md.Status
  1295  			if mutation.Rollback || shouldRevert {
  1296  				md.Status = jobs.StatusReverting
  1297  			} else {
  1298  				md.Status = jobs.StatusRunning
  1299  			}
  1300  			log.VEventf(ctx, 2, "job %d: updating status from %s to %s", *job.ID(), previousStatus, md.Status)
  1301  			ju.UpdateStatus(md.Status)
  1302  			return nil
  1303  		}); err != nil {
  1304  			return err
  1305  		}
  1306  		log.Infof(
  1307  			ctx, "job %d: successfully migrated for table %d, mutation %d",
  1308  			*job.ID(), tableDesc.ID, mutationJob.MutationID,
  1309  		)
  1310  		return nil
  1311  	}
  1312  
  1313  	// If not a mutation, check whether the job corresponds to a GCMutation.
  1314  	// This indicates that the job must be in the "waiting for GC TTL" state.
  1315  	// In that case, we mark the job as succeeded and create a new job for GC.
  1316  	for i := range tableDesc.GCMutations {
  1317  		gcMutation := &tableDesc.GCMutations[i]
  1318  		// JobID and DropTime are populated only in 19.2 and earlier versions.
  1319  		if gcMutation.JobID != *job.ID() {
  1320  			continue
  1321  		}
  1322  		log.VEventf(
  1323  			ctx, 2, "job %d: found corresponding index GC mutation for index %d on table %d",
  1324  			*job.ID(), gcMutation.IndexID, tableDesc.ID,
  1325  		)
  1326  		if err := registry.Succeeded(ctx, txn, *job.ID()); err != nil {
  1327  			return err
  1328  		}
  1329  		log.VEventf(ctx, 2, "job %d: marked as succeeded", *job.ID())
  1330  
  1331  		indexGCJobRecord := sql.CreateGCJobRecord(
  1332  			job.Payload().Description,
  1333  			job.Payload().Username,
  1334  			jobspb.SchemaChangeGCDetails{
  1335  				Indexes: []jobspb.SchemaChangeGCDetails_DroppedIndex{
  1336  					{
  1337  						IndexID:  gcMutation.IndexID,
  1338  						DropTime: gcMutation.DropTime,
  1339  					},
  1340  				},
  1341  				ParentID: tableDesc.GetID(),
  1342  			},
  1343  		)
  1344  		// The new job ID won't be written to GCMutations, which is fine because
  1345  		// we don't read the job ID in 20.1 for anything except this migration.
  1346  		newJob, err := registry.CreateJobWithTxn(ctx, indexGCJobRecord, txn)
  1347  		if err != nil {
  1348  			return err
  1349  		}
  1350  		log.Infof(ctx,
  1351  			"migration marked drop table job %d as successful, created GC job %d",
  1352  			*job.ID(), *newJob.ID(),
  1353  		)
  1354  		return nil
  1355  	}
  1356  
  1357  	// If the job isn't in MutationJobs or GCMutations, it's likely just a
  1358  	// failed or canceled job that was successfully cleaned up. Check for this,
  1359  	// and return an error if this is not the case.
  1360  	status, err := job.CurrentStatus(ctx)
  1361  	if err != nil {
  1362  		return err
  1363  	}
  1364  	if status == jobs.StatusCanceled || status == jobs.StatusFailed {
  1365  		return nil
  1366  	}
  1367  	return errors.Newf(
  1368  		"job %d: no corresponding mutation found on table %d during migration", *job.ID(), tableDesc.ID)
  1369  }
  1370  
  1371  // migrateDropTablesOrDatabaseJob handles migrating any jobs that require
  1372  // dropping a table, including dropping tables, views, sequences, and
  1373  // databases, as well as truncating tables.
  1374  func migrateDropTablesOrDatabaseJob(
  1375  	ctx context.Context, txn *kv.Txn, codec keys.SQLCodec, registry *jobs.Registry, job *jobs.Job,
  1376  ) error {
  1377  	payload := job.Payload()
  1378  	details := payload.GetSchemaChange()
  1379  	log.VEventf(ctx, 2,
  1380  		"job %d: undergoing migration as drop table/database job for tables %+v",
  1381  		*job.ID(), details.DroppedTables,
  1382  	)
  1383  
  1384  	if job.Progress().RunningStatus == string(sql.RunningStatusDrainingNames) {
  1385  		// If the job is draining names, the schema change job resumer will handle
  1386  		// it. Just update the job details.
  1387  		if err := job.WithTxn(txn).Update(ctx, func(txn *kv.Txn, md jobs.JobMetadata, ju *jobs.JobUpdater) error {
  1388  			if len(details.DroppedTables) == 1 {
  1389  				details.TableID = details.DroppedTables[0].ID
  1390  			}
  1391  			details.FormatVersion = jobspb.JobResumerFormatVersion
  1392  			md.Payload.Details = jobspb.WrapPayloadDetails(*details)
  1393  
  1394  			log.VEventf(ctx, 2, "job %d: updating details to %+v", *job.ID(), details)
  1395  
  1396  			// Also give the job a non-nil expired lease to indicate that the job
  1397  			// is adoptable.
  1398  			md.Payload.Lease = &jobspb.Lease{}
  1399  			ju.UpdatePayload(md.Payload)
  1400  			return nil
  1401  		}); err != nil {
  1402  			return err
  1403  		}
  1404  		log.Infof(ctx, "job %d: successfully migrated in draining names state", *job.ID())
  1405  		return nil
  1406  	}
  1407  
  1408  	// Otherwise, the job is in the "waiting for GC TTL" phase. In this case, we
  1409  	// mark the present job as Succeeded and create a new GC job.
  1410  
  1411  	// TODO (lucy/paul): In the case of multiple tables, is it a problem if some
  1412  	// of the tables have already been GC'ed at this point? In 19.2, each table
  1413  	// advances separately through the stages of being dropped, so it should be
  1414  	// possible for some tables to still be draining names while others have
  1415  	// already undergone GC.
  1416  
  1417  	if err := registry.Succeeded(ctx, txn, *job.ID()); err != nil {
  1418  		return err
  1419  	}
  1420  	log.VEventf(ctx, 2, "job %d: marked as succeeded", *job.ID())
  1421  
  1422  	tablesToDrop := make([]jobspb.SchemaChangeGCDetails_DroppedID, len(details.DroppedTables))
  1423  	for i := range details.DroppedTables {
  1424  		tableID := details.DroppedTables[i].ID
  1425  		tablesToDrop[i].ID = details.DroppedTables[i].ID
  1426  		desc, err := sqlbase.GetTableDescFromID(ctx, txn, codec, tableID)
  1427  		if err != nil {
  1428  			return err
  1429  		}
  1430  		tablesToDrop[i].DropTime = desc.DropTime
  1431  	}
  1432  	gcJobRecord := sql.CreateGCJobRecord(
  1433  		job.Payload().Description,
  1434  		job.Payload().Username,
  1435  		jobspb.SchemaChangeGCDetails{
  1436  			Tables:   tablesToDrop,
  1437  			ParentID: details.DroppedDatabaseID,
  1438  		},
  1439  	)
  1440  	// The new job ID won't be written to DropJobID on the table descriptor(s),
  1441  	// which is fine because we don't read the job ID in 20.1 for anything
  1442  	// except this migration.
  1443  	// TODO (lucy): The above is true except for the cleanup loop for orphaned
  1444  	// jobs in the registry, which should be fixed.
  1445  	newJob, err := registry.CreateJobWithTxn(ctx, gcJobRecord, txn)
  1446  	if err != nil {
  1447  		return err
  1448  	}
  1449  	log.Infof(ctx,
  1450  		"migration marked drop database/table job %d as successful, created GC job %d",
  1451  		*job.ID(), *newJob.ID(),
  1452  	)
  1453  	return err
  1454  }
  1455  
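        // getCompletedMigrations scans the migration key prefix and returns the set
        // of keys for migrations that have already run to completion on this cluster.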
  1456  func getCompletedMigrations(
  1457  	ctx context.Context, db db, codec keys.SQLCodec,
  1458  ) (map[string]struct{}, error) {
  1459  	if log.V(1) {
  1460  		log.Info(ctx, "trying to get the list of completed migrations")
  1461  	}
  1462  	prefix := codec.MigrationKeyPrefix()
  1463  	keyvals, err := db.Scan(ctx, prefix, prefix.PrefixEnd(), 0 /* maxRows */)
  1464  	if err != nil {
  1465  		return nil, errors.Wrapf(err, "failed to get list of completed migrations")
  1466  	}
  1467  	completedMigrations := make(map[string]struct{})
  1468  	for _, keyval := range keyvals {
  1469  		completedMigrations[string(keyval.Key)] = struct{}{}
  1470  	}
  1471  	return completedMigrations, nil
  1472  }
  1473  
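        // migrationKey returns the key under which a migration's completion is
        // recorded, formed by appending the migration's name to the migration key
        // prefix.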
  1474  func migrationKey(codec keys.SQLCodec, migration migrationDescriptor) roachpb.Key {
  1475  	return append(codec.MigrationKeyPrefix(), roachpb.RKey(migration.name)...)
  1476  }
  1477  
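        // createSystemTable installs the given table descriptor and its namespace
        // entry directly at the KV layer using CPuts. If the table already exists,
        // the resulting ConditionFailedError is swallowed, making the operation
        // idempotent.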
  1478  func createSystemTable(ctx context.Context, r runner, desc sqlbase.TableDescriptor) error {
  1479  	// We install the table at the KV layer so that we can choose a known ID in
  1480  	// the reserved ID space. (The SQL layer doesn't allow this.)
  1481  	err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1482  		b := txn.NewBatch()
  1483  		tKey := sqlbase.MakePublicTableNameKey(ctx, r.settings, desc.GetParentID(), desc.GetName())
  1484  		b.CPut(tKey.Key(r.codec), desc.GetID(), nil)
  1485  		b.CPut(sqlbase.MakeDescMetadataKey(r.codec, desc.GetID()), sqlbase.WrapDescriptor(&desc), nil)
  1486  		if err := txn.SetSystemConfigTrigger(); err != nil {
  1487  			return err
  1488  		}
  1489  		return txn.Run(ctx, b)
  1490  	})
  1491  	// CPuts only provide idempotent inserts if we ignore the errors that arise
  1492  	// when the condition isn't met.
  1493  	if errors.HasType(err, (*roachpb.ConditionFailedError)(nil)) {
  1494  		return nil
  1495  	}
  1496  	return err
  1497  }
  1498  
  1499  func createCommentTable(ctx context.Context, r runner) error {
  1500  	return createSystemTable(ctx, r, sqlbase.CommentsTable)
  1501  }
  1502  
  1503  func createReplicationConstraintStatsTable(ctx context.Context, r runner) error {
  1504  	if err := createSystemTable(ctx, r, sqlbase.ReplicationConstraintStatsTable); err != nil {
  1505  		return err
  1506  	}
  1507  	err := r.execAsRoot(ctx, "add-constraints-ttl",
  1508  		fmt.Sprintf(
  1509  			"ALTER TABLE system.replication_constraint_stats CONFIGURE ZONE USING gc.ttlseconds = %d",
  1510  			int(sqlbase.ReplicationConstraintStatsTableTTL.Seconds())))
  1511  	return errors.Wrapf(err, "failed to set TTL on %s", sqlbase.ReplicationConstraintStatsTable.Name)
  1512  }
  1513  
  1514  func createReplicationCriticalLocalitiesTable(ctx context.Context, r runner) error {
  1515  	return createSystemTable(ctx, r, sqlbase.ReplicationCriticalLocalitiesTable)
  1516  }
  1517  
  1518  func createReplicationStatsTable(ctx context.Context, r runner) error {
  1519  	if err := createSystemTable(ctx, r, sqlbase.ReplicationStatsTable); err != nil {
  1520  		return err
  1521  	}
  1522  	err := r.execAsRoot(ctx, "add-replication-status-ttl",
  1523  		fmt.Sprintf("ALTER TABLE system.replication_stats CONFIGURE ZONE USING gc.ttlseconds = %d",
  1524  			int(sqlbase.ReplicationStatsTableTTL.Seconds())))
  1525  	return errors.Wrapf(err, "failed to set TTL on %s", sqlbase.ReplicationStatsTable.Name)
  1526  }
  1527  
  1528  func createProtectedTimestampsMetaTable(ctx context.Context, r runner) error {
  1529  	return errors.Wrap(createSystemTable(ctx, r, sqlbase.ProtectedTimestampsMetaTable),
  1530  		"failed to create system.protected_ts_meta")
  1531  }
  1532  
  1533  func createProtectedTimestampsRecordsTable(ctx context.Context, r runner) error {
  1534  	return errors.Wrap(createSystemTable(ctx, r, sqlbase.ProtectedTimestampsRecordsTable),
  1535  		"failed to create system.protected_ts_records")
  1536  }
  1537  
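        // createNewSystemNamespaceDescriptor writes the new system.namespace table
        // descriptor and its name entry, and rewrites the name of the deprecated
        // namespace table's descriptor; see the comments in the transaction below
        // for the details.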
  1538  func createNewSystemNamespaceDescriptor(ctx context.Context, r runner) error {
  1539  	return r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1540  		b := txn.NewBatch()
  1541  
  1542  		// Retrieve the existing namespace table's descriptor and change its name to
  1543  		// "namespace". This corrects the behavior of this migration as it existed
  1544  		// in 20.1 betas. The old namespace table cannot be edited without breaking
  1545  		// explicit selects from system.namespace in 19.2.
  1546  		deprecatedKey := sqlbase.MakeDescMetadataKey(r.codec, keys.DeprecatedNamespaceTableID)
  1547  		deprecatedDesc := &sqlbase.Descriptor{}
  1548  		ts, err := txn.GetProtoTs(ctx, deprecatedKey, deprecatedDesc)
  1549  		if err != nil {
  1550  			return err
  1551  		}
  1552  		deprecatedDesc.Table(ts).Name = sqlbase.DeprecatedNamespaceTable.Name
  1553  		b.Put(deprecatedKey, deprecatedDesc)
  1554  
  1555  		// The 19.2 namespace table contains an entry for "namespace" which maps to
  1556  		// the deprecated namespace table's ID. Even though the cluster version at
  1557  		// this point is 19.2, we construct a metadata name key in the 20.1 format.
  1558  		// This is for two reasons:
  1559  		// 1. We do not want to change the mapping in namespace_deprecated for
  1560  		//    "namespace", as for the purpose of namespace_deprecated, namespace
  1561  		//    refers to the correct ID.
  1562  		// 2. By adding the ID mapping in the new system.namespace table, the
  1563  		//    idempotent semantics of the migration ensure that "namespace" maps to
  1564  		//    the correct ID in the new system.namespace table after all tables are
  1565  		//    copied over.
  1566  		nameKey := sqlbase.NewPublicTableKey(
  1567  			sqlbase.NamespaceTable.GetParentID(), sqlbase.NamespaceTableName)
  1568  		b.Put(nameKey.Key(r.codec), sqlbase.NamespaceTable.GetID())
  1569  		b.Put(sqlbase.MakeDescMetadataKey(
  1570  			r.codec, sqlbase.NamespaceTable.GetID()), sqlbase.WrapDescriptor(&sqlbase.NamespaceTable))
  1571  		return txn.Run(ctx, b)
  1572  	})
  1573  }
  1574  
  1575  func createRoleOptionsTable(ctx context.Context, r runner) error {
  1576  	// Create the system.role_options table; its rows are added by addCreateRoleToAdminAndRoot.
  1577  	err := createSystemTable(ctx, r, sqlbase.RoleOptionsTable)
  1578  	if err != nil {
  1579  		return errors.Wrap(err, "failed to create system.role_options")
  1580  	}
  1581  
  1582  	return nil
  1583  }
  1584  
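        // addCreateRoleToAdminAndRoot upserts rows into system.role_options granting
        // the CREATEROLE option to the admin role and the root user.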
  1585  func addCreateRoleToAdminAndRoot(ctx context.Context, r runner) error {
  1586  	// Upsert the admin/root roles with CreateRole privilege into the table.
  1587  	// We intentionally override any existing entry.
  1588  	const upsertCreateRoleStmt = `
  1589            UPSERT INTO system.role_options (username, option, value) VALUES ($1, 'CREATEROLE', NULL)
  1590            `
  1591  	err := r.execAsRootWithRetry(ctx,
  1592  		"add role options table and upsert admin with CREATEROLE",
  1593  		upsertCreateRoleStmt,
  1594  		sqlbase.AdminRole)
  1595  
  1596  	if err != nil {
  1597  		return err
  1598  	}
  1599  
  1600  	return r.execAsRootWithRetry(ctx,
  1601  		"add role options table and upsert root with CREATEROLE",
  1602  		upsertCreateRoleStmt,
  1603  		security.RootUser)
  1604  }
  1605  
  1606  func createReportsMetaTable(ctx context.Context, r runner) error {
  1607  	return createSystemTable(ctx, r, sqlbase.ReportsMetaTable)
  1608  }
  1609  
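        // createStatementInfoSystemTables creates the statement diagnostics system
        // tables: system.statement_bundle_chunks,
        // system.statement_diagnostics_requests, and system.statement_diagnostics.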
  1610  func createStatementInfoSystemTables(ctx context.Context, r runner) error {
  1611  	if err := createSystemTable(ctx, r, sqlbase.StatementBundleChunksTable); err != nil {
  1612  		return errors.Wrap(err, "failed to create system.statement_bundle_chunks")
  1613  	}
  1614  	if err := createSystemTable(ctx, r, sqlbase.StatementDiagnosticsRequestsTable); err != nil {
  1615  		return errors.Wrap(err, "failed to create system.statement_diagnostics_requests")
  1616  	}
  1617  	if err := createSystemTable(ctx, r, sqlbase.StatementDiagnosticsTable); err != nil {
  1618  		return errors.Wrap(err, "failed to create system.statement_diagnostics")
  1619  	}
  1620  	return nil
  1621  }
  1622  
  1623  // SettingsDefaultOverrides documents the effect of several migrations that add
  1624  // an explicit value for a setting, effectively changing the "default value"
  1625  // from what was defined in code.
  1626  var SettingsDefaultOverrides = map[string]string{
  1627  	"diagnostics.reporting.enabled": "true",
  1628  	"cluster.secret":                "<random>",
  1629  }
  1630  
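        // optInToDiagnosticsStatReporting enables diagnostics reporting for existing
        // clusters, unless telemetry opt-out was requested (see
        // cluster.TelemetryOptOut).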
  1631  func optInToDiagnosticsStatReporting(ctx context.Context, r runner) error {
  1632  	// We're opting out of the automatic opt-in. See the discussion in updates.go.
  1633  	if cluster.TelemetryOptOut() {
  1634  		return nil
  1635  	}
  1636  	return r.execAsRootWithRetry(ctx, "optInToDiagnosticsStatReporting",
  1637  		`SET CLUSTER SETTING diagnostics.reporting.enabled = true`)
  1638  }
  1639  
  1640  func initializeClusterSecret(ctx context.Context, r runner) error {
  1641  	return r.execAsRootWithRetry(
  1642  		ctx, "initializeClusterSecret",
  1643  		`SET CLUSTER SETTING cluster.secret = gen_random_uuid()::STRING`,
  1644  	)
  1645  }
  1646  
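        // populateVersionSetting reads the bootstrap version from the KV layer and
        // records it as the "version" cluster setting, inserting the settings row
        // only if one is not already present.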
  1647  func populateVersionSetting(ctx context.Context, r runner) error {
  1648  	var v roachpb.Version
  1649  	if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1650  		return txn.GetProto(ctx, keys.BootstrapVersionKey, &v)
  1651  	}); err != nil {
  1652  		return err
  1653  	}
  1654  	if v == (roachpb.Version{}) {
  1655  		// The cluster was bootstrapped at v1.0 (or even earlier), so just use
  1656  		// the TestingBinaryMinSupportedVersion of the binary.
  1657  		v = clusterversion.TestingBinaryMinSupportedVersion
  1658  	}
  1659  
  1660  	b, err := protoutil.Marshal(&clusterversion.ClusterVersion{Version: v})
  1661  	if err != nil {
  1662  		return errors.Wrap(err, "while marshaling version")
  1663  	}
  1664  
  1665  	// Add an ON CONFLICT DO NOTHING clause to avoid changing an existing version.
  1666  	// Again, a version can already be present if a previous run of this migration
  1667  	// did not complete (overwriting would also be reasonable, but is unnecessary).
  1668  	// We don't allow users to perform version changes until we have run
  1669  	// the insert below.
  1670  	if err := r.execAsRoot(
  1671  		ctx,
  1672  		"insert-setting",
  1673  		fmt.Sprintf(`INSERT INTO system.settings (name, value, "lastUpdated", "valueType") VALUES ('version', x'%x', now(), 'm') ON CONFLICT(name) DO NOTHING`, b),
  1674  	); err != nil {
  1675  		return err
  1676  	}
  1677  
  1678  	if err := r.execAsRoot(
  1679  		ctx, "set-setting", "SET CLUSTER SETTING version = $1", v.String(),
  1680  	); err != nil {
  1681  		return err
  1682  	}
  1683  	return nil
  1684  }
  1685  
  1686  func addRootUser(ctx context.Context, r runner) error {
  1687  	// Upsert the root user into the table. We intentionally override any existing entry.
  1688  	const upsertRootStmt = `
  1689  	        UPSERT INTO system.users (username, "hashedPassword", "isRole") VALUES ($1, '', false)
  1690  	        `
  1691  	return r.execAsRootWithRetry(ctx, "addRootUser", upsertRootStmt, security.RootUser)
  1692  }
  1693  
  1694  func addAdminRole(ctx context.Context, r runner) error {
  1695  	// Upsert the admin role into the table. We intentionally override any existing entry.
  1696  	const upsertAdminStmt = `
  1697            UPSERT INTO system.users (username, "hashedPassword", "isRole") VALUES ($1, '', true)
  1698            `
  1699  	return r.execAsRootWithRetry(ctx, "addAdminRole", upsertAdminStmt, sqlbase.AdminRole)
  1700  }
  1701  
  1702  func addRootToAdminRole(ctx context.Context, r runner) error {
  1703  	// Upsert the role membership into the table. We intentionally override any existing entry.
  1704  	const upsertAdminStmt = `
  1705            UPSERT INTO system.role_members ("role", "member", "isAdmin") VALUES ($1, $2, true)
  1706            `
  1707  	return r.execAsRootWithRetry(
  1708  		ctx, "addRootToAdminRole", upsertAdminStmt, sqlbase.AdminRole, security.RootUser)
  1709  }
  1710  
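        // disallowPublicUserOrRole returns an error if a user or role named "public"
        // exists, since "public" is now a reserved name; the operator must drop it
        // on a previous version before the upgrade can proceed.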
  1711  func disallowPublicUserOrRole(ctx context.Context, r runner) error {
  1712  	// Check whether a user or role named "public" exists.
  1713  	const selectPublicStmt = `
  1714            SELECT username, "isRole" from system.users WHERE username = $1
  1715            `
  1716  
  1717  	for retry := retry.Start(retry.Options{MaxRetries: 5}); retry.Next(); {
  1718  		row, err := r.sqlExecutor.QueryRowEx(
  1719  			ctx, "disallowPublicUserOrRole", nil, /* txn */
  1720  			sqlbase.InternalExecutorSessionDataOverride{
  1721  				User: security.RootUser,
  1722  			},
  1723  			selectPublicStmt, sqlbase.PublicRole,
  1724  		)
  1725  		if err != nil {
  1726  			continue
  1727  		}
  1728  		if row == nil {
  1729  			// No such user.
  1730  			return nil
  1731  		}
  1732  
  1733  		isRole, ok := tree.AsDBool(row[1])
  1734  		if !ok {
  1735  			log.Fatalf(ctx, "expected 'isRole' column of system.users to be of type bool, got %v", row)
  1736  		}
  1737  
  1738  		if isRole {
  1739  			return fmt.Errorf(`found a role named %s which is now a reserved name. Please drop the role `+
  1740  				`(DROP ROLE %s) using a previous version of CockroachDB and try again`,
  1741  				sqlbase.PublicRole, sqlbase.PublicRole)
  1742  		}
  1743  		return fmt.Errorf(`found a user named %s which is now a reserved name. Please drop the user `+
  1744  			`(DROP USER %s) using a previous version of CockroachDB and try again`,
  1745  			sqlbase.PublicRole, sqlbase.PublicRole)
  1746  	}
  1747  	return nil
  1748  }
  1749  
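        // createDefaultDbs creates the default databases
        // (sqlbase.DefaultDatabaseName and sqlbase.PgDatabaseName) if they do not
        // already exist, retrying a few times on failure.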
  1750  func createDefaultDbs(ctx context.Context, r runner) error {
  1751  	// Create the default databases. These are plain databases with
  1752  	// default permissions. Nothing special happens if they exist
  1753  	// already.
  1754  	const createDbStmt = `CREATE DATABASE IF NOT EXISTS "%s"`
  1755  
  1756  	var err error
  1757  	for retry := retry.Start(retry.Options{MaxRetries: 5}); retry.Next(); {
  1758  		for _, dbName := range []string{sqlbase.DefaultDatabaseName, sqlbase.PgDatabaseName} {
  1759  			stmt := fmt.Sprintf(createDbStmt, dbName)
  1760  			err = r.execAsRoot(ctx, "create-default-db", stmt)
  1761  			if err != nil {
  1762  				log.Warningf(ctx, "failed attempt to add database %q: %s", dbName, err)
  1763  				break
  1764  			}
  1765  		}
  1766  		if err == nil {
  1767  			break
  1768  		}
  1769  	}
  1770  	return err
  1771  }
  1772  
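        // retireOldTsPurgeIntervalSettings copies the values of the deprecated
        // timeseries TTL settings to their new names, leaving any values already
        // stored under the new names untouched.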
  1773  func retireOldTsPurgeIntervalSettings(ctx context.Context, r runner) error {
  1774  	// We are deprecating `timeseries.storage.10s_resolution_ttl` in favor of
  1775  	// `timeseries.storage.resolution_10s.ttl`, copying the old value over only
  1776  	// if the new setting is not already defined.
  1777  	//
  1778  	// Ditto for the `30m` resolution.
  1779  
  1780  	// Copy 'timeseries.storage.10s_resolution_ttl' into
  1781  	// 'timeseries.storage.resolution_10s.ttl' if the former is defined
  1782  	// and the latter is not defined yet.
  1783  	//
  1784  	// We rely on the SELECT returning no row if the original setting
  1785  	// was not defined, and INSERT ON CONFLICT DO NOTHING to ignore the
  1786  	// insert if the new name was already set.
  1787  	if err := r.execAsRoot(ctx, "copy-setting", `
  1788  INSERT INTO system.settings (name, value, "lastUpdated", "valueType")
  1789     SELECT 'timeseries.storage.resolution_10s.ttl', value, "lastUpdated", "valueType"
  1790       FROM system.settings WHERE name = 'timeseries.storage.10s_resolution_ttl'
  1791  ON CONFLICT (name) DO NOTHING`,
  1792  	); err != nil {
  1793  		return err
  1794  	}
  1795  
  1796  	// Ditto 30m.
  1797  	if err := r.execAsRoot(ctx, "copy-setting", `
  1798  INSERT INTO system.settings (name, value, "lastUpdated", "valueType")
  1799     SELECT 'timeseries.storage.resolution_30m.ttl', value, "lastUpdated", "valueType"
  1800       FROM system.settings WHERE name = 'timeseries.storage.30m_resolution_ttl'
  1801  ON CONFLICT (name) DO NOTHING`,
  1802  	); err != nil {
  1803  		return err
  1804  	}
  1805  
  1806  	return nil
  1807  }
  1808  
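        // updateSystemLocationData seeds system.locations with the default
        // latitude/longitude rows for known localities, but only if the table is
        // currently empty.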
  1809  func updateSystemLocationData(ctx context.Context, r runner) error {
  1810  	// See if the system.locations table already has data in it.
  1811  	// If so, we don't want to do anything.
  1812  	row, err := r.sqlExecutor.QueryRowEx(ctx, "update-system-locations",
  1813  		nil, /* txn */
  1814  		sqlbase.InternalExecutorSessionDataOverride{User: security.RootUser},
  1815  		`SELECT count(*) FROM system.locations`)
  1816  	if err != nil {
  1817  		return err
  1818  	}
  1819  	count := int(tree.MustBeDInt(row[0]))
  1820  	if count != 0 {
  1821  		return nil
  1822  	}
  1823  
  1824  	for _, loc := range roachpb.DefaultLocationInformation {
  1825  		stmt := `UPSERT INTO system.locations VALUES ($1, $2, $3, $4)`
  1826  		tier := loc.Locality.Tiers[0]
  1827  		if err := r.execAsRoot(ctx, "update-system-locations",
  1828  			stmt, tier.Key, tier.Value, loc.Latitude, loc.Longitude,
  1829  		); err != nil {
  1830  			return err
  1831  		}
  1832  	}
  1833  	return nil
  1834  }
  1835  
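        // depublicizeSystemComments revokes several privileges on system.comments
        // from the "public" role, which was mistakenly granted all privileges when
        // the table was created; see the comments below for why this schema change
        // is safe to force in the mixed-version state.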
  1836  func depublicizeSystemComments(ctx context.Context, r runner) error {
  1837  	// At some point in time, system.comments was mistakenly created
  1838  	// with all privileges granted to the "public" role (i.e. everyone).
  1839  	// This migration cleans this up.
  1840  
  1841  	// Schema changes are normally banned in the mixed-version 19.2/20.1 state, so
  1842  	// we override the ban and force the schema change to run anyway. This is safe
  1843  	// because updating privileges on a table only requires an update to the table
  1844  	// descriptor, and the job created will only wait for leases to expire. We
  1845  	// don't expect any other schema changes to happen on the comments table that
  1846  	// the 20.1 job could interfere with. The update to the table descriptor would
  1847  	// cause a 19.2 SchemaChangeManager to attempt a schema change, but it would
  1848  	// be a no-op.
  1849  	ctx = descs.MigrationSchemaChangeRequiredContext(ctx)
  1850  
  1851  	for _, priv := range []string{"GRANT", "INSERT", "DELETE", "UPDATE"} {
  1852  		stmt := fmt.Sprintf(`REVOKE %s ON TABLE system.comments FROM public`, priv)
  1853  		// REVOKE should never fail here -- it's always possible for root
  1854  		// to revoke a privilege even if it's not currently granted.
  1855  		if err := r.execAsRoot(ctx, "depublicize-system-comments", stmt); err != nil {
  1856  			return err
  1857  		}
  1858  	}
  1859  	return nil
  1860  }
  1861  
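        // alterSystemJobsAddCreatedByColumns adds the created_by_type and
        // created_by_id columns to system.jobs and builds an index over them, using
        // IF NOT EXISTS so the migration is idempotent.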
  1862  func alterSystemJobsAddCreatedByColumns(ctx context.Context, r runner) error {
  1863  	// NB: we use family name as it existed in the original system.jobs schema to
  1864  	// minimize migration work needed (avoid renames).
  1865  	addColsStmt := `
  1866  ALTER TABLE system.jobs
  1867  ADD COLUMN IF NOT EXISTS created_by_type STRING FAMILY fam_0_id_status_created_payload,
  1868  ADD COLUMN IF NOT EXISTS created_by_id INT FAMILY fam_0_id_status_created_payload
  1869  `
  1870  	addIdxStmt := `
  1871  CREATE INDEX IF NOT EXISTS jobs_created_by_type_created_by_id_idx
  1872  ON system.jobs (created_by_type, created_by_id) 
  1873  STORING (status)
  1874  `
  1875  	asNode := sqlbase.InternalExecutorSessionDataOverride{
  1876  		User: security.NodeUser,
  1877  	}
  1878  
  1879  	if _, err := r.sqlExecutor.ExecEx(
  1880  		ctx, "add-jobs-cols", nil, asNode, addColsStmt); err != nil {
  1881  		return err
  1882  	}
  1883  
  1884  	_, err := r.sqlExecutor.ExecEx(ctx, "add-jobs-idx", nil, asNode, addIdxStmt)
  1885  	return err
  1886  }