github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/temporary_schema.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "fmt" 16 "strconv" 17 "strings" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/keys" 21 "github.com/cockroachdb/cockroach/pkg/kv" 22 "github.com/cockroachdb/cockroach/pkg/roachpb" 23 "github.com/cockroachdb/cockroach/pkg/security" 24 "github.com/cockroachdb/cockroach/pkg/server/serverpb" 25 "github.com/cockroachdb/cockroach/pkg/server/telemetry" 26 "github.com/cockroachdb/cockroach/pkg/settings" 27 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 28 "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" 29 "github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver" 30 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 31 "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" 32 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 33 "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" 34 "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" 35 "github.com/cockroachdb/cockroach/pkg/util" 36 "github.com/cockroachdb/cockroach/pkg/util/hlc" 37 "github.com/cockroachdb/cockroach/pkg/util/log" 38 "github.com/cockroachdb/cockroach/pkg/util/metric" 39 "github.com/cockroachdb/cockroach/pkg/util/retry" 40 "github.com/cockroachdb/cockroach/pkg/util/stop" 41 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 42 "github.com/cockroachdb/cockroach/pkg/util/uint128" 43 "github.com/cockroachdb/errors" 44 io_prometheus_client "github.com/prometheus/client_model/go" 45 ) 46 47 // TempObjectCleanupInterval is a ClusterSetting controlling how often 48 // temporary objects get cleaned up. 49 var TempObjectCleanupInterval = settings.RegisterPublicDurationSetting( 50 "sql.temp_object_cleaner.cleanup_interval", 51 "how often to clean up orphaned temporary objects", 52 30*time.Minute, 53 ) 54 55 var ( 56 temporaryObjectCleanerActiveCleanersMetric = metric.Metadata{ 57 Name: "sql.temp_object_cleaner.active_cleaners", 58 Help: "number of cleaner t®asks currently running on this node", 59 Measurement: "Count", 60 Unit: metric.Unit_COUNT, 61 MetricType: io_prometheus_client.MetricType_GAUGE, 62 } 63 temporaryObjectCleanerSchemasToDeleteMetric = metric.Metadata{ 64 Name: "sql.temp_object_cleaner.schemas_to_delete", 65 Help: "number of schemas to be deleted by the temp object cleaner on this node", 66 Measurement: "Count", 67 Unit: metric.Unit_COUNT, 68 MetricType: io_prometheus_client.MetricType_COUNTER, 69 } 70 temporaryObjectCleanerSchemasDeletionErrorMetric = metric.Metadata{ 71 Name: "sql.temp_object_cleaner.schemas_deletion_error", 72 Help: "number of errored schema deletions by the temp object cleaner on this node", 73 Measurement: "Count", 74 Unit: metric.Unit_COUNT, 75 MetricType: io_prometheus_client.MetricType_COUNTER, 76 } 77 temporaryObjectCleanerSchemasDeletionSuccessMetric = metric.Metadata{ 78 Name: "sql.temp_object_cleaner.schemas_deletion_success", 79 Help: "number of successful schema deletions by the temp object cleaner on this node", 80 Measurement: "Count", 81 Unit: metric.Unit_COUNT, 82 MetricType: io_prometheus_client.MetricType_COUNTER, 83 } 84 ) 85 86 func createTempSchema(params runParams, sKey sqlbase.DescriptorKey) (sqlbase.ID, error) { 87 id, err := catalogkv.GenerateUniqueDescID(params.ctx, params.p.ExecCfg().DB, params.p.ExecCfg().Codec) 88 if err != nil { 89 return sqlbase.InvalidID, err 90 } 91 if err := params.p.createSchemaWithID(params.ctx, sKey.Key(params.ExecCfg().Codec), id); err != nil { 92 return sqlbase.InvalidID, err 93 } 94 95 params.p.sessionDataMutator.SetTemporarySchemaName(sKey.Name()) 96 return id, nil 97 } 98 99 func (p *planner) createSchemaWithID( 100 ctx context.Context, schemaNameKey roachpb.Key, schemaID sqlbase.ID, 101 ) error { 102 if p.ExtendedEvalContext().Tracing.KVTracingEnabled() { 103 log.VEventf(ctx, 2, "CPut %s -> %d", schemaNameKey, schemaID) 104 } 105 106 b := &kv.Batch{} 107 b.CPut(schemaNameKey, schemaID, nil) 108 109 return p.txn.Run(ctx, b) 110 } 111 112 // temporarySchemaName returns the session specific temporary schema name given 113 // the sessionID. When the session creates a temporary object for the first 114 // time, it must create a schema with the name returned by this function. 115 func temporarySchemaName(sessionID ClusterWideID) string { 116 return fmt.Sprintf("pg_temp_%d_%d", sessionID.Hi, sessionID.Lo) 117 } 118 119 // temporarySchemaSessionID returns the sessionID of the given temporary schema. 120 func temporarySchemaSessionID(scName string) (bool, ClusterWideID, error) { 121 if !strings.HasPrefix(scName, "pg_temp_") { 122 return false, ClusterWideID{}, nil 123 } 124 parts := strings.Split(scName, "_") 125 if len(parts) != 4 { 126 return false, ClusterWideID{}, errors.Errorf("malformed temp schema name %s", scName) 127 } 128 hi, err := strconv.ParseUint(parts[2], 10, 64) 129 if err != nil { 130 return false, ClusterWideID{}, err 131 } 132 lo, err := strconv.ParseUint(parts[3], 10, 64) 133 if err != nil { 134 return false, ClusterWideID{}, err 135 } 136 return true, ClusterWideID{uint128.Uint128{Hi: hi, Lo: lo}}, nil 137 } 138 139 // getTemporaryObjectNames returns all the temporary objects under the 140 // temporary schema of the given dbID. 141 func getTemporaryObjectNames( 142 ctx context.Context, txn *kv.Txn, codec keys.SQLCodec, dbID sqlbase.ID, tempSchemaName string, 143 ) (TableNames, error) { 144 dbDesc, err := catalogkv.MustGetDatabaseDescByID(ctx, txn, codec, dbID) 145 if err != nil { 146 return nil, err 147 } 148 a := catalogkv.UncachedPhysicalAccessor{} 149 return a.GetObjectNames( 150 ctx, 151 txn, 152 codec, 153 dbDesc, 154 tempSchemaName, 155 tree.DatabaseListFlags{CommonLookupFlags: tree.CommonLookupFlags{Required: false}}, 156 ) 157 } 158 159 // cleanupSessionTempObjects removes all temporary objects (tables, sequences, 160 // views, temporary schema) created by the session. 161 func cleanupSessionTempObjects( 162 ctx context.Context, 163 settings *cluster.Settings, 164 db *kv.DB, 165 codec keys.SQLCodec, 166 ie sqlutil.InternalExecutor, 167 sessionID ClusterWideID, 168 ) error { 169 tempSchemaName := temporarySchemaName(sessionID) 170 return db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 171 // We are going to read all database descriptor IDs, then for each database 172 // we will drop all the objects under the temporary schema. 173 dbIDs, err := catalogkv.GetAllDatabaseDescriptorIDs(ctx, txn, codec) 174 if err != nil { 175 return err 176 } 177 for _, id := range dbIDs { 178 if err := cleanupSchemaObjects( 179 ctx, 180 settings, 181 txn, 182 codec, 183 ie, 184 id, 185 tempSchemaName, 186 ); err != nil { 187 return err 188 } 189 // Even if no objects were found under the temporary schema, the schema 190 // itself may still exist (eg. a temporary table was created and then 191 // dropped). So we remove the namespace table entry of the temporary 192 // schema. 193 if err := sqlbase.RemoveSchemaNamespaceEntry(ctx, txn, codec, id, tempSchemaName); err != nil { 194 return err 195 } 196 } 197 return nil 198 }) 199 } 200 201 // cleanupSchemaObjects removes all objects that is located within a dbID and schema. 202 func cleanupSchemaObjects( 203 ctx context.Context, 204 settings *cluster.Settings, 205 txn *kv.Txn, 206 codec keys.SQLCodec, 207 ie sqlutil.InternalExecutor, 208 dbID sqlbase.ID, 209 schemaName string, 210 ) error { 211 tbNames, err := getTemporaryObjectNames(ctx, txn, codec, dbID, schemaName) 212 if err != nil { 213 return err 214 } 215 a := catalogkv.UncachedPhysicalAccessor{} 216 217 searchPath := sqlbase.DefaultSearchPath.WithTemporarySchemaName(schemaName) 218 override := sqlbase.InternalExecutorSessionDataOverride{ 219 SearchPath: &searchPath, 220 User: security.RootUser, 221 } 222 223 // TODO(andrei): We might want to accelerate the deletion of this data. 224 var tables sqlbase.IDs 225 var views sqlbase.IDs 226 var sequences sqlbase.IDs 227 228 descsByID := make(map[sqlbase.ID]*TableDescriptor, len(tbNames)) 229 tblNamesByID := make(map[sqlbase.ID]tree.TableName, len(tbNames)) 230 for _, tbName := range tbNames { 231 objDesc, err := a.GetObjectDesc( 232 ctx, 233 txn, 234 settings, 235 codec, 236 tbName.Catalog(), 237 tbName.Schema(), 238 tbName.Object(), 239 tree.ObjectLookupFlagsWithRequired(), 240 ) 241 if err != nil { 242 return err 243 } 244 desc := objDesc.TableDesc() 245 246 descsByID[desc.ID] = desc 247 tblNamesByID[desc.ID] = tbName 248 249 if desc.SequenceOpts != nil { 250 sequences = append(sequences, desc.ID) 251 } else if desc.ViewQuery != "" { 252 views = append(views, desc.ID) 253 } else { 254 tables = append(tables, desc.ID) 255 } 256 } 257 258 for _, toDelete := range []struct { 259 // typeName is the type of table being deleted, e.g. view, table, sequence 260 typeName string 261 // ids represents which ids we wish to remove. 262 ids sqlbase.IDs 263 // preHook is used to perform any operations needed before calling 264 // delete on all the given ids. 265 preHook func(sqlbase.ID) error 266 }{ 267 // Drop views before tables to avoid deleting required dependencies. 268 {"VIEW", views, nil}, 269 {"TABLE", tables, nil}, 270 // Drop sequences after tables, because then we reduce the amount of work 271 // that may be needed to drop indices. 272 { 273 "SEQUENCE", 274 sequences, 275 func(id sqlbase.ID) error { 276 desc := descsByID[id] 277 // For any dependent tables, we need to drop the sequence dependencies. 278 // This can happen if a permanent table references a temporary table. 279 for _, d := range desc.DependedOnBy { 280 // We have already cleaned out anything we are depended on if we've seen 281 // the descriptor already. 282 if _, ok := descsByID[d.ID]; ok { 283 continue 284 } 285 dTableDesc, err := sqlbase.GetTableDescFromID(ctx, txn, codec, d.ID) 286 if err != nil { 287 return err 288 } 289 db, err := sqlbase.GetDatabaseDescFromID(ctx, txn, codec, dTableDesc.GetParentID()) 290 if err != nil { 291 return err 292 } 293 schema, err := resolver.ResolveSchemaNameByID( 294 ctx, 295 txn, 296 codec, 297 dTableDesc.GetParentID(), 298 dTableDesc.GetParentSchemaID(), 299 ) 300 if err != nil { 301 return err 302 } 303 dependentColIDs := util.MakeFastIntSet() 304 for _, colID := range d.ColumnIDs { 305 dependentColIDs.Add(int(colID)) 306 } 307 for _, col := range dTableDesc.Columns { 308 if dependentColIDs.Contains(int(col.ID)) { 309 tbName := tree.MakeTableNameWithSchema( 310 tree.Name(db.Name), 311 tree.Name(schema), 312 tree.Name(dTableDesc.Name), 313 ) 314 _, err = ie.ExecEx( 315 ctx, 316 "delete-temp-dependent-col", 317 txn, 318 override, 319 fmt.Sprintf( 320 "ALTER TABLE %s ALTER COLUMN %s DROP DEFAULT", 321 tbName.FQString(), 322 tree.NameString(col.Name), 323 ), 324 ) 325 if err != nil { 326 return err 327 } 328 } 329 } 330 } 331 return nil 332 }, 333 }, 334 } { 335 if len(toDelete.ids) > 0 { 336 if toDelete.preHook != nil { 337 for _, id := range toDelete.ids { 338 if err := toDelete.preHook(id); err != nil { 339 return err 340 } 341 } 342 } 343 344 var query strings.Builder 345 query.WriteString("DROP ") 346 query.WriteString(toDelete.typeName) 347 348 for i, id := range toDelete.ids { 349 tbName := tblNamesByID[id] 350 if i != 0 { 351 query.WriteString(",") 352 } 353 query.WriteString(" ") 354 query.WriteString(tbName.FQString()) 355 } 356 query.WriteString(" CASCADE") 357 _, err = ie.ExecEx(ctx, "delete-temp-"+toDelete.typeName, txn, override, query.String()) 358 if err != nil { 359 return err 360 } 361 } 362 } 363 return nil 364 } 365 366 // isMeta1LeaseholderFunc helps us avoid an import into pkg/storage. 367 type isMeta1LeaseholderFunc func(hlc.Timestamp) (bool, error) 368 369 // TemporaryObjectCleaner is a background thread job that periodically 370 // cleans up orphaned temporary objects by sessions which did not close 371 // down cleanly. 372 type TemporaryObjectCleaner struct { 373 settings *cluster.Settings 374 db *kv.DB 375 codec keys.SQLCodec 376 makeSessionBoundInternalExecutor sqlutil.SessionBoundInternalExecutorFactory 377 // statusServer gives access to the Status service. 378 statusServer serverpb.OptionalStatusServer 379 isMeta1LeaseholderFunc isMeta1LeaseholderFunc 380 testingKnobs ExecutorTestingKnobs 381 metrics *temporaryObjectCleanerMetrics 382 } 383 384 // temporaryObjectCleanerMetrics are the metrics for TemporaryObjectCleaner 385 type temporaryObjectCleanerMetrics struct { 386 ActiveCleaners *metric.Gauge 387 SchemasToDelete *metric.Counter 388 SchemasDeletionError *metric.Counter 389 SchemasDeletionSuccess *metric.Counter 390 } 391 392 var _ metric.Struct = (*temporaryObjectCleanerMetrics)(nil) 393 394 // MetricStruct implements the metrics.Struct interface. 395 func (m *temporaryObjectCleanerMetrics) MetricStruct() {} 396 397 // NewTemporaryObjectCleaner initializes the TemporaryObjectCleaner with the 398 // required arguments, but does not start it. 399 func NewTemporaryObjectCleaner( 400 settings *cluster.Settings, 401 db *kv.DB, 402 codec keys.SQLCodec, 403 registry *metric.Registry, 404 makeSessionBoundInternalExecutor sqlutil.SessionBoundInternalExecutorFactory, 405 statusServer serverpb.OptionalStatusServer, 406 isMeta1LeaseholderFunc isMeta1LeaseholderFunc, 407 testingKnobs ExecutorTestingKnobs, 408 ) *TemporaryObjectCleaner { 409 metrics := makeTemporaryObjectCleanerMetrics() 410 registry.AddMetricStruct(metrics) 411 return &TemporaryObjectCleaner{ 412 settings: settings, 413 db: db, 414 codec: codec, 415 makeSessionBoundInternalExecutor: makeSessionBoundInternalExecutor, 416 statusServer: statusServer, 417 isMeta1LeaseholderFunc: isMeta1LeaseholderFunc, 418 testingKnobs: testingKnobs, 419 metrics: metrics, 420 } 421 } 422 423 // makeTemporaryObjectCleanerMetrics makes the metrics for the TemporaryObjectCleaner. 424 func makeTemporaryObjectCleanerMetrics() *temporaryObjectCleanerMetrics { 425 return &temporaryObjectCleanerMetrics{ 426 ActiveCleaners: metric.NewGauge(temporaryObjectCleanerActiveCleanersMetric), 427 SchemasToDelete: metric.NewCounter(temporaryObjectCleanerSchemasToDeleteMetric), 428 SchemasDeletionError: metric.NewCounter(temporaryObjectCleanerSchemasDeletionErrorMetric), 429 SchemasDeletionSuccess: metric.NewCounter(temporaryObjectCleanerSchemasDeletionSuccessMetric), 430 } 431 } 432 433 // doTemporaryObjectCleanup performs the actual cleanup. 434 func (c *TemporaryObjectCleaner) doTemporaryObjectCleanup( 435 ctx context.Context, closerCh <-chan struct{}, 436 ) error { 437 defer log.Infof(ctx, "completed temporary object cleanup job") 438 // Wrap the retry functionality with the default arguments. 439 retryFunc := func(ctx context.Context, do func() error) error { 440 return retry.WithMaxAttempts( 441 ctx, 442 retry.Options{ 443 InitialBackoff: 1 * time.Second, 444 MaxBackoff: 1 * time.Minute, 445 Multiplier: 2, 446 Closer: closerCh, 447 }, 448 5, // maxAttempts 449 func() error { 450 err := do() 451 if err != nil { 452 log.Warningf(ctx, "error during schema cleanup, retrying: %v", err) 453 } 454 return err 455 }, 456 ) 457 } 458 459 // We only want to perform the cleanup if we are holding the meta1 lease. 460 // This ensures only one server can perform the job at a time. 461 isLeaseholder, err := c.isMeta1LeaseholderFunc(c.db.Clock().Now()) 462 if err != nil { 463 return err 464 } 465 if !isLeaseholder { 466 log.Infof(ctx, "skipping temporary object cleanup run as it is not the leaseholder") 467 return nil 468 } 469 470 c.metrics.ActiveCleaners.Inc(1) 471 defer c.metrics.ActiveCleaners.Dec(1) 472 473 log.Infof(ctx, "running temporary object cleanup background job") 474 txn := kv.NewTxn(ctx, c.db, 0) 475 476 // Build a set of all session IDs with temporary objects. 477 var dbIDs []sqlbase.ID 478 if err := retryFunc(ctx, func() error { 479 var err error 480 dbIDs, err = catalogkv.GetAllDatabaseDescriptorIDs(ctx, txn, c.codec) 481 return err 482 }); err != nil { 483 return err 484 } 485 486 sessionIDs := make(map[ClusterWideID]struct{}) 487 for _, dbID := range dbIDs { 488 var schemaNames map[sqlbase.ID]string 489 if err := retryFunc(ctx, func() error { 490 var err error 491 schemaNames, err = resolver.GetForDatabase(ctx, txn, c.codec, dbID) 492 return err 493 }); err != nil { 494 return err 495 } 496 for _, scName := range schemaNames { 497 isTempSchema, sessionID, err := temporarySchemaSessionID(scName) 498 if err != nil { 499 // This should not cause an error. 500 log.Warningf(ctx, "could not parse %q as temporary schema name", scName) 501 continue 502 } 503 if isTempSchema { 504 sessionIDs[sessionID] = struct{}{} 505 } 506 } 507 } 508 log.Infof(ctx, "found %d temporary schemas", len(sessionIDs)) 509 510 if len(sessionIDs) == 0 { 511 log.Infof(ctx, "early exiting temporary schema cleaner as no temporary schemas were found") 512 return nil 513 } 514 515 statusServer, err := c.statusServer.OptionalErr(47894) 516 if err != nil { 517 return err 518 } 519 520 // Get active sessions. 521 var response *serverpb.ListSessionsResponse 522 if err := retryFunc(ctx, func() error { 523 var err error 524 response, err = statusServer.ListSessions( 525 ctx, 526 &serverpb.ListSessionsRequest{}, 527 ) 528 return err 529 }); err != nil { 530 return err 531 } 532 activeSessions := make(map[uint128.Uint128]struct{}) 533 for _, session := range response.Sessions { 534 activeSessions[uint128.FromBytes(session.ID)] = struct{}{} 535 } 536 537 // Clean up temporary data for inactive sessions. 538 ie := c.makeSessionBoundInternalExecutor(ctx, &sessiondata.SessionData{}) 539 for sessionID := range sessionIDs { 540 if _, ok := activeSessions[sessionID.Uint128]; !ok { 541 log.Eventf(ctx, "cleaning up temporary object for session %q", sessionID) 542 c.metrics.SchemasToDelete.Inc(1) 543 544 // Reset the session data with the appropriate sessionID such that we can resolve 545 // the given schema correctly. 546 if err := retryFunc(ctx, func() error { 547 return cleanupSessionTempObjects( 548 ctx, 549 c.settings, 550 c.db, 551 c.codec, 552 ie, 553 sessionID, 554 ) 555 }); err != nil { 556 // Log error but continue trying to delete the rest. 557 log.Warningf(ctx, "failed to clean temp objects under session %q: %v", sessionID, err) 558 c.metrics.SchemasDeletionError.Inc(1) 559 } else { 560 c.metrics.SchemasDeletionSuccess.Inc(1) 561 telemetry.Inc(sqltelemetry.TempObjectCleanerDeletionCounter) 562 } 563 } else { 564 log.Eventf(ctx, "not cleaning up %q as session is still active", sessionID) 565 } 566 } 567 568 return nil 569 } 570 571 // Start initializes the background thread which periodically cleans up leftover temporary objects. 572 func (c *TemporaryObjectCleaner) Start(ctx context.Context, stopper *stop.Stopper) { 573 stopper.RunWorker(ctx, func(ctx context.Context) { 574 nextTick := timeutil.Now() 575 for { 576 nextTickCh := time.After(nextTick.Sub(timeutil.Now())) 577 if c.testingKnobs.TempObjectsCleanupCh != nil { 578 nextTickCh = c.testingKnobs.TempObjectsCleanupCh 579 } 580 581 select { 582 case <-nextTickCh: 583 if err := c.doTemporaryObjectCleanup(ctx, stopper.ShouldQuiesce()); err != nil { 584 log.Warningf(ctx, "failed to clean temp objects: %v", err) 585 } 586 case <-stopper.ShouldQuiesce(): 587 return 588 case <-ctx.Done(): 589 return 590 } 591 if c.testingKnobs.OnTempObjectsCleanupDone != nil { 592 c.testingKnobs.OnTempObjectsCleanupDone() 593 } 594 nextTick = nextTick.Add(TempObjectCleanupInterval.Get(&c.settings.SV)) 595 log.Infof(ctx, "temporary object cleaner next scheduled to run at %s", nextTick) 596 } 597 }) 598 }