vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/gc/tablegc.go

/*
Copyright 2020 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gc

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/spf13/pflag"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/timer"
	"vitess.io/vitess/go/vt/dbconnpool"
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/schema"
	"vitess.io/vitess/go/vt/servenv"
	"vitess.io/vitess/go/vt/sqlparser"
	"vitess.io/vitess/go/vt/topo"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle"
)

const (
	// evacHours is a hard-coded, reasonable time for a table to spend in EVAC state
	evacHours        = 72
	throttlerAppName = "tablegc"
)

var (
	checkInterval           = 1 * time.Hour
	purgeReentranceInterval = 1 * time.Minute
	gcLifecycle             = "hold,purge,evac,drop"
)

func init() {
	servenv.OnParseFor("vtcombo", registerGCFlags)
	servenv.OnParseFor("vttablet", registerGCFlags)
}

func registerGCFlags(fs *pflag.FlagSet) {
	// checkInterval marks the interval between looking for tables in the MySQL server/schema
	fs.DurationVar(&checkInterval, "gc_check_interval", checkInterval, "Interval between garbage collection checks")
	// purgeReentranceInterval marks the interval between searching for tables to purge
	fs.DurationVar(&purgeReentranceInterval, "gc_purge_check_interval", purgeReentranceInterval, "Interval between purge discovery checks")
	// gcLifecycle is the sequence of steps a table goes through in the process of getting dropped
	fs.StringVar(&gcLifecycle, "table_gc_lifecycle", gcLifecycle, "States for a DROP TABLE garbage collection cycle. Default is 'hold,purge,evac,drop', use any subset ('drop' implicitly always included)")
}

var (
	sqlPurgeTable       = `delete from %a limit 50`
	sqlShowVtTables     = `show full tables like '\_vt\_%'`
	sqlDropTable        = "drop table if exists `%a`"
	purgeReentranceFlag int64
)

// transitionRequest encapsulates a request to transition a table to the next state
type transitionRequest struct {
	fromTableName string
	isBaseTable   bool
	toGCState     schema.TableGCState
	uuid          string
}

func init() {
	rand.Seed(time.Now().UnixNano())
}
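// A GC table name encodes everything this collector needs to know about the table.
// Reading the example name below (an illustrative breakdown; schema.AnalyzeGCTableName
// is the authoritative parser):
//
//	_vt_EVAC_f6338b2af8af11eaa210f875a4d24e90_20200920063522
//
// the "_vt_" prefix marks an internal table, "EVAC" is the current lifecycle state,
// "f6338b2af8af11eaa210f875a4d24e90" is a UUID, and "20200920063522" is a UTC
// timestamp (2020-09-20 06:35:22) hinting at when the table is due for its next
// transition.
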
// TableGC is the main entity in the table garbage collection mechanism.
// This service "garbage collects" tables:
// - it checks for magically-named tables (e.g. _vt_EVAC_f6338b2af8af11eaa210f875a4d24e90_20200920063522)
// - it analyzes a table's state from its name
// - it applies operations on the table (namely purge for PURGE tables)
// - when its time is due, it transitions a table (via RENAME TABLE) to the next state
// - finally, it issues a DROP TABLE
// The sequence of steps is controlled by the command-line flag --table_gc_lifecycle
type TableGC struct {
	keyspace string
	shard    string
	dbName   string

	isOpen          int64
	cancelOperation context.CancelFunc

	throttlerClient *throttle.Client

	env  tabletenv.Env
	pool *connpool.Pool
	ts   *topo.Server

	stateMutex sync.Mutex
	purgeMutex sync.Mutex

	purgingTables map[string]bool
	// lifecycleStates indicates what states a GC table goes through. The user can set
	// this with --table_gc_lifecycle, such that some states can be skipped.
	lifecycleStates map[schema.TableGCState]bool
}

// Status publishes some status values from the collector
type Status struct {
	Keyspace string
	Shard    string

	isPrimary bool
	IsOpen    bool

	purgingTables []string
}

// NewTableGC creates a table collector
func NewTableGC(env tabletenv.Env, ts *topo.Server, lagThrottler *throttle.Throttler) *TableGC {
	collector := &TableGC{
		throttlerClient: throttle.NewBackgroundClient(lagThrottler, throttlerAppName, throttle.ThrottleCheckPrimaryWrite),
		isOpen:          0,

		env: env,
		ts:  ts,
		pool: connpool.NewPool(env, "TableGCPool", tabletenv.ConnPoolConfig{
			Size:               2,
			IdleTimeoutSeconds: env.Config().OltpReadPool.IdleTimeoutSeconds,
		}),

		purgingTables: map[string]bool{},
	}

	return collector
}

// InitDBConfig initializes keyspace and shard
func (collector *TableGC) InitDBConfig(keyspace, shard, dbName string) {
	log.Info("TableGC: init")
	collector.keyspace = keyspace
	collector.shard = shard
	collector.dbName = dbName
}

// Open opens the database pool and initializes the schema
func (collector *TableGC) Open() (err error) {
	collector.stateMutex.Lock()
	defer collector.stateMutex.Unlock()
	if collector.isOpen > 0 {
		// already open
		return nil
	}
	if !collector.env.Config().EnableTableGC {
		return nil
	}

	collector.lifecycleStates, err = schema.ParseGCLifecycle(gcLifecycle)
	if err != nil {
		return fmt.Errorf("Error parsing --table_gc_lifecycle flag: %+v", err)
	}

	log.Info("TableGC: opening")
	collector.pool.Open(collector.env.Config().DB.AllPrivsWithDB(), collector.env.Config().DB.DbaWithDB(), collector.env.Config().DB.AppDebugWithDB())
	atomic.StoreInt64(&collector.isOpen, 1)

	conn, err := dbconnpool.NewDBConnection(context.Background(), collector.env.Config().DB.AllPrivsWithDB())
	if err != nil {
		return err
	}
	defer conn.Close()
	serverSupportsFastDrops, err := conn.SupportsCapability(mysql.FastDropTableFlavorCapability)
	if err != nil {
		return err
	}
	if serverSupportsFastDrops {
		// MySQL 8.0.23 and onwards supports fast DROP TABLE operations. This means we don't have to
		// go through the purging & evac cycle: once the table has been held for long enough, we can just
		// move on to dropping it. Dropping a large table in 8.0.23 is expected to take several seconds, and
		// should not block other queries or place any locks on the buffer pool.
		delete(collector.lifecycleStates, schema.PurgeTableGCState)
		delete(collector.lifecycleStates, schema.EvacTableGCState)
	}

	ctx := context.Background()
	ctx, collector.cancelOperation = context.WithCancel(ctx)
	go collector.operate(ctx)

	return nil
}
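// A minimal wiring sketch (illustrative only; the real call sites live in the
// tabletserver initialization code, and the env, ts, lagThrottler, keyspace, shard
// and dbName values below are assumed to be provided by the caller):
//
//	collector := NewTableGC(env, ts, lagThrottler)
//	collector.InitDBConfig(keyspace, shard, dbName)
//	if err := collector.Open(); err != nil {
//		log.Errorf("TableGC failed to open: %+v", err)
//	}
//	defer collector.Close()
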
// Close frees resources
func (collector *TableGC) Close() {
	log.Infof("TableGC - started execution of Close. Acquiring stateMutex lock")
	collector.stateMutex.Lock()
	defer collector.stateMutex.Unlock()
	log.Infof("TableGC - acquired lock")
	if collector.isOpen == 0 {
		log.Infof("TableGC - no collector is open")
		// not open
		return
	}

	log.Info("TableGC: closing")
	if collector.cancelOperation != nil {
		collector.cancelOperation()
	}
	log.Infof("TableGC - closing pool")
	collector.pool.Close()
	atomic.StoreInt64(&collector.isOpen, 0)
	log.Infof("TableGC - finished execution of Close")
}

// operate is the main entry point for the table garbage collector operation and logic.
func (collector *TableGC) operate(ctx context.Context) {

	dropTablesChan := make(chan string)
	purgeRequestsChan := make(chan bool)
	transitionRequestsChan := make(chan *transitionRequest)

	tickers := []*timer.SuspendableTicker{}
	addTicker := func(d time.Duration) *timer.SuspendableTicker {
		t := timer.NewSuspendableTicker(d, false)
		tickers = append(tickers, t)
		return t
	}
	tableCheckTicker := addTicker(checkInterval)
	purgeReentranceTicker := addTicker(purgeReentranceInterval)

	for _, t := range tickers {
		defer t.Stop()
		// since we just started the tickers now, speed up the ticks by forcing an immediate tick
		go t.TickNow()
	}

	log.Info("TableGC: operating")
	for {
		select {
		case <-ctx.Done():
			log.Info("TableGC: done operating")
			return
		case <-tableCheckTicker.C:
			{
				log.Info("TableGC: tableCheckTicker")
				_ = collector.checkTables(ctx, dropTablesChan, transitionRequestsChan)
			}
		case <-purgeReentranceTicker.C:
			{
				// relay the request
				go func() { purgeRequestsChan <- true }()
			}
		case <-purgeRequestsChan:
			{
				go func() {
					tableName, err := collector.purge(ctx)
					if err != nil {
						log.Errorf("TableGC: error purging table %s: %+v", tableName, err)
						return
					}
					if tableName == "" {
						// No table purged (or at least not to completion)
						return
					}
					// A table was successfully purged to completion: the table is now empty!
					// We happen to know at this point that the table is in PURGE state;
					// that's why we were purging it, so we can hard-code that state here.
					_, _, uuid, _, _ := schema.AnalyzeGCTableName(tableName)
					collector.submitTransitionRequest(ctx, transitionRequestsChan, schema.PurgeTableGCState, tableName, true, uuid)
					collector.removePurgingTable(tableName)
					// Finished with this table. Chances are more tables are waiting to be purged.
					// Trigger another call to purge(), instead of waiting a full purgeReentranceInterval cycle.
					time.AfterFunc(time.Second, func() { purgeRequestsChan <- true })
				}()
			}
		case dropTableName := <-dropTablesChan:
			{
				log.Info("TableGC: dropTablesChan")
				if err := collector.dropTable(ctx, dropTableName); err != nil {
					log.Errorf("TableGC: error dropping table %s: %+v", dropTableName, err)
				}
			}
		case transition := <-transitionRequestsChan:
			{
				log.Info("TableGC: transitionRequestsChan")
				if err := collector.transitionTable(ctx, transition); err != nil {
					log.Errorf("TableGC: error transitioning table %s to %+v: %+v", transition.fromTableName, transition.toGCState, err)
				}
			}
		}
	}
}
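// As an illustrative example of the state-skipping logic in nextState below: with
// --table_gc_lifecycle="hold,drop", a table in HOLD state transitions directly to
// DROP, because PURGE and EVAC are absent from lifecycleStates and are skipped,
// while 'drop' is always implicitly included.
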
// nextState evaluates what the next state should be, given a state; this takes into account
// lifecycleStates (as generated by the user-supplied --table_gc_lifecycle flag)
func (collector *TableGC) nextState(fromState schema.TableGCState) *schema.TableGCState {
	var state schema.TableGCState
	switch fromState {
	case schema.HoldTableGCState:
		state = schema.PurgeTableGCState
	case schema.PurgeTableGCState:
		state = schema.EvacTableGCState
	case schema.EvacTableGCState:
		state = schema.DropTableGCState
	case schema.DropTableGCState:
		return nil
	default:
		return nil
	}
	if _, ok := collector.lifecycleStates[state]; !ok {
		return collector.nextState(state)
	}
	return &state
}

// generateTransition creates a transition request, based on the current state and taking the
// configured lifecycleStates into consideration (we may skip some states)
func (collector *TableGC) generateTransition(ctx context.Context, fromState schema.TableGCState, fromTableName string, isBaseTable bool, uuid string) *transitionRequest {
	nextState := collector.nextState(fromState)
	if nextState == nil {
		return nil
	}
	return &transitionRequest{
		fromTableName: fromTableName,
		isBaseTable:   isBaseTable,
		toGCState:     *nextState,
		uuid:          uuid,
	}
}

// submitTransitionRequest generates and queues a transition request for a given table
func (collector *TableGC) submitTransitionRequest(ctx context.Context, transitionRequestsChan chan<- *transitionRequest, fromState schema.TableGCState, fromTableName string, isBaseTable bool, uuid string) {
	log.Infof("TableGC: submitting transition request for %s", fromTableName)
	go func() {
		transition := collector.generateTransition(ctx, fromState, fromTableName, isBaseTable, uuid)
		if transition != nil {
			transitionRequestsChan <- transition
		}
	}()
}
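// Time hints in table names gate transitions. An illustrative example: a table named
// _vt_PURGE_f6338b2af8af11eaa210f875a4d24e90_20990101000000, whose PURGE state is part
// of the configured lifecycle, is left alone until 2099-01-01; but if PURGE is not part
// of the lifecycle, the time hint is ignored and the table is transitioned right away.
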
// shouldTransitionTable checks if the given table is a GC table and if it's time to transition it to the next state
func (collector *TableGC) shouldTransitionTable(tableName string) (shouldTransition bool, state schema.TableGCState, uuid string, err error) {
	isGCTable, state, uuid, t, err := schema.AnalyzeGCTableName(tableName)
	if err != nil {
		return false, state, uuid, err
	}
	if !isGCTable {
		// irrelevant table
		return false, state, uuid, nil
	}
	if _, ok := collector.lifecycleStates[state]; ok {
		// This state is in our expected lifecycle. Let's check the table's time hint:
		timeNow := time.Now().UTC()
		if timeNow.Before(t) {
			// not yet time to operate on this table
			return false, state, uuid, nil
		}
	}
	// If the state is not in our expected lifecycle, we ignore the time hint and just move the table to the next phase
	return true, state, uuid, nil
}

// checkTables looks for potential GC tables in the MySQL server+schema.
// It lists _vt_% tables, then filters for those whose due date has passed.
// It then applies the necessary operation per table.
func (collector *TableGC) checkTables(ctx context.Context, dropTablesChan chan<- string, transitionRequestsChan chan<- *transitionRequest) error {
	conn, err := collector.pool.Get(ctx, nil)
	if err != nil {
		return err
	}
	defer conn.Recycle()

	log.Infof("TableGC: check tables")

	res, err := conn.Exec(ctx, sqlShowVtTables, math.MaxInt32, true)
	if err != nil {
		return err
	}

	for _, row := range res.Rows {
		tableName := row[0].ToString()
		tableType := row[1].ToString()
		isBaseTable := (tableType == "BASE TABLE")

		shouldTransition, state, uuid, err := collector.shouldTransitionTable(tableName)

		if err != nil {
			log.Errorf("TableGC: error while checking tables: %+v", err)
			continue
		}
		if !shouldTransition {
			// irrelevant table
			continue
		}

		log.Infof("TableGC: will operate on table %s", tableName)

		if state == schema.HoldTableGCState {
			// Hold period expired. Moving to next state
			collector.submitTransitionRequest(ctx, transitionRequestsChan, state, tableName, isBaseTable, uuid)
		}
		if state == schema.PurgeTableGCState {
			if isBaseTable {
				// This table needs to be purged. Make sure to enlist it (we may already have)
				collector.addPurgingTable(tableName)
			} else {
				// This is a view. We don't need to delete rows from views. Just transition into the next phase
				collector.submitTransitionRequest(ctx, transitionRequestsChan, state, tableName, isBaseTable, uuid)
			}
		}
		if state == schema.EvacTableGCState {
			// This table was in EVAC state for the required period. It will transition into DROP state
			collector.submitTransitionRequest(ctx, transitionRequestsChan, state, tableName, isBaseTable, uuid)
		}
		if state == schema.DropTableGCState {
			// This table needs to be dropped immediately.
			go func() { dropTablesChan <- tableName }()
		}
	}

	return nil
}
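// A note on the purge mechanism implemented below: rows are deleted in small batches
// ("delete from %a limit 50", per sqlPurgeTable), with a throttler check between
// batches, until a DELETE affects zero rows, meaning the table is empty. Small
// batches keep individual transactions cheap and let the throttler pace the operation.
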
// purge continuously purges rows from a table.
// This function is non-reentrant: there's only one instance of this function running at any given time.
// A timer keeps calling this function, so if it bails out (e.g. on error) it will later resume work
func (collector *TableGC) purge(ctx context.Context) (tableName string, err error) {
	if atomic.CompareAndSwapInt64(&purgeReentranceFlag, 0, 1) {
		defer atomic.StoreInt64(&purgeReentranceFlag, 0)
	} else {
		// An instance of this function is already running
		return "", nil
	}

	tableName, found := collector.nextTableToPurge()
	if !found {
		// Nothing to do here...
		return "", nil
	}

	conn, err := dbconnpool.NewDBConnection(ctx, collector.env.Config().DB.DbaWithDB())
	if err != nil {
		return tableName, err
	}
	defer conn.Close()

	// Disable binary logging, re-enable afterwards.
	// The idea is that DROP TABLE can be expensive on the primary if the table is not empty.
	// However, on a replica the price is not as high. Therefore, we only purge the rows on the primary.
	// This takes a lot of load off the replication stream, avoiding excessive lag. It also
	// avoids excessive IO on the replicas.
	// (note that the user may skip the PURGE step if they want, but the step is on by default)

	// However, disabling SQL_LOG_BIN requires SUPER privileges, and we don't know that we have them.
	// An externally managed database might not give SUPER privileges to the vitess accounts, and this is known to be the case for Amazon Aurora.
	// We therefore disable binary logging on a best-effort basis. The logic is still fine and sound if binary logging
	// is left enabled. We just lose some optimization.
	disableLogBin := func() (bool, error) {
		_, err := conn.ExecuteFetch("SET sql_log_bin = OFF", 0, false)
		if err == nil {
			return true, nil
		}
		if merr, ok := err.(*mysql.SQLError); ok {
			if merr.Num == mysql.ERSpecifiedAccessDenied {
				// We do not have privileges to disable binary logging. That's fine, we're on best effort,
				// so we're going to silently ignore this error.
				return false, nil
			}
		}
		// We do not tolerate other errors, though.
		return false, err
	}
	sqlLogBinDisabled, err := disableLogBin()
	if err != nil {
		return tableName, err
	}

	defer func() {
		if sqlLogBinDisabled && !conn.IsClosed() {
			if _, err := conn.ExecuteFetch("SET sql_log_bin = ON", 0, false); err != nil {
				log.Errorf("TableGC: error setting sql_log_bin = ON: %+v", err)
				// a followup defer() will run conn.Close() in any case.
			}
		}
	}()

	log.Infof("TableGC: purge begin for %s", tableName)
	for {
		if ctx.Err() != nil {
			// cancelled
			return tableName, ctx.Err()
		}
		if !collector.throttlerClient.ThrottleCheckOKOrWait(ctx) {
			continue
		}
		// OK, we're clear to go!

		// Issue a DELETE
		parsed := sqlparser.BuildParsedQuery(sqlPurgeTable, tableName)
		res, err := conn.ExecuteFetch(parsed.Query, 1, true)
		if err != nil {
			return tableName, err
		}
		if res.RowsAffected == 0 {
			log.Infof("TableGC: purge complete for %s", tableName)
			return tableName, nil
		}
	}
}

// dropTable runs an actual DROP TABLE statement, and marks the end of the line for the
// table's GC lifecycle.
func (collector *TableGC) dropTable(ctx context.Context, tableName string) error {
	conn, err := collector.pool.Get(ctx, nil)
	if err != nil {
		return err
	}
	defer conn.Recycle()

	parsed := sqlparser.BuildParsedQuery(sqlDropTable, tableName)

	log.Infof("TableGC: dropping table: %s", tableName)
	_, err = conn.Exec(ctx, parsed.Query, 1, true)
	if err != nil {
		return err
	}
	log.Infof("TableGC: dropped table: %s", tableName)
	return nil
}
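// An illustrative example of the rename-based transition performed below (the exact
// statement is generated by schema.GenerateRenameStatementWithUUID; the name format
// is shown here for intuition only): moving a fully purged table into EVAC with a
// time hint 72 hours out:
//
//	RENAME TABLE _vt_PURGE_f6338b2af8af11eaa210f875a4d24e90_20200920063522
//	          TO _vt_EVAC_f6338b2af8af11eaa210f875a4d24e90_20200923063522
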
// transitionTable is called upon a transition request. The actual implementation of a transition
// is a RENAME TABLE statement.
func (collector *TableGC) transitionTable(ctx context.Context, transition *transitionRequest) error {
	conn, err := collector.pool.Get(ctx, nil)
	if err != nil {
		return err
	}
	defer conn.Recycle()

	// when we transition into PURGE, that means we want to begin purging immediately
	// when we transition into DROP, that means we want to drop immediately
	// Therefore the default timestamp is Now
	t := time.Now().UTC()
	switch transition.toGCState {
	case schema.EvacTableGCState:
		if transition.isBaseTable {
			// in EVAC state we want the table pages to evacuate from the buffer pool. We therefore
			// set the timestamp to some point in the future, which we determine ourselves
			t = t.Add(evacHours * time.Hour)
		}
		// Views don't need evac. t remains "now"
	}

	renameStatement, toTableName, err := schema.GenerateRenameStatementWithUUID(transition.fromTableName, transition.toGCState, transition.uuid, t)
	if err != nil {
		return err
	}

	log.Infof("TableGC: renaming table: %s to %s", transition.fromTableName, toTableName)
	_, err = conn.Exec(ctx, renameStatement, 1, true)
	if err != nil {
		return err
	}
	log.Infof("TableGC: renamed table: %s", transition.fromTableName)
	return nil
}

// addPurgingTable adds a table to the list of purging (or pending purge) tables
func (collector *TableGC) addPurgingTable(tableName string) {
	collector.purgeMutex.Lock()
	defer collector.purgeMutex.Unlock()

	collector.purgingTables[tableName] = true
}

// removePurgingTable removes a table from the purging list; typically this is called when
// the table is fully purged and is renamed away to be dropped.
func (collector *TableGC) removePurgingTable(tableName string) {
	collector.purgeMutex.Lock()
	defer collector.purgeMutex.Unlock()

	delete(collector.purgingTables, tableName)
}

// nextTableToPurge returns the name of the next table we should start purging.
// We pick the table with the oldest timestamp.
func (collector *TableGC) nextTableToPurge() (tableName string, ok bool) {
	collector.purgeMutex.Lock()
	defer collector.purgeMutex.Unlock()

	if len(collector.purgingTables) == 0 {
		return "", false
	}
	tableNames := []string{}
	for tableName := range collector.purgingTables {
		tableNames = append(tableNames, tableName)
	}
	sort.SliceStable(tableNames, func(i, j int) bool {
		_, _, _, ti, _ := schema.AnalyzeGCTableName(tableNames[i])
		_, _, _, tj, _ := schema.AnalyzeGCTableName(tableNames[j])

		return ti.Before(tj)
	})
	return tableNames[0], true
}

// Status exports a status breakdown
func (collector *TableGC) Status() *Status {
	collector.purgeMutex.Lock()
	defer collector.purgeMutex.Unlock()

	status := &Status{
		Keyspace: collector.keyspace,
		Shard:    collector.shard,

		IsOpen: (atomic.LoadInt64(&collector.isOpen) > 0),
	}
	for tableName := range collector.purgingTables {
		status.purgingTables = append(status.purgingTables, tableName)
	}

	return status
}