vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/tx_engine.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tabletserver 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/pools" 26 "vitess.io/vitess/go/timer" 27 "vitess.io/vitess/go/trace" 28 "vitess.io/vitess/go/vt/concurrency" 29 "vitess.io/vitess/go/vt/dtids" 30 "vitess.io/vitess/go/vt/log" 31 "vitess.io/vitess/go/vt/proto/vtrpc" 32 "vitess.io/vitess/go/vt/servenv" 33 "vitess.io/vitess/go/vt/vterrors" 34 "vitess.io/vitess/go/vt/vtgate/vtgateconn" 35 "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool" 36 "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" 37 "vitess.io/vitess/go/vt/vttablet/tabletserver/tx" 38 "vitess.io/vitess/go/vt/vttablet/tabletserver/txlimiter" 39 40 querypb "vitess.io/vitess/go/vt/proto/query" 41 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 42 ) 43 44 type txEngineState int 45 46 // The TxEngine can be in any of these states 47 const ( 48 NotServing txEngineState = iota 49 Transitioning 50 AcceptingReadAndWrite 51 AcceptingReadOnly 52 ) 53 54 func (state txEngineState) String() string { 55 names := [...]string{ 56 "NotServing", 57 "Transitioning", 58 "AcceptReadWrite", 59 "AcceptingReadOnly"} 60 61 if state < NotServing || state > AcceptingReadOnly { 62 return fmt.Sprintf("Unknown - %d", int(state)) 63 } 64 65 return names[state] 66 } 67 68 // TxEngine is responsible for handling the tx-pool and keeping read-write, read-only or not-serving 69 // states. It will start and shut down the underlying tx-pool as required. 70 type TxEngine struct { 71 env tabletenv.Env 72 73 // stateLock is to protect state and beginRequests changes. 74 stateLock sync.Mutex 75 state txEngineState 76 77 // beginRequests is used to make sure that we do not make a state 78 // transition while creating new transactions 79 beginRequests sync.WaitGroup 80 81 twopcEnabled bool 82 shutdownGracePeriod time.Duration 83 coordinatorAddress string 84 abandonAge time.Duration 85 ticks *timer.Timer 86 87 // reservedConnStats keeps statistics about reserved connections 88 reservedConnStats *servenv.TimingsWrapper 89 90 txPool *TxPool 91 preparedPool *TxPreparedPool 92 twoPC *TwoPC 93 twoPCReady sync.WaitGroup 94 } 95 96 // NewTxEngine creates a new TxEngine. 97 func NewTxEngine(env tabletenv.Env) *TxEngine { 98 config := env.Config() 99 te := &TxEngine{ 100 env: env, 101 shutdownGracePeriod: config.GracePeriods.ShutdownSeconds.Get(), 102 reservedConnStats: env.Exporter().NewTimings("ReservedConnections", "Reserved connections stats", "operation"), 103 } 104 limiter := txlimiter.New(env) 105 te.txPool = NewTxPool(env, limiter) 106 te.twopcEnabled = config.TwoPCEnable 107 if te.twopcEnabled { 108 if config.TwoPCCoordinatorAddress == "" { 109 log.Error("Coordinator address not specified: Disabling 2PC") 110 te.twopcEnabled = false 111 } 112 if config.TwoPCAbandonAge <= 0 { 113 log.Error("2PC abandon age not specified: Disabling 2PC") 114 te.twopcEnabled = false 115 } 116 } 117 te.coordinatorAddress = config.TwoPCCoordinatorAddress 118 te.abandonAge = config.TwoPCAbandonAge.Get() 119 te.ticks = timer.NewTimer(te.abandonAge / 2) 120 121 // Set the prepared pool capacity to something lower than 122 // tx pool capacity. Those spare connections are needed to 123 // perform metadata state change operations. Without this, 124 // the system can deadlock if all connections get moved to 125 // the TxPreparedPool. 126 te.preparedPool = NewTxPreparedPool(config.TxPool.Size - 2) 127 readPool := connpool.NewPool(env, "TxReadPool", tabletenv.ConnPoolConfig{ 128 Size: 3, 129 IdleTimeoutSeconds: env.Config().TxPool.IdleTimeoutSeconds, 130 }) 131 te.twoPC = NewTwoPC(readPool) 132 te.state = NotServing 133 return te 134 } 135 136 // AcceptReadWrite will start accepting all transactions. 137 // If transitioning from RO mode, transactions are rolled 138 // back before accepting new transactions. This is to allow 139 // for 2PC state to be correctly initialized. 140 func (te *TxEngine) AcceptReadWrite() { 141 te.transition(AcceptingReadAndWrite) 142 } 143 144 // AcceptReadOnly transitions to read-only mode. If current state 145 // is read-write, then we wait for shutdown and then transition. 146 func (te *TxEngine) AcceptReadOnly() { 147 te.transition(AcceptingReadOnly) 148 } 149 150 func (te *TxEngine) transition(state txEngineState) { 151 te.stateLock.Lock() 152 defer te.stateLock.Unlock() 153 if te.state == state { 154 return 155 } 156 157 log.Infof("TxEngine transition: %v", state) 158 switch te.state { 159 case AcceptingReadOnly, AcceptingReadAndWrite: 160 te.shutdownLocked() 161 case NotServing: 162 // No special action. 163 } 164 165 te.state = state 166 te.txPool.Open(te.env.Config().DB.AppWithDB(), te.env.Config().DB.DbaWithDB(), te.env.Config().DB.AppDebugWithDB()) 167 168 if te.twopcEnabled && te.state == AcceptingReadAndWrite { 169 // If there are errors, we choose to raise an alert and 170 // continue anyway. Serving traffic is considered more important 171 // than blocking everything for the sake of a few transactions. 172 // We do this async; so we do not end up blocking writes on 173 // failover for our setup tasks if using semi-sync replication. 174 te.twoPCReady.Add(1) 175 go func() { 176 defer te.twoPCReady.Done() 177 if err := te.twoPC.Open(te.env.Config().DB); err != nil { 178 te.env.Stats().InternalErrors.Add("TwopcOpen", 1) 179 log.Errorf("Could not open TwoPC engine: %v", err) 180 } 181 if err := te.prepareFromRedo(); err != nil { 182 te.env.Stats().InternalErrors.Add("TwopcResurrection", 1) 183 log.Errorf("Could not prepare transactions: %v", err) 184 } 185 te.startWatchdog() 186 }() 187 } 188 } 189 190 // Close will disregard common rules for when to kill transactions 191 // and wait forever for transactions to wrap up 192 func (te *TxEngine) Close() { 193 log.Infof("TxEngine - started Close. Acquiring stateLock lock") 194 te.stateLock.Lock() 195 log.Infof("TxEngine - acquired stateLock") 196 defer func() { 197 te.state = NotServing 198 te.stateLock.Unlock() 199 }() 200 if te.state == NotServing { 201 log.Infof("TxEngine - state is not serving already") 202 return 203 } 204 205 log.Infof("TxEngine - starting shutdown") 206 te.shutdownLocked() 207 log.Info("TxEngine: closed") 208 } 209 210 func (te *TxEngine) isTxPoolAvailable(addToWaitGroup func(int)) error { 211 te.stateLock.Lock() 212 defer te.stateLock.Unlock() 213 214 canOpenTransactions := te.state == AcceptingReadOnly || te.state == AcceptingReadAndWrite 215 if !canOpenTransactions { 216 return vterrors.Errorf(vtrpc.Code_UNAVAILABLE, vterrors.TxEngineClosed, te.state) 217 } 218 addToWaitGroup(1) 219 return nil 220 } 221 222 // Begin begins a transaction, and returns the associated transaction id and the 223 // statement(s) used to execute the begin (if any). 224 // 225 // Subsequent statements can access the connection through the transaction id. 226 func (te *TxEngine) Begin(ctx context.Context, savepointQueries []string, reservedID int64, setting *pools.Setting, options *querypb.ExecuteOptions) (int64, string, string, error) { 227 span, ctx := trace.NewSpan(ctx, "TxEngine.Begin") 228 defer span.Finish() 229 230 // if the connection is already reserved then, we should not apply the settings. 231 if reservedID != 0 && setting != nil { 232 return 0, "", "", vterrors.Errorf(vtrpcpb.Code_INTERNAL, "[BUG] should not mix reserved connection and connection with setting") 233 } 234 235 err := te.isTxPoolAvailable(te.beginRequests.Add) 236 if err != nil { 237 return 0, "", "", err 238 } 239 240 defer te.beginRequests.Done() 241 conn, beginSQL, sessionStateChanges, err := te.txPool.Begin(ctx, options, te.state == AcceptingReadOnly, reservedID, savepointQueries, setting) 242 if err != nil { 243 return 0, "", "", err 244 } 245 defer conn.UnlockUpdateTime() 246 return conn.ReservedID(), beginSQL, sessionStateChanges, err 247 } 248 249 // Commit commits the specified transaction and renews connection id if one exists. 250 func (te *TxEngine) Commit(ctx context.Context, transactionID int64) (int64, string, error) { 251 span, ctx := trace.NewSpan(ctx, "TxEngine.Commit") 252 defer span.Finish() 253 var query string 254 var err error 255 connID, err := te.txFinish(transactionID, tx.TxCommit, func(conn *StatefulConnection) error { 256 query, err = te.txPool.Commit(ctx, conn) 257 return err 258 }) 259 260 return connID, query, err 261 } 262 263 // Rollback rolls back the specified transaction. 264 func (te *TxEngine) Rollback(ctx context.Context, transactionID int64) (int64, error) { 265 span, ctx := trace.NewSpan(ctx, "TxEngine.Rollback") 266 defer span.Finish() 267 268 return te.txFinish(transactionID, tx.TxRollback, func(conn *StatefulConnection) error { 269 return te.txPool.Rollback(ctx, conn) 270 }) 271 } 272 273 func (te *TxEngine) txFinish(transactionID int64, reason tx.ReleaseReason, f func(*StatefulConnection) error) (int64, error) { 274 conn, err := te.txPool.GetAndLock(transactionID, reason.String()) 275 if err != nil { 276 return 0, err 277 } 278 err = f(conn) 279 if err != nil || !conn.IsTainted() { 280 conn.Release(reason) 281 return 0, err 282 } 283 err = conn.Renew() 284 if err != nil { 285 conn.Release(tx.ConnRenewFail) 286 return 0, err 287 } 288 return conn.ConnID, nil 289 } 290 291 // shutdownLocked closes the TxEngine. If the immediate flag is on, 292 // then all current transactions are immediately rolled back. 293 // Otherwise, the function waits for all current transactions 294 // to conclude. If a shutdown grace period was specified, 295 // the transactions are rolled back if they're not resolved 296 // by that time. 297 func (te *TxEngine) shutdownLocked() { 298 log.Infof("TxEngine - called shutdownLocked") 299 immediate := true 300 if te.state == AcceptingReadAndWrite { 301 immediate = false 302 } 303 304 // Unlock, wait for all begin requests to complete, and relock. 305 te.state = Transitioning 306 te.stateLock.Unlock() 307 log.Infof("TxEngine - waiting for begin requests") 308 te.beginRequests.Wait() 309 log.Infof("TxEngine - acquiring state lock again") 310 te.stateLock.Lock() 311 log.Infof("TxEngine - state lock acquired again") 312 313 // Shut down functions are idempotent. 314 // No need to check if 2pc is enabled. 315 log.Infof("TxEngine - stop watchdog") 316 te.stopWatchdog() 317 318 poolEmpty := make(chan bool) 319 rollbackDone := make(chan bool) 320 // This goroutine decides if transactions have to be 321 // forced to rollback, and if so, when. Once done, 322 // the function closes rollbackDone, which can be 323 // verified to make sure it won't kick in later. 324 go func() { 325 defer func() { 326 te.env.LogError() 327 close(rollbackDone) 328 }() 329 if immediate { 330 // Immediately rollback everything and return. 331 log.Info("Immediate shutdown: rolling back now.") 332 te.txPool.scp.ShutdownNonTx() 333 te.shutdownTransactions() 334 return 335 } 336 // If not immediate, we start with shutting down non-tx (reserved) 337 // connections. 338 te.txPool.scp.ShutdownNonTx() 339 if te.shutdownGracePeriod <= 0 { 340 // No grace period was specified. Wait indefinitely for transactions to be concluded. 341 // TODO(sougou): invoking rollbackPrepared is incorrect here. Prepared statements should 342 // actually be rolled back last. But this will cause the shutdown to hang because the 343 // tx pool will never become empty, because the prepared pool is holding on to connections 344 // from the tx pool. But we plan to deprecate this approach to 2PC. So, this 345 // should eventually be deleted. 346 te.rollbackPrepared() 347 log.Info("No grace period specified: performing normal wait.") 348 return 349 } 350 tmr := time.NewTimer(te.shutdownGracePeriod) 351 defer tmr.Stop() 352 select { 353 case <-tmr.C: 354 log.Info("Grace period exceeded: rolling back now.") 355 te.shutdownTransactions() 356 case <-poolEmpty: 357 // The pool cleared before the timer kicked in. Just return. 358 log.Info("Transactions completed before grace period: shutting down.") 359 } 360 }() 361 log.Infof("TxEngine - waiting for empty txPool") 362 te.txPool.WaitForEmpty() 363 // If the goroutine is still running, signal that it can exit. 364 close(poolEmpty) 365 // Make sure the goroutine has returned. 366 log.Infof("TxEngine - making sure the goroutine has returned") 367 <-rollbackDone 368 369 log.Infof("TxEngine - closing the txPool") 370 te.txPool.Close() 371 log.Infof("TxEngine - closing twoPC") 372 te.twoPC.Close() 373 log.Infof("TxEngine - finished shutdownLocked") 374 } 375 376 // prepareFromRedo replays and prepares the transactions 377 // from the redo log, loads previously failed transactions 378 // into the reserved list, and adjusts the txPool LastID 379 // to ensure there are no future collisions. 380 func (te *TxEngine) prepareFromRedo() error { 381 ctx := tabletenv.LocalContext() 382 var allErr concurrency.AllErrorRecorder 383 prepared, failed, err := te.twoPC.ReadAllRedo(ctx) 384 if err != nil { 385 return err 386 } 387 388 maxid := int64(0) 389 outer: 390 for _, preparedTx := range prepared { 391 txid, err := dtids.TransactionID(preparedTx.Dtid) 392 if err != nil { 393 log.Errorf("Error extracting transaction ID from dtid: %v", err) 394 } 395 if txid > maxid { 396 maxid = txid 397 } 398 conn, _, _, err := te.txPool.Begin(ctx, &querypb.ExecuteOptions{}, false, 0, nil, nil) 399 if err != nil { 400 allErr.RecordError(err) 401 continue 402 } 403 for _, stmt := range preparedTx.Queries { 404 conn.TxProperties().RecordQuery(stmt) 405 _, err := conn.Exec(ctx, stmt, 1, false) 406 if err != nil { 407 allErr.RecordError(err) 408 te.txPool.RollbackAndRelease(ctx, conn) 409 continue outer 410 } 411 } 412 // We should not use the external Prepare because 413 // we don't want to write again to the redo log. 414 err = te.preparedPool.Put(conn, preparedTx.Dtid) 415 if err != nil { 416 allErr.RecordError(err) 417 continue 418 } 419 } 420 for _, preparedTx := range failed { 421 txid, err := dtids.TransactionID(preparedTx.Dtid) 422 if err != nil { 423 log.Errorf("Error extracting transaction ID from dtid: %v", err) 424 } 425 if txid > maxid { 426 maxid = txid 427 } 428 te.preparedPool.SetFailed(preparedTx.Dtid) 429 } 430 te.txPool.AdjustLastID(maxid) 431 log.Infof("TwoPC: Prepared %d transactions, and registered %d failures.", len(prepared), len(failed)) 432 return allErr.Error() 433 } 434 435 // shutdownTransactions rolls back all open transactions 436 // including the prepared ones. 437 // This is used for transitioning from a primary to a non-primary 438 // serving type. 439 func (te *TxEngine) shutdownTransactions() { 440 te.rollbackPrepared() 441 ctx := tabletenv.LocalContext() 442 // The order of rollbacks is currently not material because 443 // we don't allow new statements or commits during 444 // this function. In case of any such change, this will 445 // have to be revisited. 446 te.txPool.Shutdown(ctx) 447 } 448 449 func (te *TxEngine) rollbackPrepared() { 450 ctx := tabletenv.LocalContext() 451 for _, conn := range te.preparedPool.FetchAll() { 452 te.txPool.Rollback(ctx, conn) 453 conn.Release(tx.TxRollback) 454 } 455 } 456 457 // startWatchdog starts the watchdog goroutine, which looks for abandoned 458 // transactions and calls the notifier on them. 459 func (te *TxEngine) startWatchdog() { 460 te.ticks.Start(func() { 461 ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), te.abandonAge/4) 462 defer cancel() 463 464 // Raise alerts on prepares that have been unresolved for too long. 465 // Use 5x abandonAge to give opportunity for watchdog to resolve these. 466 count, err := te.twoPC.CountUnresolvedRedo(ctx, time.Now().Add(-te.abandonAge*5)) 467 if err != nil { 468 te.env.Stats().InternalErrors.Add("WatchdogFail", 1) 469 log.Errorf("Error reading unresolved prepares: '%v': %v", te.coordinatorAddress, err) 470 } 471 te.env.Stats().Unresolved.Set("Prepares", count) 472 473 // Resolve lingering distributed transactions. 474 txs, err := te.twoPC.ReadAbandoned(ctx, time.Now().Add(-te.abandonAge)) 475 if err != nil { 476 te.env.Stats().InternalErrors.Add("WatchdogFail", 1) 477 log.Errorf("Error reading transactions for 2pc watchdog: %v", err) 478 return 479 } 480 if len(txs) == 0 { 481 return 482 } 483 484 coordConn, err := vtgateconn.Dial(ctx, te.coordinatorAddress) 485 if err != nil { 486 te.env.Stats().InternalErrors.Add("WatchdogFail", 1) 487 log.Errorf("Error connecting to coordinator '%v': %v", te.coordinatorAddress, err) 488 return 489 } 490 defer coordConn.Close() 491 492 var wg sync.WaitGroup 493 for tx := range txs { 494 wg.Add(1) 495 go func(dtid string) { 496 defer wg.Done() 497 if err := coordConn.ResolveTransaction(ctx, dtid); err != nil { 498 te.env.Stats().InternalErrors.Add("WatchdogFail", 1) 499 log.Errorf("Error notifying for dtid %s: %v", dtid, err) 500 } 501 }(tx) 502 } 503 wg.Wait() 504 }) 505 } 506 507 // stopWatchdog stops the watchdog goroutine. 508 func (te *TxEngine) stopWatchdog() { 509 te.ticks.Stop() 510 } 511 512 // ReserveBegin creates a reserved connection, and in it opens a transaction 513 func (te *TxEngine) ReserveBegin(ctx context.Context, options *querypb.ExecuteOptions, preQueries []string, savepointQueries []string) (int64, string, error) { 514 span, ctx := trace.NewSpan(ctx, "TxEngine.ReserveBegin") 515 defer span.Finish() 516 err := te.isTxPoolAvailable(te.beginRequests.Add) 517 if err != nil { 518 return 0, "", err 519 } 520 defer te.beginRequests.Done() 521 522 conn, err := te.reserve(ctx, options, preQueries) 523 if err != nil { 524 return 0, "", err 525 } 526 defer conn.UnlockUpdateTime() 527 _, sessionStateChanges, err := te.txPool.begin(ctx, options, te.state == AcceptingReadOnly, conn, savepointQueries) 528 if err != nil { 529 conn.Close() 530 conn.Release(tx.ConnInitFail) 531 return 0, "", err 532 } 533 return conn.ReservedID(), sessionStateChanges, nil 534 } 535 536 var noop = func(int) {} 537 538 // Reserve creates a reserved connection and returns the id to it 539 func (te *TxEngine) Reserve(ctx context.Context, options *querypb.ExecuteOptions, txID int64, preQueries []string) (int64, error) { 540 span, ctx := trace.NewSpan(ctx, "TxEngine.Reserve") 541 defer span.Finish() 542 if txID == 0 { 543 err := te.isTxPoolAvailable(noop) 544 if err != nil { 545 return 0, err 546 } 547 conn, err := te.reserve(ctx, options, preQueries) 548 if err != nil { 549 return 0, err 550 } 551 defer conn.Unlock() 552 return conn.ReservedID(), nil 553 } 554 555 conn, err := te.txPool.GetAndLock(txID, "to reserve") 556 if err != nil { 557 return 0, err 558 } 559 defer conn.Unlock() 560 561 err = te.taintConn(ctx, conn, preQueries) 562 if err != nil { 563 return 0, err 564 } 565 return conn.ReservedID(), nil 566 } 567 568 // Reserve creates a reserved connection and returns the id to it 569 func (te *TxEngine) reserve(ctx context.Context, options *querypb.ExecuteOptions, preQueries []string) (*StatefulConnection, error) { 570 conn, err := te.txPool.scp.NewConn(ctx, options, nil) 571 if err != nil { 572 return nil, err 573 } 574 575 err = te.taintConn(ctx, conn, preQueries) 576 if err != nil { 577 return nil, err 578 } 579 580 return conn, err 581 } 582 583 func (te *TxEngine) taintConn(ctx context.Context, conn *StatefulConnection, preQueries []string) error { 584 err := conn.Taint(ctx, te.reservedConnStats) 585 if err != nil { 586 return err 587 } 588 for _, query := range preQueries { 589 _, err := conn.Exec(ctx, query, 0 /*maxrows*/, false /*wantFields*/) 590 if err != nil { 591 conn.Releasef("error during connection setup: %s\n%v", query, err) 592 return err 593 } 594 } 595 return nil 596 } 597 598 // Release closes the underlying connection. 599 func (te *TxEngine) Release(connID int64) error { 600 conn, err := te.txPool.GetAndLock(connID, "for release") 601 if err != nil { 602 return err 603 } 604 605 conn.Release(tx.ConnRelease) 606 607 return nil 608 }