// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/txn_state.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package sql

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
	opentracing "github.com/opentracing/opentracing-go"
)

// txnState contains state associated with an ongoing SQL txn; it constitutes
// the ExtendedState of a connExecutor's state machine (defined in conn_fsm.go).
// It contains fields that are mutated as side-effects of state transitions;
// notably the KV transaction (mu.txn, a *kv.Txn). All mutations to txnState are
// performed through calling fsm.Machine.Apply(event); see conn_fsm.go for the
// definition of the state machine.
type txnState struct {
	// Mutable fields accessed from goroutines not synchronized by this txn's
	// session, such as when a SHOW SESSIONS statement is executed on another
	// session.
	//
	// Note that reads of mu.txn from the session's main goroutine do not require
	// acquiring a read lock - since only that goroutine will ever write to
	// mu.txn. Writes to mu.txn do require a write lock to guarantee safety with
	// reads by other goroutines.
	mu struct {
		syncutil.RWMutex

		// txn is the KV transaction backing the current SQL txn. It is set by
		// resetForNewSQLTxn() and reset to nil by finishSQLTxn() and
		// finishExternalTxn().
		txn *kv.Txn

		// txnStart records the time that txn started.
		txnStart time.Time
	}

	// connCtx is the connection's context. This is the parent of Ctx.
	connCtx context.Context

	// Ctx is the context for everything running in this SQL txn.
	// This is only set while the session's state is not stateNoTxn.
	Ctx context.Context

	// sp is the span corresponding to the SQL txn. These are often root spans, as
	// SQL txns are frequently the level at which we do tracing.
	sp opentracing.Span
	// recordingThreshold, if not zero, indicates that sp is recording and that
	// the recording should be dumped to the log if execution of the transaction
	// took more than this.
	recordingThreshold time.Duration
	// recordingStart is the time at which the recording on sp was started; the
	// transaction's duration is measured from it when comparing against
	// recordingThreshold.
	recordingStart time.Time

	// cancel is Ctx's cancellation function. Called upon COMMIT/ROLLBACK of the
	// transaction to release resources associated with the context. nil when no
	// txn is in progress.
	cancel context.CancelFunc

	// The timestamp to report for current_timestamp(), now() etc.
	// This must be constant for the lifetime of a SQL transaction.
	sqlTimestamp time.Time

	// The transaction's priority. Updated by setPriority().
	priority roachpb.UserPriority

	// The transaction's read only state. Updated by setReadOnlyMode().
	readOnly bool

	// Set to true when the current transaction is using a historical timestamp
	// through the use of AS OF SYSTEM TIME.
	isHistorical bool

	// mon tracks txn-bound objects like the running state of
	// planNode in the midst of performing a computation.
	mon *mon.BytesMonitor

	// adv is overwritten after every transition. It represents instructions
	// for moving the cursor over the stream of input statements to the next
	// statement to be executed.
	// Do not use directly; set through setAdvanceInfo() and read through
	// consumeAdvanceInfo().
	adv advanceInfo

	// txnAbortCount is incremented whenever the state transitions to
	// stateAborted.
	txnAbortCount *metric.Counter
}

// txnType represents the type of a SQL transaction.
type txnType int

//go:generate stringer -type=txnType
const (
	// implicitTxn means that the txn was created for a (single) SQL statement
	// executed outside of a transaction.
	implicitTxn txnType = iota
	// explicitTxn means that the txn was explicitly started with a BEGIN
	// statement.
	explicitTxn
)

// resetForNewSQLTxn (re)initializes the txnState for a new transaction.
// Unless a txn is passed in, it creates a new kv.Txn; the transaction is then
// configured with the supplied priority and read-only mode. It panics if
// applying the priority or the read-only mode fails.
//
// connCtx: The context in which the new transaction is started (usually a
//   connection's context). ts.Ctx will be set to a child context and should be
//   used for everything that happens within this SQL transaction.
// txnType: The type of the starting txn.
// sqlTimestamp: The timestamp to report for current_timestamp(), now() etc.
// historicalTimestamp: If non-nil indicates that the transaction is historical
//   and should be fixed to this timestamp.
// priority: The transaction's priority.
// readOnly: The read-only character of the new txn.
// txn: If not nil, this txn will be used instead of creating a new txn. If so,
//   all the other arguments need to correspond to the attributes of this txn.
// tranCtx: A bag of extra execution context.
137 func (ts *txnState) resetForNewSQLTxn( 138 connCtx context.Context, 139 txnType txnType, 140 sqlTimestamp time.Time, 141 historicalTimestamp *hlc.Timestamp, 142 priority roachpb.UserPriority, 143 readOnly tree.ReadWriteMode, 144 txn *kv.Txn, 145 tranCtx transitionCtx, 146 ) { 147 // Reset state vars to defaults. 148 ts.sqlTimestamp = sqlTimestamp 149 ts.isHistorical = false 150 151 // Create a context for this transaction. It will include a root span that 152 // will contain everything executed as part of the upcoming SQL txn, including 153 // (automatic or user-directed) retries. The span is closed by finishSQLTxn(). 154 // TODO(andrei): figure out how to close these spans on server shutdown? Ties 155 // into a larger discussion about how to drain SQL and rollback open txns. 156 var sp opentracing.Span 157 opName := sqlTxnName 158 159 // Create a span for the new txn. The span is always Recordable to support the 160 // use of session tracing, which may start recording on it. 161 // TODO(andrei): We should use tracing.EnsureChildSpan() as that's much more 162 // efficient that StartSpan (and also it'd be simpler), but that interface 163 // doesn't current support the Recordable option. 164 if parentSp := opentracing.SpanFromContext(connCtx); parentSp != nil { 165 // Create a child span for this SQL txn. 166 sp = parentSp.Tracer().StartSpan( 167 opName, 168 opentracing.ChildOf(parentSp.Context()), tracing.Recordable, 169 tracing.LogTagsFromCtx(connCtx), 170 ) 171 } else { 172 // Create a root span for this SQL txn. 
173 sp = tranCtx.tracer.(*tracing.Tracer).StartRootSpan( 174 opName, logtags.FromContext(connCtx), tracing.RecordableSpan) 175 } 176 177 if txnType == implicitTxn { 178 sp.SetTag("implicit", "true") 179 } 180 181 alreadyRecording := tranCtx.sessionTracing.Enabled() 182 duration := traceTxnThreshold.Get(&tranCtx.settings.SV) 183 if !alreadyRecording && (duration > 0) { 184 tracing.StartRecording(sp, tracing.SnowballRecording) 185 ts.recordingThreshold = duration 186 ts.recordingStart = timeutil.Now() 187 } 188 189 // Put the new span in the context. 190 txnCtx := opentracing.ContextWithSpan(connCtx, sp) 191 192 if !tracing.IsRecordable(sp) { 193 log.Fatalf(connCtx, "non-recordable transaction span of type: %T", sp) 194 } 195 196 ts.sp = sp 197 ts.Ctx, ts.cancel = contextutil.WithCancel(txnCtx) 198 199 ts.mon.Start(ts.Ctx, tranCtx.connMon, mon.BoundAccount{} /* reserved */) 200 ts.mu.Lock() 201 if txn == nil { 202 ts.mu.txn = kv.NewTxnWithSteppingEnabled(ts.Ctx, tranCtx.db, tranCtx.nodeIDOrZero) 203 ts.mu.txn.SetDebugName(opName) 204 } else { 205 ts.mu.txn = txn 206 } 207 ts.mu.txnStart = timeutil.Now() 208 ts.mu.Unlock() 209 if historicalTimestamp != nil { 210 ts.setHistoricalTimestamp(ts.Ctx, *historicalTimestamp) 211 } 212 if err := ts.setPriority(priority); err != nil { 213 panic(err) 214 } 215 if err := ts.setReadOnlyMode(readOnly); err != nil { 216 panic(err) 217 } 218 } 219 220 // finishSQLTxn finalizes a transaction's results and closes the root span for 221 // the current SQL txn. This needs to be called before resetForNewSQLTxn() is 222 // called for starting another SQL txn. 223 func (ts *txnState) finishSQLTxn() { 224 ts.mon.Stop(ts.Ctx) 225 if ts.cancel != nil { 226 ts.cancel() 227 ts.cancel = nil 228 } 229 if ts.sp == nil { 230 panic("No span in context? 
Was resetForNewSQLTxn() called previously?") 231 } 232 233 if ts.recordingThreshold > 0 { 234 if r := tracing.GetRecording(ts.sp); r != nil { 235 if elapsed := timeutil.Since(ts.recordingStart); elapsed >= ts.recordingThreshold { 236 dump := r.String() 237 if len(dump) > 0 { 238 log.Infof(ts.Ctx, "SQL txn took %s, exceeding tracing threshold of %s:\n%s", 239 elapsed, ts.recordingThreshold, dump) 240 } 241 } 242 } else { 243 log.Warning(ts.Ctx, "Missing trace when sampled was enabled.") 244 } 245 } 246 247 ts.sp.Finish() 248 ts.sp = nil 249 ts.Ctx = nil 250 ts.mu.Lock() 251 ts.mu.txn = nil 252 ts.mu.txnStart = time.Time{} 253 ts.mu.Unlock() 254 ts.recordingThreshold = 0 255 } 256 257 // finishExternalTxn is a stripped-down version of finishSQLTxn used by 258 // connExecutors that run within a higher-level transaction (through the 259 // InternalExecutor). These guys don't want to mess with the transaction per-se, 260 // but still want to clean up other stuff. 261 func (ts *txnState) finishExternalTxn() { 262 if ts.Ctx == nil { 263 ts.mon.Stop(ts.connCtx) 264 } else { 265 ts.mon.Stop(ts.Ctx) 266 } 267 if ts.cancel != nil { 268 ts.cancel() 269 ts.cancel = nil 270 } 271 if ts.sp != nil { 272 ts.sp.Finish() 273 } 274 ts.sp = nil 275 ts.Ctx = nil 276 ts.mu.Lock() 277 ts.mu.txn = nil 278 ts.mu.Unlock() 279 } 280 281 func (ts *txnState) setHistoricalTimestamp(ctx context.Context, historicalTimestamp hlc.Timestamp) { 282 ts.mu.Lock() 283 ts.mu.txn.SetFixedTimestamp(ctx, historicalTimestamp) 284 ts.mu.Unlock() 285 ts.isHistorical = true 286 } 287 288 // getReadTimestamp returns the transaction's current read timestamp. 
289 func (ts *txnState) getReadTimestamp() hlc.Timestamp { 290 ts.mu.RLock() 291 defer ts.mu.RUnlock() 292 return ts.mu.txn.ReadTimestamp() 293 } 294 295 func (ts *txnState) setPriority(userPriority roachpb.UserPriority) error { 296 ts.mu.Lock() 297 err := ts.mu.txn.SetUserPriority(userPriority) 298 ts.mu.Unlock() 299 if err != nil { 300 return err 301 } 302 ts.priority = userPriority 303 return nil 304 } 305 306 func (ts *txnState) setReadOnlyMode(mode tree.ReadWriteMode) error { 307 switch mode { 308 case tree.UnspecifiedReadWriteMode: 309 return nil 310 case tree.ReadOnly: 311 ts.readOnly = true 312 case tree.ReadWrite: 313 if ts.isHistorical { 314 return tree.ErrAsOfSpecifiedWithReadWrite 315 } 316 ts.readOnly = false 317 default: 318 return errors.AssertionFailedf("unknown read mode: %s", errors.Safe(mode)) 319 } 320 return nil 321 } 322 323 // advanceCode is part of advanceInfo; it instructs the module managing the 324 // statements buffer on what action to take. 325 type advanceCode int 326 327 //go:generate stringer -type=advanceCode 328 const ( 329 advanceUnknown advanceCode = iota 330 // stayInPlace means that the cursor should remain where it is. The same 331 // statement will be executed next. 332 stayInPlace 333 // advanceOne means that the cursor should be advanced by one position. This 334 // is the code commonly used after a successful statement execution. 335 advanceOne 336 // skipBatch means that the cursor should skip over any remaining commands 337 // that are part of the current batch and be positioned on the first 338 // comamnd in the next batch. 339 skipBatch 340 341 // rewind means that the cursor should be moved back to the position indicated 342 // by rewCap. 343 rewind 344 ) 345 346 // txnEvent is part of advanceInfo, informing the connExecutor about some 347 // transaction events. It is used by the connExecutor to clear state associated 348 // with a SQL transaction (other than the state encapsulated in TxnState; e.g. 
349 // schema changes and portals). 350 // 351 //go:generate stringer -type=txnEvent 352 type txnEvent int 353 354 const ( 355 noEvent txnEvent = iota 356 357 // txnStart means that the statement that just ran started a new transaction. 358 // Note that when a transaction is restarted, txnStart event is not emitted. 359 txnStart 360 // txnCommit means that the transaction has committed (successfully). This 361 // doesn't mean that the SQL txn is necessarily "finished" - this event can be 362 // generated by a RELEASE statement and the connection is still waiting for a 363 // COMMIT. 364 // This event is produced both when entering the CommitWait state and also 365 // when leaving it. 366 txnCommit 367 // txnRollback means that the SQL transaction has been rolled back (completely 368 // rolled back, not to a savepoint). It is generated when an implicit 369 // transaction fails and when an explicit transaction runs a ROLLBACK. 370 txnRollback 371 // txnRestart means that the transaction is restarting. The iteration of the 372 // txn just finished will not commit. It is generated when we're about to 373 // auto-retry a txn and after a rollback to a savepoint placed at the start of 374 // the transaction. This allows such savepoints to reset more state than other 375 // savepoints. 376 txnRestart 377 ) 378 379 // advanceInfo represents instructions for the connExecutor about what statement 380 // to execute next (how to move its cursor over the input statements) and how 381 // to handle the results produced so far - can they be delivered to the client 382 // ASAP or not. advanceInfo is the "output" of performing a state transition. 383 type advanceInfo struct { 384 code advanceCode 385 386 // txnEvent is filled in when the transaction commits, aborts or starts 387 // waiting for a retry. 388 txnEvent txnEvent 389 390 // Fields for the rewind code: 391 392 // rewCap is the capability to rewind to the beginning of the transaction. 
393 // rewCap.rewindAndUnlock() needs to be called to perform the promised rewind. 394 // 395 // This field should not be set directly; buildRewindInstructions() should be 396 // used. 397 rewCap rewindCapability 398 } 399 400 // transitionCtx is a bag of fields needed by some state machine events. 401 type transitionCtx struct { 402 db *kv.DB 403 nodeIDOrZero roachpb.NodeID // zero on SQL tenant servers, see #48008 404 clock *hlc.Clock 405 // connMon is the connExecutor's monitor. New transactions will create a child 406 // monitor tracking txn-scoped objects. 407 connMon *mon.BytesMonitor 408 // The Tracer used to create root spans for new txns if the parent ctx doesn't 409 // have a span. 410 tracer opentracing.Tracer 411 // sessionTracing provides access to the session's tracing interface. The 412 // state machine needs to see if session tracing is enabled. 413 sessionTracing *SessionTracing 414 settings *cluster.Settings 415 } 416 417 var noRewind = rewindCapability{} 418 419 // setAdvanceInfo sets the adv field. This has to be called as part of any state 420 // transition. The connExecutor is supposed to inspect adv after any transition 421 // and act on it. 422 func (ts *txnState) setAdvanceInfo(code advanceCode, rewCap rewindCapability, ev txnEvent) { 423 if ts.adv.code != advanceUnknown { 424 panic("previous advanceInfo has not been consume()d") 425 } 426 if code != rewind && rewCap != noRewind { 427 panic("if rewCap is specified, code needs to be rewind") 428 } 429 ts.adv = advanceInfo{ 430 code: code, 431 rewCap: rewCap, 432 txnEvent: ev, 433 } 434 } 435 436 // consumerAdvanceInfo returns the advanceInfo set by the last transition and 437 // resets the state so that another transition can overwrite it. 438 func (ts *txnState) consumeAdvanceInfo() advanceInfo { 439 adv := ts.adv 440 ts.adv = advanceInfo{} 441 return adv 442 }