github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/txn_state.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/kv"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    25  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    26  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    27  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    29  	"github.com/cockroachdb/errors"
    30  	"github.com/cockroachdb/logtags"
    31  	opentracing "github.com/opentracing/opentracing-go"
    32  )
    33  
// txnState contains state associated with an ongoing SQL txn; it constitutes
// the ExtendedState of a connExecutor's state machine (defined in conn_fsm.go).
// It contains fields that are mutated as side-effects of state transitions;
// notably the KV-level kv.Txn. All mutations to txnState are performed through
// calling fsm.Machine.Apply(event); see conn_fsm.go for the definition of the
// state machine.
type txnState struct {
	// Mutable fields accessed from goroutines not synchronized by this txn's
	// session, such as when a SHOW SESSIONS statement is executed on another
	// session.
	//
	// Note that reads of mu.txn from the session's main goroutine do not require
	// acquiring a read lock - since only that goroutine will ever write to
	// mu.txn. Writes to mu.txn do require a write lock to guarantee safety with
	// reads by other goroutines.
	mu struct {
		syncutil.RWMutex

		// txn is the KV transaction backing the SQL txn. It is set by
		// resetForNewSQLTxn() and reset to nil by finishSQLTxn() and
		// finishExternalTxn().
		txn *kv.Txn

		// txnStart records the time that txn started.
		txnStart time.Time
	}

	// connCtx is the connection's context. This is the parent of Ctx.
	connCtx context.Context

	// Ctx is the context for everything running in this SQL txn.
	// This is only set while the session's state is not stateNoTxn.
	Ctx context.Context

	// sp is the span corresponding to the SQL txn. These are often root spans, as
	// SQL txns are frequently the level at which we do tracing.
	sp opentracing.Span
	// recordingThreshold, if not zero, indicates that sp is recording and that
	// the recording should be dumped to the log if execution of the transaction
	// took more than this. recordingStart is the time at which that recording
	// was started; the transaction's duration is measured from it.
	recordingThreshold time.Duration
	recordingStart     time.Time

	// cancel is Ctx's cancellation function. Called upon COMMIT/ROLLBACK of the
	// transaction to release resources associated with the context. nil when no
	// txn is in progress.
	cancel context.CancelFunc

	// The timestamp to report for current_timestamp(), now() etc.
	// This must be constant for the lifetime of a SQL transaction.
	sqlTimestamp time.Time

	// The transaction's priority.
	priority roachpb.UserPriority

	// The transaction's read only state.
	readOnly bool

	// Set to true when the current transaction is using a historical timestamp
	// through the use of AS OF SYSTEM TIME.
	isHistorical bool

	// mon tracks txn-bound objects like the running state of
	// planNode in the midst of performing a computation.
	mon *mon.BytesMonitor

	// adv is overwritten after every transition. It represents instructions
	// for moving the cursor over the stream of input statements to the next
	// statement to be executed.
	// Do not use directly; set through setAdvanceInfo() and read through
	// consumeAdvanceInfo().
	adv advanceInfo

	// txnAbortCount is incremented whenever the state transitions to
	// stateAborted.
	txnAbortCount *metric.Counter
}
   108  
// txnType represents the type of a SQL transaction: whether it was started
// implicitly for a single statement or explicitly by the client.
type txnType int

//go:generate stringer -type=txnType
const (
	// implicitTxn means that the txn was created for a (single) SQL statement
	// executed outside of a transaction. It is the zero value of txnType.
	implicitTxn txnType = iota
	// explicitTxn means that the txn was explicitly started with a BEGIN
	// statement.
	explicitTxn
)
   121  
   122  // resetForNewSQLTxn (re)initializes the txnState for a new transaction.
   123  // It creates a new client.Txn and initializes it using the session defaults.
   124  //
   125  // connCtx: The context in which the new transaction is started (usually a
   126  // 	 connection's context). ts.Ctx will be set to a child context and should be
   127  // 	 used for everything that happens within this SQL transaction.
   128  // txnType: The type of the starting txn.
   129  // sqlTimestamp: The timestamp to report for current_timestamp(), now() etc.
   130  // historicalTimestamp: If non-nil indicates that the transaction is historical
   131  //   and should be fixed to this timestamp.
   132  // priority: The transaction's priority.
   133  // readOnly: The read-only character of the new txn.
   134  // txn: If not nil, this txn will be used instead of creating a new txn. If so,
   135  //      all the other arguments need to correspond to the attributes of this txn.
   136  // tranCtx: A bag of extra execution context.
   137  func (ts *txnState) resetForNewSQLTxn(
   138  	connCtx context.Context,
   139  	txnType txnType,
   140  	sqlTimestamp time.Time,
   141  	historicalTimestamp *hlc.Timestamp,
   142  	priority roachpb.UserPriority,
   143  	readOnly tree.ReadWriteMode,
   144  	txn *kv.Txn,
   145  	tranCtx transitionCtx,
   146  ) {
   147  	// Reset state vars to defaults.
   148  	ts.sqlTimestamp = sqlTimestamp
   149  	ts.isHistorical = false
   150  
   151  	// Create a context for this transaction. It will include a root span that
   152  	// will contain everything executed as part of the upcoming SQL txn, including
   153  	// (automatic or user-directed) retries. The span is closed by finishSQLTxn().
   154  	// TODO(andrei): figure out how to close these spans on server shutdown? Ties
   155  	// into a larger discussion about how to drain SQL and rollback open txns.
   156  	var sp opentracing.Span
   157  	opName := sqlTxnName
   158  
   159  	// Create a span for the new txn. The span is always Recordable to support the
   160  	// use of session tracing, which may start recording on it.
   161  	// TODO(andrei): We should use tracing.EnsureChildSpan() as that's much more
   162  	// efficient that StartSpan (and also it'd be simpler), but that interface
   163  	// doesn't current support the Recordable option.
   164  	if parentSp := opentracing.SpanFromContext(connCtx); parentSp != nil {
   165  		// Create a child span for this SQL txn.
   166  		sp = parentSp.Tracer().StartSpan(
   167  			opName,
   168  			opentracing.ChildOf(parentSp.Context()), tracing.Recordable,
   169  			tracing.LogTagsFromCtx(connCtx),
   170  		)
   171  	} else {
   172  		// Create a root span for this SQL txn.
   173  		sp = tranCtx.tracer.(*tracing.Tracer).StartRootSpan(
   174  			opName, logtags.FromContext(connCtx), tracing.RecordableSpan)
   175  	}
   176  
   177  	if txnType == implicitTxn {
   178  		sp.SetTag("implicit", "true")
   179  	}
   180  
   181  	alreadyRecording := tranCtx.sessionTracing.Enabled()
   182  	duration := traceTxnThreshold.Get(&tranCtx.settings.SV)
   183  	if !alreadyRecording && (duration > 0) {
   184  		tracing.StartRecording(sp, tracing.SnowballRecording)
   185  		ts.recordingThreshold = duration
   186  		ts.recordingStart = timeutil.Now()
   187  	}
   188  
   189  	// Put the new span in the context.
   190  	txnCtx := opentracing.ContextWithSpan(connCtx, sp)
   191  
   192  	if !tracing.IsRecordable(sp) {
   193  		log.Fatalf(connCtx, "non-recordable transaction span of type: %T", sp)
   194  	}
   195  
   196  	ts.sp = sp
   197  	ts.Ctx, ts.cancel = contextutil.WithCancel(txnCtx)
   198  
   199  	ts.mon.Start(ts.Ctx, tranCtx.connMon, mon.BoundAccount{} /* reserved */)
   200  	ts.mu.Lock()
   201  	if txn == nil {
   202  		ts.mu.txn = kv.NewTxnWithSteppingEnabled(ts.Ctx, tranCtx.db, tranCtx.nodeIDOrZero)
   203  		ts.mu.txn.SetDebugName(opName)
   204  	} else {
   205  		ts.mu.txn = txn
   206  	}
   207  	ts.mu.txnStart = timeutil.Now()
   208  	ts.mu.Unlock()
   209  	if historicalTimestamp != nil {
   210  		ts.setHistoricalTimestamp(ts.Ctx, *historicalTimestamp)
   211  	}
   212  	if err := ts.setPriority(priority); err != nil {
   213  		panic(err)
   214  	}
   215  	if err := ts.setReadOnlyMode(readOnly); err != nil {
   216  		panic(err)
   217  	}
   218  }
   219  
   220  // finishSQLTxn finalizes a transaction's results and closes the root span for
   221  // the current SQL txn. This needs to be called before resetForNewSQLTxn() is
   222  // called for starting another SQL txn.
   223  func (ts *txnState) finishSQLTxn() {
   224  	ts.mon.Stop(ts.Ctx)
   225  	if ts.cancel != nil {
   226  		ts.cancel()
   227  		ts.cancel = nil
   228  	}
   229  	if ts.sp == nil {
   230  		panic("No span in context? Was resetForNewSQLTxn() called previously?")
   231  	}
   232  
   233  	if ts.recordingThreshold > 0 {
   234  		if r := tracing.GetRecording(ts.sp); r != nil {
   235  			if elapsed := timeutil.Since(ts.recordingStart); elapsed >= ts.recordingThreshold {
   236  				dump := r.String()
   237  				if len(dump) > 0 {
   238  					log.Infof(ts.Ctx, "SQL txn took %s, exceeding tracing threshold of %s:\n%s",
   239  						elapsed, ts.recordingThreshold, dump)
   240  				}
   241  			}
   242  		} else {
   243  			log.Warning(ts.Ctx, "Missing trace when sampled was enabled.")
   244  		}
   245  	}
   246  
   247  	ts.sp.Finish()
   248  	ts.sp = nil
   249  	ts.Ctx = nil
   250  	ts.mu.Lock()
   251  	ts.mu.txn = nil
   252  	ts.mu.txnStart = time.Time{}
   253  	ts.mu.Unlock()
   254  	ts.recordingThreshold = 0
   255  }
   256  
   257  // finishExternalTxn is a stripped-down version of finishSQLTxn used by
   258  // connExecutors that run within a higher-level transaction (through the
   259  // InternalExecutor). These guys don't want to mess with the transaction per-se,
   260  // but still want to clean up other stuff.
   261  func (ts *txnState) finishExternalTxn() {
   262  	if ts.Ctx == nil {
   263  		ts.mon.Stop(ts.connCtx)
   264  	} else {
   265  		ts.mon.Stop(ts.Ctx)
   266  	}
   267  	if ts.cancel != nil {
   268  		ts.cancel()
   269  		ts.cancel = nil
   270  	}
   271  	if ts.sp != nil {
   272  		ts.sp.Finish()
   273  	}
   274  	ts.sp = nil
   275  	ts.Ctx = nil
   276  	ts.mu.Lock()
   277  	ts.mu.txn = nil
   278  	ts.mu.Unlock()
   279  }
   280  
   281  func (ts *txnState) setHistoricalTimestamp(ctx context.Context, historicalTimestamp hlc.Timestamp) {
   282  	ts.mu.Lock()
   283  	ts.mu.txn.SetFixedTimestamp(ctx, historicalTimestamp)
   284  	ts.mu.Unlock()
   285  	ts.isHistorical = true
   286  }
   287  
   288  // getReadTimestamp returns the transaction's current read timestamp.
   289  func (ts *txnState) getReadTimestamp() hlc.Timestamp {
   290  	ts.mu.RLock()
   291  	defer ts.mu.RUnlock()
   292  	return ts.mu.txn.ReadTimestamp()
   293  }
   294  
   295  func (ts *txnState) setPriority(userPriority roachpb.UserPriority) error {
   296  	ts.mu.Lock()
   297  	err := ts.mu.txn.SetUserPriority(userPriority)
   298  	ts.mu.Unlock()
   299  	if err != nil {
   300  		return err
   301  	}
   302  	ts.priority = userPriority
   303  	return nil
   304  }
   305  
   306  func (ts *txnState) setReadOnlyMode(mode tree.ReadWriteMode) error {
   307  	switch mode {
   308  	case tree.UnspecifiedReadWriteMode:
   309  		return nil
   310  	case tree.ReadOnly:
   311  		ts.readOnly = true
   312  	case tree.ReadWrite:
   313  		if ts.isHistorical {
   314  			return tree.ErrAsOfSpecifiedWithReadWrite
   315  		}
   316  		ts.readOnly = false
   317  	default:
   318  		return errors.AssertionFailedf("unknown read mode: %s", errors.Safe(mode))
   319  	}
   320  	return nil
   321  }
   322  
// advanceCode is part of advanceInfo; it instructs the module managing the
// statements buffer on what action to take.
type advanceCode int

//go:generate stringer -type=advanceCode
const (
	// advanceUnknown is the zero value. setAdvanceInfo() treats it as "no
	// advanceInfo is pending" and consumeAdvanceInfo() resets the code to it.
	advanceUnknown advanceCode = iota
	// stayInPlace means that the cursor should remain where it is. The same
	// statement will be executed next.
	stayInPlace
	// advanceOne means that the cursor should be advanced by one position. This
	// is the code commonly used after a successful statement execution.
	advanceOne
	// skipBatch means that the cursor should skip over any remaining commands
	// that are part of the current batch and be positioned on the first
	// command in the next batch.
	skipBatch

	// rewind means that the cursor should be moved back to the position indicated
	// by rewCap.
	rewind
)
   345  
// txnEvent is part of advanceInfo, informing the connExecutor about some
// transaction events. It is used by the connExecutor to clear state associated
// with a SQL transaction (other than the state encapsulated in TxnState; e.g.
// schema changes and portals).
//
//go:generate stringer -type=txnEvent
type txnEvent int

const (
	// noEvent is the zero value of txnEvent.
	noEvent txnEvent = iota

	// txnStart means that the statement that just ran started a new transaction.
	// Note that when a transaction is restarted, txnStart event is not emitted.
	txnStart
	// txnCommit means that the transaction has committed (successfully). This
	// doesn't mean that the SQL txn is necessarily "finished" - this event can be
	// generated by a RELEASE statement and the connection is still waiting for a
	// COMMIT.
	// This event is produced both when entering the CommitWait state and also
	// when leaving it.
	txnCommit
	// txnRollback means that the SQL transaction has been rolled back (completely
	// rolled back, not to a savepoint). It is generated when an implicit
	// transaction fails and when an explicit transaction runs a ROLLBACK.
	txnRollback
	// txnRestart means that the transaction is restarting. The iteration of the
	// txn just finished will not commit. It is generated when we're about to
	// auto-retry a txn and after a rollback to a savepoint placed at the start of
	// the transaction. This allows such savepoints to reset more state than other
	// savepoints.
	txnRestart
)
   378  
// advanceInfo represents instructions for the connExecutor about what statement
// to execute next (how to move its cursor over the input statements) and how
// to handle the results produced so far - can they be delivered to the client
// ASAP or not. advanceInfo is the "output" of performing a state transition.
type advanceInfo struct {
	// code tells how the cursor should move. The zero value, advanceUnknown,
	// is what a txnState's advanceInfo holds when nothing is pending.
	code advanceCode

	// txnEvent is filled in when the transaction commits, aborts or starts
	// waiting for a retry.
	txnEvent txnEvent

	// Fields for the rewind code:

	// rewCap is the capability to rewind to the beginning of the transaction.
	// rewCap.rewindAndUnlock() needs to be called to perform the promised rewind.
	//
	// This field should not be set directly; buildRewindInstructions() should be
	// used.
	rewCap rewindCapability
}
   399  
// transitionCtx is a bag of fields needed by some state machine events.
type transitionCtx struct {
	// db and nodeIDOrZero are what resetForNewSQLTxn() passes to
	// kv.NewTxnWithSteppingEnabled() when it needs to create a new KV txn.
	db           *kv.DB
	nodeIDOrZero roachpb.NodeID // zero on SQL tenant servers, see #48008
	clock        *hlc.Clock
	// connMon is the connExecutor's monitor. New transactions will create a child
	// monitor tracking txn-scoped objects.
	connMon *mon.BytesMonitor
	// The Tracer used to create root spans for new txns if the parent ctx doesn't
	// have a span.
	tracer opentracing.Tracer
	// sessionTracing provides access to the session's tracing interface. The
	// state machine needs to see if session tracing is enabled.
	// settings holds the cluster settings; resetForNewSQLTxn() reads the
	// traceTxnThreshold setting from it.
	sessionTracing *SessionTracing
	settings       *cluster.Settings
}
   416  
// noRewind is the zero rewindCapability. It is the value to pass to
// setAdvanceInfo() whenever the advance code is not rewind.
var noRewind = rewindCapability{}
   418  
   419  // setAdvanceInfo sets the adv field. This has to be called as part of any state
   420  // transition. The connExecutor is supposed to inspect adv after any transition
   421  // and act on it.
   422  func (ts *txnState) setAdvanceInfo(code advanceCode, rewCap rewindCapability, ev txnEvent) {
   423  	if ts.adv.code != advanceUnknown {
   424  		panic("previous advanceInfo has not been consume()d")
   425  	}
   426  	if code != rewind && rewCap != noRewind {
   427  		panic("if rewCap is specified, code needs to be rewind")
   428  	}
   429  	ts.adv = advanceInfo{
   430  		code:     code,
   431  		rewCap:   rewCap,
   432  		txnEvent: ev,
   433  	}
   434  }
   435  
   436  // consumerAdvanceInfo returns the advanceInfo set by the last transition and
   437  // resets the state so that another transition can overwrite it.
   438  func (ts *txnState) consumeAdvanceInfo() advanceInfo {
   439  	adv := ts.adv
   440  	ts.adv = advanceInfo{}
   441  	return adv
   442  }