github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/conn_executor_exec.go

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"runtime/pprof"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/kv"
    22  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    23  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/lease"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    28  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    34  	"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
    35  	"github.com/cockroachdb/cockroach/pkg/util/fsm"
    36  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    37  	"github.com/cockroachdb/cockroach/pkg/util/log"
    38  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    39  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    40  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    41  	"github.com/cockroachdb/errors"
    42  	"github.com/opentracing/opentracing-go"
    43  )
    44  
    45  // execStmt executes one statement by dispatching according to the current
    46  // state. Returns an Event to be passed to the state machine, or nil if no
    47  // transition is needed. If nil is returned, then the cursor is supposed to
    48  // advance to the next statement.
    49  //
    50  // If an error is returned, the session is supposed to be considered done. Query
    51  // execution errors are not returned explicitly and they're also not
    52  // communicated to the client. Instead they're incorporated in the returned
    53  // event (the returned payload will implement payloadWithError). It is the
    54  // caller's responsibility to deliver execution errors to the client.
    55  //
    56  // Args:
    57  // stmt: The statement to execute.
    58  // res: Used to produce query results.
    59  // pinfo: The values to use for the statement's placeholders. If nil is passed,
    60  // 	 then the statement cannot have any placeholder.
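         // A sketch of the caller-side contract (illustrative only; the real
         // caller is the connExecutor's command loop):
         //
         //	ev, payload, err := ex.execStmt(ctx, stmt, res, nil /* pinfo */)
         //	if err != nil {
         //		return err // session is done
         //	}
         //	if ev != nil {
         //		err = ex.machine.ApplyWithPayload(ctx, ev, payload)
         //	}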
    61  func (ex *connExecutor) execStmt(
    62  	ctx context.Context, stmt Statement, res RestrictedCommandResult, pinfo *tree.PlaceholderInfo,
    63  ) (fsm.Event, fsm.EventPayload, error) {
    64  	if log.V(2) || logStatementsExecuteEnabled.Get(&ex.server.cfg.Settings.SV) ||
    65  		log.HasSpanOrEvent(ctx) {
    66  		log.VEventf(ctx, 2, "executing: %s in state: %s", stmt, ex.machine.CurState())
    67  	}
    68  
    69  	// Run observer statements in a separate code path; their execution does not
    70  	// depend on the current transaction state.
    71  	if _, ok := stmt.AST.(tree.ObserverStatement); ok {
    72  		err := ex.runObserverStatement(ctx, stmt, res)
    73  		// Note that regardless of res.Err(), these observer statements don't
    74  		// generate error events; transactions are always allowed to continue.
    75  		return nil, nil, err
    76  	}
    77  
    78  	queryID := ex.generateID()
    79  	stmt.queryID = queryID
    80  
    81  	// Dispatch the statement for execution based on the current state.
    82  	var ev fsm.Event
    83  	var payload fsm.EventPayload
    84  	var err error
    85  
    86  	switch ex.machine.CurState().(type) {
    87  	case stateNoTxn:
    88  		ev, payload = ex.execStmtInNoTxnState(ctx, stmt)
    89  	case stateOpen:
    90  		if ex.server.cfg.Settings.IsCPUProfiling() {
    91  			labels := pprof.Labels(
    92  				"stmt.tag", stmt.AST.StatementTag(),
    93  				"stmt.anonymized", stmt.AnonymizedStr,
    94  			)
    95  			pprof.Do(ctx, labels, func(ctx context.Context) {
    96  				ev, payload, err = ex.execStmtInOpenState(ctx, stmt, res, pinfo)
    97  			})
    98  		} else {
    99  			ev, payload, err = ex.execStmtInOpenState(ctx, stmt, res, pinfo)
   100  		}
   101  		switch ev.(type) {
   102  		case eventNonRetriableErr:
   103  			ex.recordFailure()
   104  		}
   105  	case stateAborted:
   106  		ev, payload = ex.execStmtInAbortedState(ctx, stmt, res)
   107  	case stateCommitWait:
   108  		ev, payload = ex.execStmtInCommitWaitState(stmt, res)
   109  	default:
   110  		panic(fmt.Sprintf("unexpected txn state: %#v", ex.machine.CurState()))
   111  	}
   112  
   113  	return ev, payload, err
   114  }
   115  
   116  func (ex *connExecutor) recordFailure() {
   117  	ex.metrics.EngineMetrics.FailureCount.Inc(1)
   118  }
   119  
   120  // execStmtInOpenState executes one statement in the context of the session's
   121  // current transaction.
   122  // It handles statements that affect the transaction state (BEGIN, COMMIT)
   123  // directly and delegates everything else to the execution engines.
   124  // Results and query execution errors are written to res.
   125  //
   126  // This method also handles "auto commit" - committing of implicit transactions.
   127  //
    128  // If an error is returned, the connection is supposed to be considered done.
   129  // Query execution errors are not returned explicitly; they're incorporated in
   130  // the returned Event.
   131  //
   132  // The returned event can be nil if no state transition is required.
   133  func (ex *connExecutor) execStmtInOpenState(
   134  	ctx context.Context, stmt Statement, res RestrictedCommandResult, pinfo *tree.PlaceholderInfo,
   135  ) (retEv fsm.Event, retPayload fsm.EventPayload, retErr error) {
   136  	ex.incrementStartedStmtCounter(stmt)
   137  	defer func() {
   138  		if retErr == nil && !payloadHasError(retPayload) {
   139  			ex.incrementExecutedStmtCounter(stmt)
   140  		}
   141  	}()
   142  	os := ex.machine.CurState().(stateOpen)
   143  
   144  	var timeoutTicker *time.Timer
   145  	queryTimedOut := false
   146  	doneAfterFunc := make(chan struct{}, 1)
   147  
   148  	// Canceling a query cancels its transaction's context so we take a reference
   149  	// to the cancelation function here.
   150  	unregisterFn := ex.addActiveQuery(stmt.queryID, stmt, ex.state.cancel)
   151  
   152  	// queryDone is a cleanup function dealing with unregistering a query.
   153  	// It also deals with overwriting res.Error to a more user-friendly message in
   154  	// case of query cancelation. res can be nil to opt out of this.
   155  	queryDone := func(ctx context.Context, res RestrictedCommandResult) {
   156  		if timeoutTicker != nil {
   157  			if !timeoutTicker.Stop() {
   158  				// Wait for the timer callback to complete to avoid a data race on
   159  				// queryTimedOut.
   160  				<-doneAfterFunc
   161  			}
   162  		}
   163  		unregisterFn()
   164  
   165  		// Detect context cancelation and overwrite whatever error might have been
   166  		// set on the result before. The idea is that once the query's context is
   167  		// canceled, all sorts of actors can detect the cancelation and set all
   168  		// sorts of errors on the result. Rather than trying to impose discipline
   169  		// in that jungle, we just overwrite them all here with an error that's
   170  		// nicer to look at for the client.
   171  		if res != nil && ctx.Err() != nil && res.Err() != nil {
   172  			if queryTimedOut {
   173  				res.SetError(sqlbase.QueryTimeoutError)
   174  			} else {
   175  				res.SetError(sqlbase.QueryCanceledError)
   176  			}
   177  		}
   178  	}
   179  	// Generally we want to unregister after the auto-commit below. However, in
    180  	// case we execute the statement through the parallel execution queue,
   181  	// we'll pass the responsibility for unregistering to the queue.
   182  	defer func() {
   183  		if queryDone != nil {
   184  			queryDone(ctx, res)
   185  		}
   186  	}()
   187  
   188  	p := &ex.planner
   189  	stmtTS := ex.server.cfg.Clock.PhysicalTime()
   190  	ex.statsCollector.reset(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
   191  	ex.resetPlanner(ctx, p, ex.state.mu.txn, stmtTS)
   192  	p.sessionDataMutator.paramStatusUpdater = res
   193  	p.noticeSender = res
   194  
   195  	var shouldCollectDiagnostics bool
   196  	var finishCollectionDiagnostics StmtDiagnosticsTraceFinishFunc
   197  
   198  	if explainBundle, ok := stmt.AST.(*tree.ExplainAnalyzeDebug); ok {
   199  		telemetry.Inc(sqltelemetry.ExplainAnalyzeDebugUseCounter)
   200  		// Always collect diagnostics for EXPLAIN ANALYZE (DEBUG).
   201  		shouldCollectDiagnostics = true
   202  		// Strip off the explain node to execute the inner statement.
   203  		stmt.AST = explainBundle.Statement
   204  		// TODO(radu): should we trim the "EXPLAIN ANALYZE (DEBUG)" part from
   205  		// stmt.SQL?
   206  
   207  		// Clear any ExpectedTypes we set if we prepared this statement (they
   208  		// reflect the column types of the EXPLAIN itself and not those of the inner
   209  		// statement).
   210  		stmt.ExpectedTypes = nil
   211  
   212  		// EXPLAIN ANALYZE (DEBUG) does not return the rows for the given query;
   213  		// instead it returns some text which includes a URL.
   214  		// TODO(radu): maybe capture some of the rows and include them in the
   215  		// bundle.
   216  		p.discardRows = true
   217  	} else {
   218  		shouldCollectDiagnostics, finishCollectionDiagnostics = ex.stmtDiagnosticsRecorder.ShouldCollectDiagnostics(ctx, stmt.AST)
   219  		if shouldCollectDiagnostics {
   220  			telemetry.Inc(sqltelemetry.StatementDiagnosticsCollectedCounter)
   221  		}
   222  	}
   223  
   224  	if shouldCollectDiagnostics {
   225  		p.collectBundle = true
   226  		tr := ex.server.cfg.AmbientCtx.Tracer
   227  		origCtx := ctx
   228  		var sp opentracing.Span
   229  		ctx, sp = tracing.StartSnowballTrace(ctx, tr, "traced statement")
   230  		// TODO(radu): consider removing this if/when #46164 is addressed.
   231  		p.extendedEvalCtx.Context = ctx
   232  		defer func() {
   233  			// Record the statement information that we've collected.
   234  			// Note that in case of implicit transactions, the trace contains the auto-commit too.
   235  			sp.Finish()
   236  			trace := tracing.GetRecording(sp)
   237  			ie := p.extendedEvalCtx.InternalExecutor.(*InternalExecutor)
   238  			if finishCollectionDiagnostics != nil {
   239  				bundle, collectionErr := buildStatementBundle(
   240  					origCtx, ex.server.cfg.DB, ie, &p.curPlan, trace,
   241  				)
   242  				finishCollectionDiagnostics(origCtx, bundle.trace, bundle.zip, collectionErr)
   243  			} else {
   244  				// Handle EXPLAIN ANALYZE (DEBUG).
   245  				// If there was a communication error, no point in setting any results.
   246  				if retErr == nil {
   247  					retErr = setExplainBundleResult(
   248  						origCtx, res, stmt.AST, trace, &p.curPlan, ie, ex.server.cfg,
   249  					)
   250  				}
   251  			}
   252  		}()
   253  	}
   254  
   255  	if ex.sessionData.StmtTimeout > 0 {
   256  		timeoutTicker = time.AfterFunc(
   257  			ex.sessionData.StmtTimeout-timeutil.Since(ex.phaseTimes[sessionQueryReceived]),
   258  			func() {
   259  				ex.cancelQuery(stmt.queryID)
   260  				queryTimedOut = true
   261  				doneAfterFunc <- struct{}{}
   262  			})
   263  	}
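         	// For example (illustrative numbers): with a statement_timeout of 5s
         	// and 1s already elapsed since the query was received, the timer above
         	// fires after 4s and cancels the query.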
   264  
   265  	defer func() {
   266  		if filter := ex.server.cfg.TestingKnobs.StatementFilter; retErr == nil && filter != nil {
   267  			var execErr error
   268  			if perr, ok := retPayload.(payloadWithError); ok {
   269  				execErr = perr.errorCause()
   270  			}
   271  			filter(ctx, stmt.String(), execErr)
   272  		}
   273  
   274  		// Do the auto-commit, if necessary.
   275  		if retEv != nil || retErr != nil {
   276  			return
   277  		}
   278  		if os.ImplicitTxn.Get() {
   279  			retEv, retPayload = ex.handleAutoCommit(ctx, stmt.AST)
   280  			return
   281  		}
   282  	}()
   283  
   284  	makeErrEvent := func(err error) (fsm.Event, fsm.EventPayload, error) {
   285  		ev, payload := ex.makeErrEvent(err, stmt.AST)
   286  		return ev, payload, nil
   287  	}
   288  
   289  	switch s := stmt.AST.(type) {
   290  	case *tree.BeginTransaction:
   291  		// BEGIN is always an error when in the Open state. It's legitimate only in
   292  		// the NoTxn state.
   293  		return makeErrEvent(errTransactionInProgress)
   294  
   295  	case *tree.CommitTransaction:
   296  		// CommitTransaction is executed fully here; there's no plan for it.
   297  		ev, payload := ex.commitSQLTransaction(ctx, stmt.AST)
   298  		return ev, payload, nil
   299  
   300  	case *tree.RollbackTransaction:
   301  		// RollbackTransaction is executed fully here; there's no plan for it.
   302  		ev, payload := ex.rollbackSQLTransaction(ctx)
   303  		return ev, payload, nil
   304  
   305  	case *tree.Savepoint:
   306  		return ex.execSavepointInOpenState(ctx, s, res)
   307  
   308  	case *tree.ReleaseSavepoint:
   309  		ev, payload := ex.execRelease(ctx, s, res)
   310  		return ev, payload, nil
   311  
   312  	case *tree.RollbackToSavepoint:
   313  		ev, payload := ex.execRollbackToSavepointInOpenState(ctx, s, res)
   314  		return ev, payload, nil
   315  
   316  	case *tree.Prepare:
   317  		// This is handling the SQL statement "PREPARE". See execPrepare for
   318  		// handling of the protocol-level command for preparing statements.
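         		// For example (hypothetical statement), the SQL-level
         		//   PREPARE q (int) AS SELECT $1 + 1
         		// arrives here with s.Types = [int] serving as type hints for the
         		// placeholder, whereas the equivalent pgwire Parse message goes
         		// through execPrepare instead.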
   319  		name := s.Name.String()
   320  		if _, ok := ex.extraTxnState.prepStmtsNamespace.prepStmts[name]; ok {
   321  			err := pgerror.Newf(
   322  				pgcode.DuplicatePreparedStatement,
   323  				"prepared statement %q already exists", name,
   324  			)
   325  			return makeErrEvent(err)
   326  		}
   327  		var typeHints tree.PlaceholderTypes
   328  		if len(s.Types) > 0 {
   329  			if len(s.Types) > stmt.NumPlaceholders {
   330  				err := pgerror.Newf(pgcode.Syntax, "too many types provided")
   331  				return makeErrEvent(err)
   332  			}
   333  			typeHints = make(tree.PlaceholderTypes, stmt.NumPlaceholders)
   334  			for i, t := range s.Types {
   335  				resolved, err := tree.ResolveType(ctx, t, ex.planner.semaCtx.GetTypeResolver())
   336  				if err != nil {
   337  					return makeErrEvent(err)
   338  				}
   339  				typeHints[i] = resolved
   340  			}
   341  		}
   342  		if _, err := ex.addPreparedStmt(
   343  			ctx, name,
   344  			Statement{
   345  				Statement: parser.Statement{
   346  					// We need the SQL string just for the part that comes after
    347  					// "PREPARE ... AS".
   348  					// TODO(radu): it would be nice if the parser would figure out this
   349  					// string and store it in tree.Prepare.
   350  					SQL:             tree.AsStringWithFlags(s.Statement, tree.FmtParsable),
   351  					AST:             s.Statement,
   352  					NumPlaceholders: stmt.NumPlaceholders,
   353  					NumAnnotations:  stmt.NumAnnotations,
   354  				},
   355  			},
   356  			typeHints,
   357  			PreparedStatementOriginSQL,
   358  		); err != nil {
   359  			return makeErrEvent(err)
   360  		}
   361  		return nil, nil, nil
   362  
   363  	case *tree.Execute:
   364  		// Replace the `EXECUTE foo` statement with the prepared statement, and
   365  		// continue execution below.
   366  		name := s.Name.String()
   367  		ps, ok := ex.extraTxnState.prepStmtsNamespace.prepStmts[name]
   368  		if !ok {
   369  			err := pgerror.Newf(
   370  				pgcode.InvalidSQLStatementName,
   371  				"prepared statement %q does not exist", name,
   372  			)
   373  			return makeErrEvent(err)
   374  		}
   375  		var err error
   376  		pinfo, err = fillInPlaceholders(ctx, ps, name, s.Params, ex.sessionData.SearchPath)
   377  		if err != nil {
   378  			return makeErrEvent(err)
   379  		}
   380  
   381  		stmt.Statement = ps.Statement
   382  		stmt.Prepared = ps
   383  		stmt.ExpectedTypes = ps.Columns
   384  		stmt.AnonymizedStr = ps.AnonymizedStr
   385  		res.ResetStmtType(ps.AST)
   386  
   387  		if s.DiscardRows {
   388  			p.discardRows = true
   389  		}
   390  	}
   391  
   392  	p.semaCtx.Annotations = tree.MakeAnnotations(stmt.NumAnnotations)
   393  
   394  	// For regular statements (the ones that get to this point), we
   395  	// don't return any event unless an error happens.
   396  
   397  	if os.ImplicitTxn.Get() {
   398  		asOfTs, err := p.isAsOf(ctx, stmt.AST)
   399  		if err != nil {
   400  			return makeErrEvent(err)
   401  		}
   402  		if asOfTs != nil {
   403  			p.semaCtx.AsOfTimestamp = asOfTs
   404  			p.extendedEvalCtx.SetTxnTimestamp(asOfTs.GoTime())
   405  			ex.state.setHistoricalTimestamp(ctx, *asOfTs)
   406  		}
   407  	} else {
    408  	// If we're in an explicit txn, we allow AOST but only if it matches
   409  		// the transaction's timestamp. This is useful for running AOST statements
   410  		// using the InternalExecutor inside an external transaction; one might want
   411  		// to do that to force p.avoidCachedDescriptors to be set below.
   412  		ts, err := p.isAsOf(ctx, stmt.AST)
   413  		if err != nil {
   414  			return makeErrEvent(err)
   415  		}
   416  		if ts != nil {
   417  			if readTs := ex.state.getReadTimestamp(); *ts != readTs {
   418  				err = pgerror.Newf(pgcode.Syntax,
   419  					"inconsistent AS OF SYSTEM TIME timestamp; expected: %s", readTs)
   420  				err = errors.WithHint(err, "try SET TRANSACTION AS OF SYSTEM TIME")
   421  				return makeErrEvent(err)
   422  			}
   423  			p.semaCtx.AsOfTimestamp = ts
   424  		}
   425  	}
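         	// For example (illustrative timestamps), inside a transaction opened
         	// with
         	//   BEGIN AS OF SYSTEM TIME '2020-01-01 00:00:00'
         	// a statement reading AS OF SYSTEM TIME '2020-01-01 00:00:00' is
         	// accepted since it matches the transaction's read timestamp; any
         	// other AOST timestamp produces the error above.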
   426  
   427  	// The first order of business is to ensure proper sequencing
   428  	// semantics.  As per PostgreSQL's dialect specs, the "read" part of
    429  	// statements always sees the data as per a snapshot of the database
   430  	// taken the instant the statement begins to run. In particular a
   431  	// mutation does not see its own writes. If a query contains
   432  	// multiple mutations using CTEs (WITH) or a read part following a
   433  	// mutation, all still operate on the same read snapshot.
   434  	//
   435  	// (To communicate data between CTEs and a main query, the result
   436  	// set / RETURNING can be used instead. However this is not relevant
   437  	// here.)
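         	// For example, under these semantics
         	//   WITH d AS (DELETE FROM t RETURNING k) SELECT count(*) FROM t
         	// has its SELECT read the snapshot taken before the DELETE's writes,
         	// so the count is unaffected by the deletion.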
   438  
   439  	// We first ensure stepping mode is enabled.
   440  	//
   441  	// This ought to be done just once when a txn gets initialized;
   442  	// unfortunately, there are too many places where the txn object
   443  	// is re-configured, re-set etc without using NewTxnWithSteppingEnabled().
   444  	//
   445  	// Manually hunting them down and calling ConfigureStepping() each
    446  	// time would be error prone (and increase the chance that a future
   447  	// change would forget to add the call).
   448  	//
    449  	// TODO(andrei): really the code should be re-architected to ensure
   450  	// that all uses of SQL execution initialize the client.Txn using a
   451  	// single/common function. That would be where the stepping mode
   452  	// gets enabled once for all SQL statements executed "underneath".
   453  	prevSteppingMode := ex.state.mu.txn.ConfigureStepping(ctx, kv.SteppingEnabled)
   454  	defer func() { _ = ex.state.mu.txn.ConfigureStepping(ctx, prevSteppingMode) }()
   455  
   456  	// Then we create a sequencing point.
   457  	//
   458  	// This is not the only place where a sequencing point is
    459  	// placed. There are also sequencing points after every stage of
   460  	// constraint checks and cascading actions at the _end_ of a
   461  	// statement's execution.
   462  	//
   463  	// TODO(knz): At the time of this writing CockroachDB performs
   464  	// cascading actions and the corresponding FK existence checks
   465  	// interleaved with mutations. This is incorrect; the correct
   466  	// behavior, as described in issue
   467  	// https://github.com/cockroachdb/cockroach/issues/33475, is to
   468  	// execute cascading actions no earlier than after all the "main
   469  	// effects" of the current statement (including all its CTEs) have
   470  	// completed. There should be a sequence point between the end of
   471  	// the main execution and the start of the cascading actions, as
    472  	// well as in-between every stage of cascading actions.
   473  	// This TODO can be removed when the cascading code is reorganized
   474  	// accordingly and the missing call to Step() is introduced.
   475  	if err := ex.state.mu.txn.Step(ctx); err != nil {
   476  		return makeErrEvent(err)
   477  	}
   478  
   479  	if err := p.semaCtx.Placeholders.Assign(pinfo, stmt.NumPlaceholders); err != nil {
   480  		return makeErrEvent(err)
   481  	}
   482  	p.extendedEvalCtx.Placeholders = &p.semaCtx.Placeholders
   483  	p.extendedEvalCtx.Annotations = &p.semaCtx.Annotations
   484  	ex.phaseTimes[plannerStartExecStmt] = timeutil.Now()
   485  	p.stmt = &stmt
   486  	p.cancelChecker = sqlbase.NewCancelChecker(ctx)
   487  	p.autoCommit = os.ImplicitTxn.Get() && !ex.server.cfg.TestingKnobs.DisableAutoCommit
   488  	if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil {
   489  		return nil, nil, err
   490  	}
   491  	if err := res.Err(); err != nil {
   492  		return makeErrEvent(err)
   493  	}
   494  
   495  	txn := ex.state.mu.txn
   496  
   497  	if !os.ImplicitTxn.Get() && txn.IsSerializablePushAndRefreshNotPossible() {
   498  		rc, canAutoRetry := ex.getRewindTxnCapability()
   499  		if canAutoRetry {
   500  			ev := eventRetriableErr{
   501  				IsCommit:     fsm.FromBool(isCommit(stmt.AST)),
   502  				CanAutoRetry: fsm.FromBool(canAutoRetry),
   503  			}
   504  			txn.ManualRestart(ctx, ex.server.cfg.Clock.Now())
   505  			payload := eventRetriableErrPayload{
   506  				err: roachpb.NewTransactionRetryWithProtoRefreshError(
   507  					"serializable transaction timestamp pushed (detected by connExecutor)",
   508  					txn.ID(),
   509  					// No updated transaction required; we've already manually updated our
   510  					// client.Txn.
   511  					roachpb.Transaction{},
   512  				),
   513  				rewCap: rc,
   514  			}
   515  			return ev, payload, nil
   516  		}
   517  	}
   518  	// No event was generated.
   519  	return nil, nil, nil
   520  }
   521  
    522  // checkTableTwoVersionInvariant checks that, for each table schema modified
    523  // by this txn and written at version V, the only valid leases are at version
    524  // V - 1. A transaction retry error is returned whenever the invariant is
    525  // violated. Before returning the retry error the current transaction is
    526  // rolled-back and the function waits until there are only outstanding
    527  // leases on the current version. This allows the retry to succeed in the
   528  // event that there are no other schema changes simultaneously contending with
   529  // this txn.
   530  //
   531  // checkTableTwoVersionInvariant blocks until it's legal for the modified
   532  // table descriptors (if any) to be committed.
   533  // Reminder: a descriptor version v can only be written at a timestamp
   534  // that's not covered by a lease on version v-2. So, if the current
   535  // txn wants to write some updated descriptors, it needs
   536  // to wait until all incompatible leases are revoked or expire. If
   537  // incompatible leases exist, we'll block waiting for these leases to
   538  // go away. Then, the transaction is restarted by generating a retriable error.
   539  // Note that we're relying on the fact that the number of conflicting
   540  // leases will only go down over time: no new conflicting leases can be
   541  // created as of the time of this call because v-2 can't be leased once
   542  // v-1 exists.
   543  //
   544  // If this method succeeds it is the caller's responsibility to release the
   545  // executor's table leases after the txn commits so that schema changes can
   546  // proceed.
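         // For example (illustrative numbers): to write a descriptor at version
         // V = 5, no lease on version 3 (V-2) may remain. Leases on version 4
         // (V-1) are acceptable, and no new version-3 leases can appear because
         // version 4 already exists.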
   547  func (ex *connExecutor) checkTableTwoVersionInvariant(ctx context.Context) error {
   548  	tables := ex.extraTxnState.descCollection.GetTablesWithNewVersion()
   549  	if tables == nil {
   550  		return nil
   551  	}
   552  	txn := ex.state.mu.txn
   553  	if txn.IsCommitted() {
   554  		panic("transaction has already committed")
   555  	}
   556  
    557  	// We potentially hold leases for tables which we've modified and which
    558  	// we need to drop. Say we're updating tables at version V. All leases
   559  	// for version V-2 need to be dropped immediately, otherwise the check
   560  	// below that nobody holds leases for version V-2 will fail. Worse yet,
   561  	// the code below loops waiting for nobody to hold leases on V-2. We also
   562  	// may hold leases for version V-1 of modified tables that are good to drop
   563  	// but not as vital for correctness. It's good to drop them because as soon
   564  	// as this transaction commits jobs may start and will need to wait until
   565  	// the lease expires. It is safe because V-1 must remain valid until this
   566  	// transaction commits; if we commit then nobody else could have written
   567  	// a new V beneath us because we've already laid down an intent.
   568  	//
   569  	// All this being said, we must retain our leases on tables which we have
   570  	// not modified to ensure that our writes to those other tables in this
   571  	// transaction remain valid.
   572  	ex.extraTxnState.descCollection.ReleaseTableLeases(ctx, tables)
   573  
    574  	// We know that, so long as there are no leases on the updated tables as of
    575  	// the current provisional commit timestamp for this transaction, then if
    576  	// this transaction ends up committing, none will have been created in the
    577  	// meantime.
   578  	count, err := lease.CountLeases(ctx, ex.server.cfg.InternalExecutor, tables, txn.ProvisionalCommitTimestamp())
   579  	if err != nil {
   580  		return err
   581  	}
   582  	if count == 0 {
   583  		return nil
   584  	}
   585  
   586  	// Restart the transaction so that it is able to replay itself at a newer timestamp
   587  	// with the hope that the next time around there will be leases only at the current
   588  	// version.
   589  	retryErr := txn.PrepareRetryableError(ctx,
   590  		fmt.Sprintf(
   591  			`cannot publish new versions for tables: %v, old versions still in use`,
   592  			tables))
   593  	// We cleanup the transaction and create a new transaction after
   594  	// waiting for the invariant to be satisfied because the wait time
   595  	// might be extensive and intents can block out leases being created
   596  	// on a descriptor.
   597  	//
    598  	// TODO(vivek): Change this to restart a txn while fixing #20526. All the
   599  	// table descriptor intents can be laid down here after the invariant
   600  	// has been checked.
   601  	userPriority := txn.UserPriority()
    602  	// We clean up the transaction and create a new one because the wait time
    603  	// might be extensive and so we'd better get rid of all the intents.
   604  	txn.CleanupOnError(ctx, retryErr)
   605  	// Release the rest of our leases on unmodified tables so we don't hold up
   606  	// schema changes there and potentially create a deadlock.
   607  	ex.extraTxnState.descCollection.ReleaseLeases(ctx)
   608  
   609  	// Wait until all older version leases have been released or expired.
   610  	for r := retry.StartWithCtx(ctx, base.DefaultRetryOptions()); r.Next(); {
   611  		// Use the current clock time.
   612  		now := ex.server.cfg.Clock.Now()
   613  		count, err := lease.CountLeases(ctx, ex.server.cfg.InternalExecutor, tables, now)
   614  		if err != nil {
   615  			return err
   616  		}
   617  		if count == 0 {
   618  			break
   619  		}
   620  		if ex.server.cfg.SchemaChangerTestingKnobs.TwoVersionLeaseViolation != nil {
   621  			ex.server.cfg.SchemaChangerTestingKnobs.TwoVersionLeaseViolation()
   622  		}
   623  	}
   624  
   625  	// Create a new transaction to retry with a higher timestamp than the
   626  	// timestamps used in the retry loop above.
   627  	ex.state.mu.txn = kv.NewTxnWithSteppingEnabled(ctx, ex.transitionCtx.db, ex.transitionCtx.nodeIDOrZero)
   628  	if err := ex.state.mu.txn.SetUserPriority(userPriority); err != nil {
   629  		return err
   630  	}
   631  	return retryErr
   632  }
   633  
   634  // commitSQLTransaction executes a commit after the execution of a
    635  // stmt, which can be any statement when running in an
   636  // implicit transaction, or a COMMIT statement when using an explicit
   637  // transaction.
   638  func (ex *connExecutor) commitSQLTransaction(
   639  	ctx context.Context, stmt tree.Statement,
   640  ) (fsm.Event, fsm.EventPayload) {
   641  	err := ex.commitSQLTransactionInternal(ctx, stmt)
   642  	if err != nil {
   643  		return ex.makeErrEvent(err, stmt)
   644  	}
   645  	return eventTxnFinish{}, eventTxnFinishPayload{commit: true}
   646  }
   647  
   648  func (ex *connExecutor) commitSQLTransactionInternal(
   649  	ctx context.Context, stmt tree.Statement,
   650  ) error {
   651  	if err := validatePrimaryKeys(&ex.extraTxnState.descCollection); err != nil {
   652  		return err
   653  	}
   654  
   655  	if err := ex.checkTableTwoVersionInvariant(ctx); err != nil {
   656  		return err
   657  	}
   658  
   659  	if err := ex.state.mu.txn.Commit(ctx); err != nil {
   660  		return err
   661  	}
   662  
   663  	// Now that we've committed, if we modified any table we need to make sure
   664  	// to release the leases for them so that the schema change can proceed and
   665  	// we don't block the client.
   666  	if tables := ex.extraTxnState.descCollection.GetTablesWithNewVersion(); tables != nil {
   667  		ex.extraTxnState.descCollection.ReleaseLeases(ctx)
   668  	}
   669  	return nil
   670  }
   671  
    672  // validatePrimaryKeys verifies that all tables modified in the transaction
    673  // still have a primary key: a DROP PRIMARY KEY is required to be followed by
    674  // an ADD PRIMARY KEY before the transaction commits.
   675  func validatePrimaryKeys(tc *descs.Collection) error {
   676  	modifiedTables := tc.GetTablesWithNewVersion()
   677  	for i := range modifiedTables {
   678  		table := tc.GetUncommittedTableByID(modifiedTables[i].ID).MutableTableDescriptor
   679  		if !table.HasPrimaryKey() {
   680  			return unimplemented.NewWithIssuef(48026,
   681  				"primary key of table %s dropped without subsequent addition of new primary key",
   682  				table.Name,
   683  			)
   684  		}
   685  	}
   686  	return nil
   687  }
   688  
   689  // rollbackSQLTransaction executes a ROLLBACK statement: the KV transaction is
   690  // rolled-back and an event is produced.
   691  func (ex *connExecutor) rollbackSQLTransaction(ctx context.Context) (fsm.Event, fsm.EventPayload) {
   692  	if err := ex.state.mu.txn.Rollback(ctx); err != nil {
   693  		log.Warningf(ctx, "txn rollback failed: %s", err)
   694  	}
   695  	// We're done with this txn.
   696  	return eventTxnFinish{}, eventTxnFinishPayload{commit: false}
   697  }
   698  
   699  // dispatchToExecutionEngine executes the statement, writes the result to res
   700  // and returns an event for the connection's state machine.
   701  //
   702  // If an error is returned, the connection needs to stop processing queries.
   703  // Query execution errors are written to res; they are not returned; it is
   704  // expected that the caller will inspect res and react to query errors by
   705  // producing an appropriate state machine event.
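         // A sketch of the caller-side contract (mirroring execStmtInOpenState):
         //
         //	if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil {
         //		return nil, nil, err // communication error; connection is done
         //	}
         //	if err := res.Err(); err != nil {
         //		return makeErrEvent(err) // query error, turned into an event
         //	}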
   706  func (ex *connExecutor) dispatchToExecutionEngine(
   707  	ctx context.Context, planner *planner, res RestrictedCommandResult,
   708  ) error {
   709  	stmt := planner.stmt
   710  	ex.sessionTracing.TracePlanStart(ctx, stmt.AST.StatementTag())
   711  	ex.statsCollector.phaseTimes[plannerStartLogicalPlan] = timeutil.Now()
   712  
   713  	// Prepare the plan. Note, the error is processed below. Everything
   714  	// between here and there needs to happen even if there's an error.
   715  	err := ex.makeExecPlan(ctx, planner)
   716  	// We'll be closing the plan manually below after execution; this
   717  	// defer is a catch-all in case some other return path is taken.
   718  	defer planner.curPlan.close(ctx)
   719  
   720  	if planner.autoCommit {
   721  		planner.curPlan.flags.Set(planFlagImplicitTxn)
   722  	}
   723  
   724  	// Certain statements want their results to go to the client
   725  	// directly. Configure this here.
   726  	if planner.curPlan.avoidBuffering {
   727  		res.DisableBuffering()
   728  	}
   729  
   730  	defer func() {
   731  		planner.maybeLogStatement(
   732  			ctx,
   733  			ex.executorType,
   734  			ex.extraTxnState.autoRetryCounter,
   735  			res.RowsAffected(),
   736  			res.Err(),
   737  			ex.statsCollector.phaseTimes[sessionQueryReceived],
   738  		)
   739  	}()
   740  
   741  	ex.statsCollector.phaseTimes[plannerEndLogicalPlan] = timeutil.Now()
   742  	ex.sessionTracing.TracePlanEnd(ctx, err)
   743  
   744  	// Finally, process the planning error from above.
   745  	if err != nil {
   746  		res.SetError(err)
   747  		return nil
   748  	}
   749  
   750  	var cols sqlbase.ResultColumns
   751  	if stmt.AST.StatementType() == tree.Rows {
   752  		cols = planner.curPlan.main.planColumns()
   753  	}
   754  	if err := ex.initStatementResult(ctx, res, stmt, cols); err != nil {
   755  		res.SetError(err)
   756  		return nil
   757  	}
   758  
   759  	ex.sessionTracing.TracePlanCheckStart(ctx)
   760  	distributePlan := willDistributePlan(
   761  		ctx, planner.execCfg.NodeID, ex.sessionData.DistSQLMode, planner.curPlan.main,
   762  	)
   763  	ex.sessionTracing.TracePlanCheckEnd(ctx, nil, distributePlan)
   764  
   765  	if ex.server.cfg.TestingKnobs.BeforeExecute != nil {
   766  		ex.server.cfg.TestingKnobs.BeforeExecute(ctx, stmt.String())
   767  	}
   768  
   769  	ex.statsCollector.phaseTimes[plannerStartExecStmt] = timeutil.Now()
   770  
   771  	ex.mu.Lock()
   772  	queryMeta, ok := ex.mu.ActiveQueries[stmt.queryID]
   773  	if !ok {
   774  		ex.mu.Unlock()
   775  		panic(fmt.Sprintf("query %d not in registry", stmt.queryID))
   776  	}
   777  	queryMeta.phase = executing
   778  	queryMeta.isDistributed = distributePlan
   779  	progAtomic := &queryMeta.progressAtomic
   780  	ex.mu.Unlock()
   781  
    782  	// We need to set the "exec done" flag early because the plan may be
    783  	// closed during execution (PlanAndRun), and curPlan.close() will need
    784  	// to observe the flag.
   785  	//
   786  	// TODO(knz): This is a mis-design. Andrei says "it's OK if
   787  	// execution closes the plan" but it transfers responsibility to
   788  	// run any "finalizers" on the plan (including plan sampling for
   789  	// stats) to the execution engine. That's a lot of responsibility
   790  	// to transfer! It would be better if this responsibility remained
   791  	// around here.
   792  	planner.curPlan.flags.Set(planFlagExecDone)
   793  
   794  	if distributePlan {
   795  		planner.curPlan.flags.Set(planFlagDistributed)
   796  	} else {
   797  		planner.curPlan.flags.Set(planFlagDistSQLLocal)
   798  	}
   799  	ex.sessionTracing.TraceExecStart(ctx, "distributed")
   800  	bytesRead, rowsRead, err := ex.execWithDistSQLEngine(ctx, planner, stmt.AST.StatementType(), res, distributePlan, progAtomic)
   801  	ex.sessionTracing.TraceExecEnd(ctx, res.Err(), res.RowsAffected())
   802  	ex.statsCollector.phaseTimes[plannerEndExecStmt] = timeutil.Now()
   803  
   804  	// Record the statement summary. This also closes the plan if the
   805  	// plan has not been closed earlier.
   806  	ex.recordStatementSummary(
   807  		ctx, planner,
   808  		ex.extraTxnState.autoRetryCounter, res.RowsAffected(), res.Err(), bytesRead, rowsRead,
   809  	)
   810  	if ex.server.cfg.TestingKnobs.AfterExecute != nil {
   811  		ex.server.cfg.TestingKnobs.AfterExecute(ctx, stmt.String(), res.Err())
   812  	}
   813  
   814  	return err
   815  }
   816  
   817  // makeExecPlan creates an execution plan and populates planner.curPlan using
   818  // the cost-based optimizer.
   819  func (ex *connExecutor) makeExecPlan(ctx context.Context, planner *planner) error {
   820  	planner.curPlan.init(planner.stmt, ex.appStats)
   821  	if planner.collectBundle {
   822  		planner.curPlan.instrumentation.savePlanString = true
   823  	}
   824  
   825  	if err := planner.makeOptimizerPlan(ctx); err != nil {
   826  		log.VEventf(ctx, 1, "optimizer plan failed: %v", err)
   827  		return err
   828  	}
   829  
   830  	// TODO(knz): Remove this accounting if/when savepoint rollbacks
   831  	// support rolling back over DDL.
   832  	if planner.curPlan.flags.IsSet(planFlagIsDDL) {
   833  		ex.extraTxnState.numDDL++
   834  	}
   835  
   836  	return nil
   837  }
   838  
   839  // execWithDistSQLEngine converts a plan to a distributed SQL physical plan and
   840  // runs it.
   841  // If an error is returned, the connection needs to stop processing queries.
   842  // Query execution errors are written to res; they are not returned.
   843  func (ex *connExecutor) execWithDistSQLEngine(
   844  	ctx context.Context,
   845  	planner *planner,
   846  	stmtType tree.StatementType,
   847  	res RestrictedCommandResult,
   848  	distribute bool,
   849  	progressAtomic *uint64,
   850  ) (bytesRead, rowsRead int64, _ error) {
   851  	recv := MakeDistSQLReceiver(
   852  		ctx, res, stmtType,
   853  		ex.server.cfg.RangeDescriptorCache, ex.server.cfg.LeaseHolderCache,
   854  		planner.txn,
   855  		func(ts hlc.Timestamp) {
   856  			ex.server.cfg.Clock.Update(ts)
   857  		},
   858  		&ex.sessionTracing,
   859  	)
   860  	recv.progressAtomic = progressAtomic
   861  	defer recv.Release()
   862  
   863  	evalCtx := planner.ExtendedEvalContext()
   864  	planCtx := ex.server.cfg.DistSQLPlanner.NewPlanningCtx(ctx, evalCtx, planner.txn, distribute)
   865  	planCtx.planner = planner
   866  	planCtx.stmtType = recv.stmtType
   867  	if planner.collectBundle {
   868  		planCtx.saveDiagram = func(diagram execinfrapb.FlowDiagram) {
   869  			planner.curPlan.distSQLDiagrams = append(planner.curPlan.distSQLDiagrams, diagram)
   870  		}
   871  	}
   872  
   873  	var evalCtxFactory func() *extendedEvalContext
   874  	if len(planner.curPlan.subqueryPlans) != 0 ||
   875  		len(planner.curPlan.cascades) != 0 ||
   876  		len(planner.curPlan.checkPlans) != 0 {
   877  		// The factory reuses the same object because the contexts are not used
   878  		// concurrently.
   879  		var factoryEvalCtx extendedEvalContext
   880  		ex.initEvalCtx(ctx, &factoryEvalCtx, planner)
   881  		evalCtxFactory = func() *extendedEvalContext {
   882  			ex.resetEvalCtx(&factoryEvalCtx, planner.txn, planner.ExtendedEvalContext().StmtTimestamp)
   883  			factoryEvalCtx.Placeholders = &planner.semaCtx.Placeholders
   884  			factoryEvalCtx.Annotations = &planner.semaCtx.Annotations
   885  			// Query diagnostics can change the Context; make sure we are using the
   886  			// same one.
   887  			// TODO(radu): consider removing this if/when #46164 is addressed.
   888  			factoryEvalCtx.Context = evalCtx.Context
   889  			return &factoryEvalCtx
   890  		}
   891  	}
   892  
   893  	if len(planner.curPlan.subqueryPlans) != 0 {
   894  		if !ex.server.cfg.DistSQLPlanner.PlanAndRunSubqueries(
   895  			ctx, planner, evalCtxFactory, planner.curPlan.subqueryPlans, recv, distribute,
   896  		) {
   897  			return recv.bytesRead, recv.rowsRead, recv.commErr
   898  		}
   899  	}
   900  	recv.discardRows = planner.discardRows
   901  	// We pass in whether or not we wanted to distribute this plan, which tells
   902  	// the planner whether or not to plan remote table readers.
   903  	cleanup := ex.server.cfg.DistSQLPlanner.PlanAndRun(
   904  		ctx, evalCtx, planCtx, planner.txn, planner.curPlan.main, recv,
   905  	)
   906  	// Note that we're not cleaning up right away because postqueries might
   907  	// need to have access to the main query tree.
   908  	defer cleanup()
   909  	if recv.commErr != nil || res.Err() != nil {
   910  		return recv.bytesRead, recv.rowsRead, recv.commErr
   911  	}
   912  
   913  	ex.server.cfg.DistSQLPlanner.PlanAndRunCascadesAndChecks(
   914  		ctx, planner, evalCtxFactory, &planner.curPlan.planComponents, recv, distribute,
   915  	)
   916  
   917  	return recv.bytesRead, recv.rowsRead, recv.commErr
   918  }
   919  
   920  // beginTransactionTimestampsAndReadMode computes the timestamps and
   921  // ReadWriteMode to be used for the associated transaction state based on the
   922  // values of the statement's Modes. Note that this method may reset the
   923  // connExecutor's planner in order to compute the timestamp for the AsOf clause
   924  // if it exists. The timestamps correspond to the timestamps passed to
   925  // makeEventTxnStartPayload; txnSQLTimestamp propagates to become the
    926  // TxnTimestamp, while historicalTimestamp is populated with a non-nil value
    927  // only if the BeginTransaction statement has a non-nil AsOf clause expression. A
   928  // non-nil historicalTimestamp implies a ReadOnly rwMode.
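         // For example (illustrative):
         //
         //	BEGIN                                  -> session default mode, now, nil
         //	BEGIN AS OF SYSTEM TIME '2020-01-01'   -> ReadOnly, that time, &ts
         //	BEGIN READ WRITE AS OF SYSTEM TIME ... -> ErrAsOfSpecifiedWithReadWrite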
   929  func (ex *connExecutor) beginTransactionTimestampsAndReadMode(
   930  	ctx context.Context, s *tree.BeginTransaction,
   931  ) (
   932  	rwMode tree.ReadWriteMode,
   933  	txnSQLTimestamp time.Time,
   934  	historicalTimestamp *hlc.Timestamp,
   935  	err error,
   936  ) {
   937  	now := ex.server.cfg.Clock.PhysicalTime()
   938  	if s.Modes.AsOf.Expr == nil {
   939  		rwMode = ex.readWriteModeWithSessionDefault(s.Modes.ReadWriteMode)
   940  		return rwMode, now, nil, nil
   941  	}
   942  	ex.statsCollector.reset(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
   943  	p := &ex.planner
   944  	ex.resetPlanner(ctx, p, nil /* txn */, now)
   945  	ts, err := p.EvalAsOfTimestamp(ctx, s.Modes.AsOf)
   946  	if err != nil {
   947  		return 0, time.Time{}, nil, err
   948  	}
   949  	// NB: This check should never return an error because the parser should
   950  	// disallow the creation of a TransactionModes struct which both has an
    951  	// AOST clause and is ReadWrite. Performing the check here decouples this
    952  	// code from the parser and makes it clear that returning ReadOnly with
    953  	// a historical timestamp is intended.
   954  	if s.Modes.ReadWriteMode == tree.ReadWrite {
   955  		return 0, time.Time{}, nil, tree.ErrAsOfSpecifiedWithReadWrite
   956  	}
   957  	return tree.ReadOnly, ts.GoTime(), &ts, nil
   958  }
   959  
   960  // execStmtInNoTxnState "executes" a statement when no transaction is in scope.
   961  // For anything but BEGIN, this method doesn't actually execute the statement;
   962  // it just returns an Event that will generate a transaction. The statement will
   963  // then be executed again, but this time in the Open state (implicit txn).
   964  //
   965  // Note that eventTxnStart, which is generally returned by this method, causes
   966  // the state to change and previous results to be flushed, but for implicit txns
   967  // the cursor is not advanced. This means that the statement will run again in
    968  // stateOpen, at which point its results will also be flushed.
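         // For example, a bare
         //   SELECT 1
         // arriving with no transaction in scope returns
         // eventTxnStart{ImplicitTxn: fsm.True}; the cursor stays put and the
         // SELECT is then executed again in stateOpen.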
   969  func (ex *connExecutor) execStmtInNoTxnState(
   970  	ctx context.Context, stmt Statement,
   971  ) (_ fsm.Event, payload fsm.EventPayload) {
   972  	switch s := stmt.AST.(type) {
   973  	case *tree.BeginTransaction:
   974  		ex.incrementStartedStmtCounter(stmt)
   975  		defer func() {
   976  			if !payloadHasError(payload) {
   977  				ex.incrementExecutedStmtCounter(stmt)
   978  			}
   979  		}()
   980  		mode, sqlTs, historicalTs, err := ex.beginTransactionTimestampsAndReadMode(ctx, s)
   981  		if err != nil {
   982  			return ex.makeErrEvent(err, s)
   983  		}
   984  		return eventTxnStart{ImplicitTxn: fsm.False},
   985  			makeEventTxnStartPayload(
   986  				ex.txnPriorityWithSessionDefault(s.Modes.UserPriority),
   987  				mode,
   988  				sqlTs,
   989  				historicalTs,
   990  				ex.transitionCtx)
   991  	case *tree.CommitTransaction, *tree.ReleaseSavepoint,
   992  		*tree.RollbackTransaction, *tree.SetTransaction, *tree.Savepoint:
   993  		return ex.makeErrEvent(errNoTransactionInProgress, stmt.AST)
   994  	default:
   995  		// NB: Implicit transactions are created without a historical timestamp even
   996  		// though the statement might contain an AOST clause. In these cases the
    997  		// clause is evaluated and applied in execStmtInOpenState.
   998  		return eventTxnStart{ImplicitTxn: fsm.True},
   999  			makeEventTxnStartPayload(
  1000  				ex.txnPriorityWithSessionDefault(tree.UnspecifiedUserPriority),
  1001  				ex.readWriteModeWithSessionDefault(tree.UnspecifiedReadWriteMode),
  1002  				ex.server.cfg.Clock.PhysicalTime(),
  1003  				nil, /* historicalTimestamp */
  1004  				ex.transitionCtx)
  1005  	}
  1006  }
  1007  
  1008  // execStmtInAbortedState executes a statement in a txn that's in state
  1009  // Aborted or RestartWait. All statements result in error events except:
  1010  // - COMMIT / ROLLBACK: aborts the current transaction.
  1011  // - ROLLBACK TO SAVEPOINT / SAVEPOINT: reopens the current transaction,
  1012  //   allowing it to be retried.
  1013  func (ex *connExecutor) execStmtInAbortedState(
  1014  	ctx context.Context, stmt Statement, res RestrictedCommandResult,
  1015  ) (_ fsm.Event, payload fsm.EventPayload) {
  1016  	ex.incrementStartedStmtCounter(stmt)
  1017  	defer func() {
  1018  		if !payloadHasError(payload) {
  1019  			ex.incrementExecutedStmtCounter(stmt)
  1020  		}
  1021  	}()
  1022  
  1023  	reject := func() (fsm.Event, fsm.EventPayload) {
  1024  		ev := eventNonRetriableErr{IsCommit: fsm.False}
  1025  		payload := eventNonRetriableErrPayload{
  1026  			err: sqlbase.NewTransactionAbortedError("" /* customMsg */),
  1027  		}
  1028  		return ev, payload
  1029  	}
  1030  
  1031  	switch s := stmt.AST.(type) {
  1032  	case *tree.CommitTransaction, *tree.RollbackTransaction:
  1033  		if _, ok := s.(*tree.CommitTransaction); ok {
  1034  			// Note: Postgres replies to COMMIT of failed txn with "ROLLBACK" too.
  1035  			res.ResetStmtType((*tree.RollbackTransaction)(nil))
  1036  		}
  1037  		return ex.rollbackSQLTransaction(ctx)
  1038  
  1039  	case *tree.RollbackToSavepoint:
  1040  		return ex.execRollbackToSavepointInAbortedState(ctx, s)
  1041  
  1042  	case *tree.Savepoint:
  1043  		if ex.isCommitOnReleaseSavepoint(s.Name) {
  1044  			// We allow SAVEPOINT cockroach_restart as an alternative to ROLLBACK TO
  1045  			// SAVEPOINT cockroach_restart in the Aborted state. This is needed
  1046  			// because any client driver (that we know of) which links subtransaction
  1047  			// `ROLLBACK/RELEASE` to an object's lifetime will fail to `ROLLBACK` on a
  1048  			// failed `RELEASE`. Instead, we now can use the creation of another
  1049  			// subtransaction object (which will issue another `SAVEPOINT` statement)
  1050  			// to indicate retry intent. Specifically, this change was prompted by
  1051  			// subtransaction handling in `libpqxx` (C++ driver) and `rust-postgres`
  1052  			// (Rust driver).
  1053  			res.ResetStmtType((*tree.RollbackToSavepoint)(nil))
  1054  			return ex.execRollbackToSavepointInAbortedState(
  1055  				ctx, &tree.RollbackToSavepoint{Savepoint: s.Name})
  1056  		}
  1057  		return reject()
  1058  
  1059  	default:
  1060  		return reject()
  1061  	}
  1062  }
  1063  
  1064  // execStmtInCommitWaitState executes a statement in a txn that's in state
  1065  // CommitWait.
  1066  // Everything but COMMIT/ROLLBACK causes errors. ROLLBACK is treated like COMMIT.
  1067  func (ex *connExecutor) execStmtInCommitWaitState(
  1068  	stmt Statement, res RestrictedCommandResult,
  1069  ) (ev fsm.Event, payload fsm.EventPayload) {
  1070  	ex.incrementStartedStmtCounter(stmt)
  1071  	defer func() {
  1072  		if !payloadHasError(payload) {
  1073  			ex.incrementExecutedStmtCounter(stmt)
  1074  		}
  1075  	}()
  1076  	switch stmt.AST.(type) {
  1077  	case *tree.CommitTransaction, *tree.RollbackTransaction:
  1078  		// Reply to a rollback with the COMMIT tag, by analogy to what we do when we
  1079  		// get a COMMIT in state Aborted.
  1080  		res.ResetStmtType((*tree.CommitTransaction)(nil))
  1081  		return eventTxnFinish{}, eventTxnFinishPayload{commit: false}
  1082  	default:
  1083  		ev = eventNonRetriableErr{IsCommit: fsm.False}
  1084  		payload = eventNonRetriableErrPayload{
  1085  			err: sqlbase.NewTransactionCommittedError(),
  1086  		}
  1087  		return ev, payload
  1088  	}
  1089  }
  1090  
  1091  // runObserverStatement executes the given observer statement.
  1092  //
  1093  // If an error is returned, the connection needs to stop processing queries.
  1094  func (ex *connExecutor) runObserverStatement(
  1095  	ctx context.Context, stmt Statement, res RestrictedCommandResult,
  1096  ) error {
  1097  	switch sqlStmt := stmt.AST.(type) {
  1098  	case *tree.ShowTransactionStatus:
  1099  		return ex.runShowTransactionState(ctx, res)
  1100  	case *tree.ShowSavepointStatus:
  1101  		return ex.runShowSavepointState(ctx, res)
  1102  	case *tree.ShowSyntax:
  1103  		return ex.runShowSyntax(ctx, sqlStmt.Statement, res)
  1104  	case *tree.SetTracing:
  1105  		ex.runSetTracing(ctx, sqlStmt, res)
  1106  		return nil
  1107  	default:
  1108  		res.SetError(errors.AssertionFailedf("unrecognized observer statement type %T", stmt.AST))
  1109  		return nil
  1110  	}
  1111  }
  1112  
  1113  // runShowSyntax executes a SHOW SYNTAX <stmt> query.
  1114  //
  1115  // If an error is returned, the connection needs to stop processing queries.
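         // For example (illustrative), SHOW SYNTAX 'select 1' produces one
         // (field, message) row per parsed statement, e.g. ('sql', 'SELECT 1'),
         // while a syntax error is reported as rows describing the error rather
         // than as a query error.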
  1116  func (ex *connExecutor) runShowSyntax(
  1117  	ctx context.Context, stmt string, res RestrictedCommandResult,
  1118  ) error {
  1119  	res.SetColumns(ctx, sqlbase.ShowSyntaxColumns)
  1120  	var commErr error
  1121  	parser.RunShowSyntax(ctx, stmt,
  1122  		func(ctx context.Context, field, msg string) {
  1123  			commErr = res.AddRow(ctx, tree.Datums{tree.NewDString(field), tree.NewDString(msg)})
  1124  		},
  1125  		func(ctx context.Context, err error) {
  1126  			sqltelemetry.RecordError(ctx, err, &ex.server.cfg.Settings.SV)
  1127  		},
  1128  	)
  1129  	return commErr
  1130  }
  1131  
  1132  // runShowTransactionState executes a SHOW TRANSACTION STATUS statement.
  1133  //
  1134  // If an error is returned, the connection needs to stop processing queries.
  1135  func (ex *connExecutor) runShowTransactionState(
  1136  	ctx context.Context, res RestrictedCommandResult,
  1137  ) error {
  1138  	res.SetColumns(ctx, sqlbase.ResultColumns{{Name: "TRANSACTION STATUS", Typ: types.String}})
  1139  
  1140  	state := fmt.Sprintf("%s", ex.machine.CurState())
  1141  	return res.AddRow(ctx, tree.Datums{tree.NewDString(state)})
  1142  }
  1143  
  1144  func (ex *connExecutor) runSetTracing(
  1145  	ctx context.Context, n *tree.SetTracing, res RestrictedCommandResult,
  1146  ) {
  1147  	if len(n.Values) == 0 {
  1148  		res.SetError(errors.AssertionFailedf("set tracing missing argument"))
  1149  		return
  1150  	}
  1151  
  1152  	modes := make([]string, len(n.Values))
  1153  	for i, v := range n.Values {
  1154  		v = unresolvedNameToStrVal(v)
  1155  		var strMode string
  1156  		switch val := v.(type) {
  1157  		case *tree.StrVal:
  1158  			strMode = val.RawString()
  1159  		case *tree.DBool:
  1160  			if *val {
  1161  				strMode = "on"
  1162  			} else {
  1163  				strMode = "off"
  1164  			}
  1165  		default:
  1166  			res.SetError(pgerror.New(pgcode.Syntax,
  1167  				"expected string or boolean for set tracing argument"))
  1168  			return
  1169  		}
  1170  		modes[i] = strMode
  1171  	}
  1172  
  1173  	if err := ex.enableTracing(modes); err != nil {
  1174  		res.SetError(err)
  1175  	}
  1176  }
  1177  
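         // enableTracing parses the SET TRACING mode strings and starts or stops
         // session tracing accordingly. For example,
         //
         //	SET TRACING = kv, local
         //
         // enables tracing with traceKV = true and tracing.SingleNodeRecording,
         // while SET TRACING = off stops tracing.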
  1178  func (ex *connExecutor) enableTracing(modes []string) error {
  1179  	traceKV := false
  1180  	recordingType := tracing.SnowballRecording
  1181  	enableMode := true
  1182  	showResults := false
  1183  
  1184  	for _, s := range modes {
  1185  		switch strings.ToLower(s) {
  1186  		case "results":
  1187  			showResults = true
  1188  		case "on":
  1189  			enableMode = true
  1190  		case "off":
  1191  			enableMode = false
  1192  		case "kv":
  1193  			traceKV = true
  1194  		case "local":
  1195  			recordingType = tracing.SingleNodeRecording
  1196  		case "cluster":
  1197  			recordingType = tracing.SnowballRecording
  1198  		default:
  1199  			return pgerror.Newf(pgcode.Syntax,
  1200  				"set tracing: unknown mode %q", s)
  1201  		}
  1202  	}
  1203  	if !enableMode {
  1204  		return ex.sessionTracing.StopTracing()
  1205  	}
  1206  	return ex.sessionTracing.StartTracing(recordingType, traceKV, showResults)
  1207  }
  1208  
  1209  // addActiveQuery adds a running query to the list of running queries.
  1210  //
  1211  // It returns a cleanup function that needs to be run when the query is no
  1212  // longer executing. NOTE(andrei): As of Feb 2018, "executing" does not imply
  1213  // that the results have been delivered to the client.
  1214  func (ex *connExecutor) addActiveQuery(
  1215  	queryID ClusterWideID, stmt Statement, cancelFun context.CancelFunc,
  1216  ) func() {
  1217  
  1218  	_, hidden := stmt.AST.(tree.HiddenFromShowQueries)
  1219  	qm := &queryMeta{
  1220  		txnID:         ex.state.mu.txn.ID(),
  1221  		start:         ex.phaseTimes[sessionQueryReceived],
  1222  		rawStmt:       stmt.SQL,
  1223  		phase:         preparing,
  1224  		isDistributed: false,
  1225  		ctxCancel:     cancelFun,
  1226  		hidden:        hidden,
  1227  	}
  1228  	ex.mu.Lock()
  1229  	ex.mu.ActiveQueries[queryID] = qm
  1230  	ex.mu.Unlock()
  1231  	return func() {
  1232  		ex.mu.Lock()
  1233  		_, ok := ex.mu.ActiveQueries[queryID]
  1234  		if !ok {
  1235  			ex.mu.Unlock()
  1236  			panic(fmt.Sprintf("query %d missing from ActiveQueries", queryID))
  1237  		}
  1238  		delete(ex.mu.ActiveQueries, queryID)
  1239  		ex.mu.LastActiveQuery = stmt.AST
  1240  
  1241  		ex.mu.Unlock()
  1242  	}
  1243  }
  1244  
  1245  // handleAutoCommit commits the KV transaction if it hasn't been committed
  1246  // already.
  1247  //
  1248  // It's possible that the statement constituting the implicit txn has already
  1249  // committed it (in case it tried to run as a 1PC). This method detects that
  1250  // case.
  1251  // NOTE(andrei): It bothers me some that we're peeking at txn to figure out
  1252  // whether we committed or not, where SQL could already know that - individual
  1253  // statements could report this back through the Event.
  1254  //
  1255  // Args:
  1256  // stmt: The statement that we just ran.
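         // For example (illustrative), a single
         //   INSERT INTO t VALUES (1)
         // in an implicit txn may be committed by the KV layer as a one-phase
         // commit during execution; in that case txn.IsCommitted() is already
         // true here and no second commit is issued.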
  1257  func (ex *connExecutor) handleAutoCommit(
  1258  	ctx context.Context, stmt tree.Statement,
  1259  ) (fsm.Event, fsm.EventPayload) {
  1260  	txn := ex.state.mu.txn
  1261  	if txn.IsCommitted() {
  1262  		log.Event(ctx, "statement execution committed the txn")
  1263  		return eventTxnFinish{}, eventTxnFinishPayload{commit: true}
  1264  	}
  1265  
  1266  	if knob := ex.server.cfg.TestingKnobs.BeforeAutoCommit; knob != nil {
  1267  		if err := knob(ctx, stmt.String()); err != nil {
  1268  			return ex.makeErrEvent(err, stmt)
  1269  		}
  1270  	}
  1271  
  1272  	ev, payload := ex.commitSQLTransaction(ctx, stmt)
  1273  	var err error
  1274  	if perr, ok := payload.(payloadWithError); ok {
  1275  		err = perr.errorCause()
  1276  	}
  1277  	log.VEventf(ctx, 2, "AutoCommit. err: %v", err)
  1278  	return ev, payload
  1279  }
  1280  
  1281  // incrementStartedStmtCounter increments the appropriate started
  1282  // statement counter for stmt's type.
  1283  func (ex *connExecutor) incrementStartedStmtCounter(stmt Statement) {
  1284  	ex.metrics.StartedStatementCounters.incrementCount(ex, stmt.AST)
  1285  }
  1286  
  1287  // incrementExecutedStmtCounter increments the appropriate executed
  1288  // statement counter for stmt's type.
  1289  func (ex *connExecutor) incrementExecutedStmtCounter(stmt Statement) {
  1290  	ex.metrics.ExecutedStatementCounters.incrementCount(ex, stmt.AST)
  1291  }
  1292  
  1293  // payloadHasError returns true if the passed payload implements
  1294  // payloadWithError.
  1295  func payloadHasError(payload fsm.EventPayload) bool {
  1296  	_, hasErr := payload.(payloadWithError)
  1297  	return hasErr
  1298  }
  1299  
  1300  // recordTransactionStart records the start of the transaction and returns a
  1301  // closure to be called once the transaction finishes.
  1302  func (ex *connExecutor) recordTransactionStart() func(txnEvent) {
  1303  	ex.state.mu.RLock()
  1304  	txnStart := ex.state.mu.txnStart
  1305  	ex.state.mu.RUnlock()
  1306  	implicit := ex.implicitTxn()
  1307  	return func(ev txnEvent) { ex.recordTransaction(ev, implicit, txnStart) }
  1308  }
  1309  
  1310  func (ex *connExecutor) recordTransaction(ev txnEvent, implicit bool, txnStart time.Time) {
  1311  	txnEnd := timeutil.Now()
  1312  	txnTime := txnEnd.Sub(txnStart)
  1313  	ex.metrics.EngineMetrics.SQLTxnLatency.RecordValue(txnTime.Nanoseconds())
  1314  	ex.statsCollector.recordTransaction(
  1315  		txnTime.Seconds(),
  1316  		ev,
  1317  		implicit,
  1318  	)
  1319  }