github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/conn_executor.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"io"
    17  	"math"
    18  	"strings"
    19  	"sync/atomic"
    20  	"time"
    21  	"unicode/utf8"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/config"
    24  	"github.com/cockroachdb/cockroach/pkg/kv"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
    27  	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
    28  	"github.com/cockroachdb/cockroach/pkg/settings"
    29  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/database"
    30  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descs"
    31  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    32  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    33  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    34  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    35  	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
    36  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    37  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    38  	"github.com/cockroachdb/cockroach/pkg/util"
    39  	"github.com/cockroachdb/cockroach/pkg/util/envutil"
    40  	"github.com/cockroachdb/cockroach/pkg/util/errorutil"
    41  	"github.com/cockroachdb/cockroach/pkg/util/fsm"
    42  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    43  	"github.com/cockroachdb/cockroach/pkg/util/log"
    44  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    45  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    46  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    47  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    48  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    49  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    50  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    51  	"github.com/cockroachdb/errors"
    52  	"github.com/cockroachdb/logtags"
    53  	"golang.org/x/net/trace"
    54  )
    55  
// noteworthyMemoryUsageBytes is the minimum size tracked by a
// transaction or session monitor before the monitor starts explicitly
// logging overall usage growth in the log.
//
// The value is obtained through envutil.EnvOrDefaultInt64 using the
// COCKROACH_NOTEWORTHY_SESSION_MEMORY_USAGE environment variable, with a
// default of 1024*1024 bytes (1 MiB).
var noteworthyMemoryUsageBytes = envutil.EnvOrDefaultInt64("COCKROACH_NOTEWORTHY_SESSION_MEMORY_USAGE", 1024*1024)
    60  
    61  // A connExecutor is in charge of executing queries received on a given client
    62  // connection. The connExecutor implements a state machine (dictated by the
    63  // Postgres/pgwire session semantics). The state machine is supposed to run
    64  // asynchronously wrt the client connection: it receives input statements
    65  // through a stmtBuf and produces results through a clientComm interface. The
    66  // connExecutor maintains a cursor over the statementBuffer and executes
    67  // statements / produces results for one statement at a time. The cursor points
    68  // at all times to the statement that the connExecutor is currently executing.
    69  // Results for statements before the cursor have already been produced (but not
    70  // necessarily delivered to the client). Statements after the cursor are queued
    71  // for future execution. Keeping already executed statements in the buffer is
    72  // useful in case of automatic retries (in which case statements from the
    73  // retried transaction have to be executed again); the connExecutor is in charge
    74  // of removing old statements that are no longer needed for retries from the
    75  // (head of the) buffer. Separately, the implementer of the clientComm interface
    76  // (e.g. the pgwire module) is in charge of keeping track of what results have
    77  // been delivered to the client and what results haven't (yet).
    78  //
    79  // The connExecutor has two main responsibilities: to dispatch queries to the
    80  // execution engine(s) and relay their results to the clientComm, and to
    81  // implement the state machine maintaining the various aspects of a connection's
    82  // state. The state machine implementation is further divided into two aspects:
    83  // maintaining the transaction status of the connection (outside of a txn,
    84  // inside a txn, in an aborted txn, in a txn awaiting client restart, etc.) and
    85  // maintaining the cursor position (i.e. correctly jumping to whatever the
    86  // "next" statement to execute is in various situations).
    87  //
    88  // The cursor normally advances one statement at a time, but it can also skip
    89  // some statements (remaining statements in a query string are skipped once an
    90  // error is encountered) and it can sometimes be rewound when performing
    91  // automatic retries. Rewinding can only be done if results for the rewound
    92  // statements have not actually been delivered to the client; see below.
    93  //
    94  //                                                   +---------------------+
    95  //                                                   |connExecutor         |
    96  //                                                   |                     |
    97  //                                                   +->execution+--------------+
    98  //                                                   ||  +                 |    |
    99  //                                                   ||  |fsm.Event        |    |
   100  //                                                   ||  |                 |    |
   101  //                                                   ||  v                 |    |
   102  //                                                   ||  fsm.Machine(TxnStateTransitions)
   103  //                                                   ||  +  +--------+     |    |
   104  //      +--------------------+                       ||  |  |txnState|     |    |
   105  //      |stmtBuf             |                       ||  |  +--------+     |    |
   106  //      |                    | statements are read   ||  |                 |    |
   107  //      | +-+-+ +-+-+ +-+-+  +------------------------+  |                 |    |
   108  //      | | | | | | | | | |  |                       |   |   +-------------+    |
   109  //  +---> +-+-+ +++-+ +-+-+  |                       |   |   |session data |    |
   110  //  |   |        ^           |                       |   |   +-------------+    |
   111  //  |   |        |   +-----------------------------------+                 |    |
   112  //  |   |        +   v       | cursor is advanced    |  advanceInfo        |    |
   113  //  |   |       cursor       |                       |                     |    |
   114  //  |   +--------------------+                       +---------------------+    |
   115  //  |                                                                           |
   116  //  |                                                                           |
   117  //  +-------------+                                                             |
   118  //                +--------+                                                    |
   119  //                | parser |                                                    |
   120  //                +--------+                                                    |
   121  //                |                                                             |
   122  //                |                                                             |
   123  //                |                                   +----------------+        |
   124  //        +-------+------+                            |execution engine<--------+
   125  //        | pgwire conn  |               +------------+(local/DistSQL) |
   126  //        |              |               |            +----------------+
   127  //        |   +----------+               |
   128  //        |   |clientComm<---------------+
   129  //        |   +----------+           results are produced
   130  //        |              |
   131  //        +-------^------+
   132  //                |
   133  //                |
   134  //        +-------+------+
   135  //        | SQL client   |
   136  //        +--------------+
   137  //
   138  // The connExecutor is disconnected from client communication (i.e. generally
   139  // network communication - i.e. pgwire.conn); the module doing client
   140  // communication is responsible for pushing statements into the buffer and for
   141  // providing an implementation of the clientComm interface (and thus sending
   142  // results to the client). The connExecutor does not control when
   143  // results are delivered to the client, but still it does have some influence
   144  // over that; this is because of the fact that the possibility of doing
   145  // automatic retries goes away the moment results for the transaction in
   146  // question are delivered to the client. The communication module has full
   147  // freedom in sending results whenever it sees fit; however the connExecutor
   148  // influences communication in the following ways:
   149  //
   150  // a) When deciding whether an automatic retry can be performed for a
   151  // transaction, the connExecutor needs to:
   152  //
   153  //   1) query the communication status to check that no results for the txn have
   154  //   been delivered to the client and, if this check passes:
   155  //   2) lock the communication so that no further results are delivered to the
   156  //   client, and, eventually:
   157  //   3) rewind the clientComm to a certain position corresponding to the start
   158  //   of the transaction, thereby discarding all the results that had been
   159  //   accumulated for the previous attempt to run the transaction in question.
   160  //
   161  // These steps are all orchestrated through clientComm.lockCommunication() and
   162  // rewindCapability{}.
   163  //
   164  // b) The connExecutor sometimes asks the clientComm to deliver everything
   165  // (most commonly in response to a Sync command).
   166  //
   167  // As of Feb 2018, the pgwire.conn delivers results synchronously to the client
   168  // when its internal buffer overflows. In principle, delivery of results could be
   169  // done asynchronously wrt the processing of commands (e.g. we could have a
   170  // timing policy in addition to the buffer size). The first implementation of
   171  // that showed a performance impact of involving a channel communication in the
   172  // Sync processing path.
   173  //
   174  //
   175  // Implementation notes:
   176  //
   177  // --- Error handling ---
   178  //
   179  // The key to understanding how the connExecutor handles errors is understanding
   180  // the fact that there are two distinct categories of errors to speak of. There
   181  // are "query execution errors" and there are the rest. Most things fall in the
   182  // former category: invalid queries, queries that fail constraints at runtime,
   183  // data unavailability errors, retriable errors (i.e. serializability
   184  // violations), "internal errors" (e.g. connection problems in the cluster). This
   185  // category of errors doesn't represent dramatic events as far as the connExecutor
   186  // is concerned: they produce "results" for the query to be passed to the client
   187  // just like more successful queries do and they produce Events for the
   188  // state machine just like the successful queries (the events in question
   189  // are generally event{non}RetriableErr and they generally cause the
   190  // state machine to move to the Aborted state, but the connExecutor doesn't
   191  // concern itself with this). The way the connExecutor reacts to these errors is
   192  // the same as how it reacts to a successful query completing: it moves the
   193  // cursor over the incoming statements as instructed by the state machine and
   194  // continues running statements.
   195  //
   196  // And then there's other errors that don't have anything to do with a
   197  // particular query, but with the connExecutor itself. In other languages, these
   198  // would perhaps be modeled as Exceptions: we want them to unwind the stack
   199  // significantly. These errors cause the connExecutor.run() to break out of its
   200  // loop and return an error. Examples of such errors include errors in
   201  // communication with the client (e.g. the network connection is broken) or the
   202  // connection's context being canceled.
   203  //
   204  // All of connExecutor's methods only return errors for the 2nd category. Query
   205  // execution errors are written to a CommandResult. Low-level methods don't
   206  // operate on a CommandResult directly; instead they operate on a wrapper
   207  // (resultWithStoredErr), which provides access to the query error for purposes
   208  // of building the correct state machine event.
   209  //
   210  // --- Context management ---
   211  //
   212  // At the highest level, there's connExecutor.run() that takes a context. That
   213  // context is supposed to represent "the connection's context": its lifetime is
   214  // the client connection's lifetime and it is assigned to
   215  // connEx.ctxHolder.connCtx. Below that, every SQL transaction has its own
   216  // derived context because that's the level at which we trace operations. The
   217  // lifetime of SQL transactions is determined by the txnState: the state machine
   218  // decides when transactions start and end in txnState.performStateTransition().
   219  // When we're inside a SQL transaction, most operations are considered to happen
   220  // in the context of that txn. When there's no SQL transaction (i.e.
   221  // stateNoTxn), everything happens in the connection's context.
   222  //
   223  // High-level code in connExecutor is agnostic of whether it currently is inside
   224  // a txn or not. To deal with both cases, such methods don't explicitly take a
   225  // context; instead they use connEx.Ctx(), which returns the appropriate ctx
   226  // based on the current state.
   227  // Lower-level code (everything from connEx.execStmt() and below which runs in
   228  // between state transitions) knows what state it's running in, and so the usual
   229  // pattern of explicitly taking a context as an argument is used.
   230  
// Server is the top level singleton for handling SQL connections. It creates
// connExecutors to serve every incoming connection.
type Server struct {
	// NOTE(review): presumably a marker that makes copying a Server by value
	// detectable by tooling — confirm against util.NoCopy.
	_ util.NoCopy

	// cfg is the executor configuration the Server was constructed with. It is
	// exposed through GetExecutorConfig().
	cfg *ExecutorConfig

	// sqlStats tracks per-application statistics for all applications on each
	// node. Newly collected statistics flow into sqlStats.
	sqlStats sqlStats
	// reportedStats is a pool of stats that is held for reporting, and is
	// cleared on a lower interval than sqlStats. Stats from sqlStats flow
	// into reported stats when sqlStats is cleared.
	reportedStats sqlStats

	// reCache is a cache of regular expressions; NewServer creates it via
	// tree.NewRegexpCache(512).
	reCache *tree.RegexpCache

	// pool is the parent monitor for all session monitors except "internal" ones.
	pool *mon.BytesMonitor

	// Metrics is used to account normal queries.
	Metrics Metrics

	// InternalMetrics is used to account internal queries.
	InternalMetrics Metrics

	// dbCache is a cache for database descriptors, maintained through Gossip
	// updates.
	dbCache *databaseCacheHolder
}
   261  
// Metrics collects timeseries data about SQL activity. A Server holds two
// instances: one for regular queries and one for internal queries.
type Metrics struct {
	// EngineMetrics is exported as required by the metrics.Struct magic we use
	// for metrics registration.
	EngineMetrics EngineMetrics

	// StartedStatementCounters contains metrics for statements initiated by
	// users. These metrics count user-initiated operations, regardless of
	// success (in particular, TxnCommitCount is the number of COMMIT statements
	// attempted, not the number of transactions that successfully commit).
	StartedStatementCounters StatementCounters

	// ExecutedStatementCounters contains metrics for successfully executed
	// statements.
	ExecutedStatementCounters StatementCounters
}
   278  
   279  // NewServer creates a new Server. Start() needs to be called before the Server
   280  // is used.
   281  func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server {
   282  	systemCfg := config.NewSystemConfig(cfg.DefaultZoneConfig)
   283  	return &Server{
   284  		cfg:             cfg,
   285  		Metrics:         makeMetrics(false /*internal*/),
   286  		InternalMetrics: makeMetrics(true /*internal*/),
   287  		// dbCache will be updated on Start().
   288  		dbCache:       newDatabaseCacheHolder(database.NewCache(cfg.Codec, systemCfg)),
   289  		pool:          pool,
   290  		sqlStats:      sqlStats{st: cfg.Settings, apps: make(map[string]*appStats)},
   291  		reportedStats: sqlStats{st: cfg.Settings, apps: make(map[string]*appStats)},
   292  		reCache:       tree.NewRegexpCache(512),
   293  	}
   294  }
   295  
   296  func makeMetrics(internal bool) Metrics {
   297  	return Metrics{
   298  		EngineMetrics: EngineMetrics{
   299  			DistSQLSelectCount:    metric.NewCounter(getMetricMeta(MetaDistSQLSelect, internal)),
   300  			SQLOptFallbackCount:   metric.NewCounter(getMetricMeta(MetaSQLOptFallback, internal)),
   301  			SQLOptPlanCacheHits:   metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheHits, internal)),
   302  			SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)),
   303  
   304  			// TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor.
   305  			DistSQLExecLatency: metric.NewLatency(getMetricMeta(MetaDistSQLExecLatency, internal),
   306  				6*metricsSampleInterval),
   307  			SQLExecLatency: metric.NewLatency(getMetricMeta(MetaSQLExecLatency, internal),
   308  				6*metricsSampleInterval),
   309  			DistSQLServiceLatency: metric.NewLatency(getMetricMeta(MetaDistSQLServiceLatency, internal),
   310  				6*metricsSampleInterval),
   311  			SQLServiceLatency: metric.NewLatency(getMetricMeta(MetaSQLServiceLatency, internal),
   312  				6*metricsSampleInterval),
   313  			SQLTxnLatency: metric.NewLatency(getMetricMeta(MetaSQLTxnLatency, internal),
   314  				6*metricsSampleInterval),
   315  
   316  			TxnAbortCount: metric.NewCounter(getMetricMeta(MetaTxnAbort, internal)),
   317  			FailureCount:  metric.NewCounter(getMetricMeta(MetaFailure, internal)),
   318  		},
   319  		StartedStatementCounters:  makeStartedStatementCounters(internal),
   320  		ExecutedStatementCounters: makeExecutedStatementCounters(internal),
   321  	}
   322  }
   323  
   324  // Start starts the Server's background processing.
   325  func (s *Server) Start(ctx context.Context, stopper *stop.Stopper) {
   326  	gossipUpdateC := s.cfg.Gossip.DeprecatedRegisterSystemConfigChannel(47150)
   327  	stopper.RunWorker(ctx, func(ctx context.Context) {
   328  		for {
   329  			select {
   330  			case <-gossipUpdateC:
   331  				sysCfg := s.cfg.Gossip.DeprecatedSystemConfig(47150)
   332  				s.dbCache.updateSystemConfig(sysCfg)
   333  			case <-stopper.ShouldStop():
   334  				return
   335  			}
   336  		}
   337  	})
   338  	// Start a loop to clear SQL stats at the max reset interval. This is
   339  	// to ensure that we always have some worker clearing SQL stats to avoid
   340  	// continually allocating space for the SQL stats. Additionally, spawn
   341  	// a loop to clear the reported stats at the same large interval just
   342  	// in case the telemetry worker fails.
   343  	s.PeriodicallyClearSQLStats(ctx, stopper, MaxSQLStatReset, &s.sqlStats, s.ResetSQLStats)
   344  	s.PeriodicallyClearSQLStats(ctx, stopper, MaxSQLStatReset, &s.reportedStats, s.ResetReportedStats)
   345  	// Start a second loop to clear SQL stats at the requested interval.
   346  	s.PeriodicallyClearSQLStats(ctx, stopper, SQLStatReset, &s.sqlStats, s.ResetSQLStats)
   347  }
   348  
   349  // ResetSQLStats resets the executor's collected sql statistics.
   350  func (s *Server) ResetSQLStats(ctx context.Context) {
   351  	// Dump the SQL stats into the reported stats before clearing the SQL stats.
   352  	s.sqlStats.resetAndMaybeDumpStats(ctx, &s.reportedStats)
   353  }
   354  
// ResetReportedStats resets the executor's collected reported stats. Unlike
// ResetSQLStats, no target is supplied (nil is passed), so the reported stats
// are not handed to another pool.
func (s *Server) ResetReportedStats(ctx context.Context) {
	s.reportedStats.resetAndMaybeDumpStats(ctx, nil /* target */)
}
   359  
   360  // GetScrubbedStmtStats returns the statement statistics by app, with the
   361  // queries scrubbed of their identifiers. Any statements which cannot be
   362  // scrubbed will be omitted from the returned map.
   363  func (s *Server) GetScrubbedStmtStats() []roachpb.CollectedStatementStatistics {
   364  	return s.sqlStats.getScrubbedStmtStats(s.cfg.VirtualSchemas)
   365  }
   366  
// Avoid lint errors: referencing the method expression here keeps
// GetScrubbedStmtStats in use. (Presumably it has no other callers inside
// this package — confirm before removing.)
var _ = (*Server).GetScrubbedStmtStats
   369  
   370  // GetUnscrubbedStmtStats returns the same thing as GetScrubbedStmtStats, except
   371  // identifiers (e.g. table and column names) aren't scrubbed from the statements.
   372  func (s *Server) GetUnscrubbedStmtStats() []roachpb.CollectedStatementStatistics {
   373  	return s.sqlStats.getUnscrubbedStmtStats(s.cfg.VirtualSchemas)
   374  }
   375  
   376  // GetScrubbedReportingStats does the same thing as GetScrubbedStmtStats but
   377  // returns statistics from the reported stats pool.
   378  func (s *Server) GetScrubbedReportingStats() []roachpb.CollectedStatementStatistics {
   379  	return s.reportedStats.getScrubbedStmtStats(s.cfg.VirtualSchemas)
   380  }
   381  
   382  // GetUnscrubbedReportingStats does the same thing as GetUnscrubbedStmtStats but
   383  // returns statistics from the reported stats pool.
   384  func (s *Server) GetUnscrubbedReportingStats() []roachpb.CollectedStatementStatistics {
   385  	return s.reportedStats.getUnscrubbedStmtStats(s.cfg.VirtualSchemas)
   386  }
   387  
   388  // GetStmtStatsLastReset returns the time at which the statement statistics were
   389  // last cleared.
   390  func (s *Server) GetStmtStatsLastReset() time.Time {
   391  	return s.sqlStats.getLastReset()
   392  }
   393  
// GetExecutorConfig returns this server's executor config. The returned
// pointer is the same one the Server holds, not a copy.
func (s *Server) GetExecutorConfig() *ExecutorConfig {
	return s.cfg
}
   398  
   399  // SetupConn creates a connExecutor for the client connection.
   400  //
   401  // When this method returns there are no resources allocated yet that
   402  // need to be close()d.
   403  //
   404  // Args:
   405  // args: The initial session parameters. They are validated by SetupConn
   406  //   and an error is returned if this validation fails.
   407  // stmtBuf: The incoming statement for the new connExecutor.
   408  // clientComm: The interface through which the new connExecutor is going to
   409  //   produce results for the client.
   410  // memMetrics: The metrics that statements executed on this connection will
   411  //   contribute to.
   412  func (s *Server) SetupConn(
   413  	ctx context.Context,
   414  	args SessionArgs,
   415  	stmtBuf *StmtBuf,
   416  	clientComm ClientComm,
   417  	memMetrics MemoryMetrics,
   418  ) (ConnectionHandler, error) {
   419  	sd := s.newSessionData(args)
   420  
   421  	// Set the SessionData from args.SessionDefaults. This also validates the
   422  	// respective values.
   423  	sdMut := s.makeSessionDataMutator(sd, args.SessionDefaults)
   424  	if err := resetSessionVars(ctx, &sdMut); err != nil {
   425  		log.Errorf(ctx, "error setting up client session: %s", err)
   426  		return ConnectionHandler{}, err
   427  	}
   428  
   429  	ex := s.newConnExecutor(
   430  		ctx, sd, args.SessionDefaults, stmtBuf, clientComm, memMetrics, &s.Metrics,
   431  		s.sqlStats.getStatsForApplication(sd.ApplicationName),
   432  	)
   433  	return ConnectionHandler{ex}, nil
   434  }
   435  
// ConnectionHandler is the interface between the result of SetupConn
// and the ServeConn below. It encapsulates the connExecutor and hides
// it away from other packages.
type ConnectionHandler struct {
	// ex is the wrapped executor. It can be nil in certain testing situations
	// where no server is actually present (see GetUnqualifiedIntSize).
	ex *connExecutor
}
   442  
   443  // GetUnqualifiedIntSize implements pgwire.sessionDataProvider and returns
   444  // the type that INT should be parsed as.
   445  func (h ConnectionHandler) GetUnqualifiedIntSize() *types.T {
   446  	var size int
   447  	if h.ex != nil {
   448  		// The executor will be nil in certain testing situations where
   449  		// no server is actually present.
   450  		size = h.ex.sessionData.DefaultIntSize
   451  	}
   452  	switch size {
   453  	case 4, 32:
   454  		return types.Int4
   455  	default:
   456  		return types.Int
   457  	}
   458  }
   459  
   460  // GetParamStatus retrieves the configured value of the session
   461  // variable identified by varName. This is used for the initial
   462  // message sent to a client during a session set-up.
   463  func (h ConnectionHandler) GetParamStatus(ctx context.Context, varName string) string {
   464  	name := strings.ToLower(varName)
   465  	v, ok := varGen[name]
   466  	if !ok {
   467  		log.Fatalf(ctx, "programming error: status param %q must be defined session var", varName)
   468  		return ""
   469  	}
   470  	hasDefault, defVal := getSessionVarDefaultString(name, v, h.ex.dataMutator)
   471  	if !hasDefault {
   472  		log.Fatalf(ctx, "programming error: status param %q must have a default value", varName)
   473  		return ""
   474  	}
   475  	return defVal
   476  }
   477  
   478  // ServeConn serves a client connection by reading commands from the stmtBuf
   479  // embedded in the ConnHandler.
   480  //
   481  // If not nil, reserved represents memory reserved for the connection. The
   482  // connExecutor takes ownership of this memory.
   483  func (s *Server) ServeConn(
   484  	ctx context.Context, h ConnectionHandler, reserved mon.BoundAccount, cancel context.CancelFunc,
   485  ) error {
   486  	defer func() {
   487  		r := recover()
   488  		h.ex.closeWrapper(ctx, r)
   489  	}()
   490  	return h.ex.run(ctx, s.pool, reserved, cancel)
   491  }
   492  
   493  // newSessionData a SessionData that can be passed to newConnExecutor.
   494  func (s *Server) newSessionData(args SessionArgs) *sessiondata.SessionData {
   495  	sd := &sessiondata.SessionData{
   496  		User:              args.User,
   497  		RemoteAddr:        args.RemoteAddr,
   498  		ResultsBufferSize: args.ConnResultsBufferSize,
   499  	}
   500  	s.populateMinimalSessionData(sd)
   501  	return sd
   502  }
   503  
   504  func (s *Server) makeSessionDataMutator(
   505  	sd *sessiondata.SessionData, defaults SessionDefaults,
   506  ) sessionDataMutator {
   507  	return sessionDataMutator{
   508  		data:               sd,
   509  		defaults:           defaults,
   510  		settings:           s.cfg.Settings,
   511  		paramStatusUpdater: &noopParamStatusUpdater{},
   512  	}
   513  }
   514  
   515  // populateMinimalSessionData populates sd with some minimal values needed for
   516  // not crashing. Fields of sd that are already set are not overwritten.
   517  func (s *Server) populateMinimalSessionData(sd *sessiondata.SessionData) {
   518  	if sd.SequenceState == nil {
   519  		sd.SequenceState = sessiondata.NewSequenceState()
   520  	}
   521  	if sd.DataConversion == (sessiondata.DataConversionConfig{}) {
   522  		sd.DataConversion = sessiondata.DataConversionConfig{
   523  			Location: time.UTC,
   524  		}
   525  	}
   526  	if len(sd.SearchPath.GetPathArray()) == 0 {
   527  		sd.SearchPath = sqlbase.DefaultSearchPath
   528  	}
   529  }
   530  
// newConnExecutor creates a new connExecutor.
//
// sd is expected to be fully initialized with the values of all the session
// vars.
// sdDefaults controls what the session vars will be reset to through
// RESET statements.
// stmtBuf and clientComm are stored on the executor as its statement source
// and result sink, respectively.
// memMetrics supplies the counters and histograms backing the session root,
// session and txn memory monitors.
// srvMetrics is stored as the executor's metrics; its TxnAbortCount is also
// wired into the txn state.
// appStats is the initial per-application stats container; it is replaced
// whenever the application_name session variable changes.
//
// The monitors created here are not started by this function (see the
// comments on them below).
func (s *Server) newConnExecutor(
	ctx context.Context,
	sd *sessiondata.SessionData,
	sdDefaults SessionDefaults,
	stmtBuf *StmtBuf,
	clientComm ClientComm,
	memMetrics MemoryMetrics,
	srvMetrics *Metrics,
	appStats *appStats,
) *connExecutor {
	// Create the various monitors.
	// The session monitors are started in activate().
	sessionRootMon := mon.MakeMonitor(
		"session root",
		mon.MemoryResource,
		memMetrics.CurBytesCount,
		memMetrics.MaxBytesHist,
		-1 /* increment */, math.MaxInt64, s.cfg.Settings,
	)
	sessionMon := mon.MakeMonitor(
		"session",
		mon.MemoryResource,
		memMetrics.SessionCurBytesCount,
		memMetrics.SessionMaxBytesHist,
		-1 /* increment */, noteworthyMemoryUsageBytes, s.cfg.Settings,
	)
	// The txn monitor is started in txnState.resetForNewSQLTxn().
	txnMon := mon.MakeMonitor(
		"txn",
		mon.MemoryResource,
		memMetrics.TxnCurBytesCount,
		memMetrics.TxnMaxBytesHist,
		-1 /* increment */, noteworthyMemoryUsageBytes, s.cfg.Settings,
	)

	// The second return value is deliberately discarded.
	// NOTE(review): presumably nodeIDOrZero is zero when no node ID is
	// available, as the name suggests — confirm against OptionalNodeID.
	nodeIDOrZero, _ := s.cfg.NodeID.OptionalNodeID()
	// The mutator is heap-allocated so that the executor and the callbacks
	// registered below all share the same instance.
	sdMutator := new(sessionDataMutator)
	*sdMutator = s.makeSessionDataMutator(sd, sdDefaults)

	ex := &connExecutor{
		server:      s,
		metrics:     srvMetrics,
		stmtBuf:     stmtBuf,
		clientComm:  clientComm,
		mon:         &sessionRootMon,
		sessionMon:  &sessionMon,
		sessionData: sd,
		dataMutator: sdMutator,
		state: txnState{
			mon:     &txnMon,
			connCtx: ctx,
		},
		transitionCtx: transitionCtx{
			db:           s.cfg.DB,
			nodeIDOrZero: nodeIDOrZero,
			clock:        s.cfg.Clock,
			// Future transactions' monitors will inherit from sessionRootMon.
			connMon:  &sessionRootMon,
			tracer:   s.cfg.AmbientCtx.Tracer,
			settings: s.cfg.Settings,
		},
		memMetrics: memMetrics,
		planner:    planner{execCfg: s.cfg, alloc: &sqlbase.DatumAlloc{}},

		// ctxHolder will be reset at the start of run(). We only define
		// it here so that an early call to close() doesn't panic.
		ctxHolder:                 ctxHolder{connCtx: ctx},
		executorType:              executorTypeExec,
		hasCreatedTemporarySchema: false,
		stmtDiagnosticsRecorder:   s.cfg.StmtDiagnosticsRecorder,
	}

	// Share the server-level abort counter with the txn state.
	ex.state.txnAbortCount = ex.metrics.EngineMetrics.TxnAbortCount

	// The transaction_read_only variable is special; its updates need to be
	// hooked-up to the executor.
	sdMutator.setCurTxnReadOnly = func(val bool) {
		ex.state.readOnly = val
	}

	// Record on the executor that a temporary schema has been created.
	sdMutator.onTempSchemaCreation = func() {
		ex.hasCreatedTemporarySchema = true
	}

	// Keep the executor's application name and per-application stats in sync
	// with the application_name session variable.
	ex.applicationName.Store(ex.sessionData.ApplicationName)
	ex.appStats = appStats
	sdMutator.RegisterOnSessionDataChange("application_name", func(newName string) {
		ex.applicationName.Store(newName)
		ex.appStats = ex.server.sqlStats.getStatsForApplication(newName)
	})

	ex.phaseTimes[sessionInit] = timeutil.Now()
	// Both prepared-statement namespaces (the current one and the one saved
	// for the txn rewind position) start out empty.
	ex.extraTxnState.prepStmtsNamespace = prepStmtNamespace{
		prepStmts: make(map[string]*PreparedStatement),
		portals:   make(map[string]*PreparedPortal),
	}
	ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos = prepStmtNamespace{
		prepStmts: make(map[string]*PreparedStatement),
		portals:   make(map[string]*PreparedPortal),
	}
	ex.extraTxnState.descCollection = descs.MakeCollection(s.cfg.LeaseManager,
		s.cfg.Settings, s.dbCache.getDatabaseCache(), s.dbCache)
	// NOTE(review): -1 presumably means "no rewind position yet" — confirm.
	ex.extraTxnState.txnRewindPos = -1
	ex.mu.ActiveQueries = make(map[ClusterWideID]*queryMeta)
	// The state machine starts outside of any transaction.
	ex.machine = fsm.MakeMachine(TxnStateTransitions, stateNoTxn{}, &ex.state)

	ex.sessionTracing.ex = ex
	ex.transitionCtx.sessionTracing = &ex.sessionTracing
	ex.statsCollector = ex.newStatsCollector()
	ex.initPlanner(ctx, &ex.planner)

	return ex
}
   650  
   651  // newConnExecutorWithTxn creates a connExecutor that will execute statements
   652  // under a higher-level txn. This connExecutor runs with a different state
   653  // machine, much reduced from the regular one. It cannot initiate or end
   654  // transactions (so, no BEGIN, COMMIT, ROLLBACK, no auto-commit, no automatic
   655  // retries).
   656  //
   657  // If there is no error, this function also activate()s the returned
   658  // executor, so the caller does not need to run the
   659  // activation. However this means that run() or close() must be called
   660  // to release resources.
   661  func (s *Server) newConnExecutorWithTxn(
   662  	ctx context.Context,
   663  	sd *sessiondata.SessionData,
   664  	sdDefaults SessionDefaults,
   665  	stmtBuf *StmtBuf,
   666  	clientComm ClientComm,
   667  	parentMon *mon.BytesMonitor,
   668  	memMetrics MemoryMetrics,
   669  	srvMetrics *Metrics,
   670  	txn *kv.Txn,
   671  	tcModifier descs.ModifiedCollectionCopier,
   672  	appStats *appStats,
   673  ) *connExecutor {
   674  	ex := s.newConnExecutor(ctx, sd, sdDefaults, stmtBuf, clientComm, memMetrics, srvMetrics, appStats)
   675  
   676  	// The new transaction stuff below requires active monitors and traces, so
   677  	// we need to activate the executor now.
   678  	ex.activate(ctx, parentMon, mon.BoundAccount{})
   679  
   680  	// Perform some surgery on the executor - replace its state machine and
   681  	// initialize the state.
   682  	ex.machine = fsm.MakeMachine(
   683  		BoundTxnStateTransitions,
   684  		stateOpen{ImplicitTxn: fsm.False},
   685  		&ex.state,
   686  	)
   687  	ex.state.resetForNewSQLTxn(
   688  		ctx,
   689  		explicitTxn,
   690  		txn.ReadTimestamp().GoTime(),
   691  		nil, /* historicalTimestamp */
   692  		txn.UserPriority(),
   693  		tree.ReadWrite,
   694  		txn,
   695  		ex.transitionCtx)
   696  
   697  	// Modify the Collection to match the parent executor's Collection.
   698  	// This allows the InternalExecutor to see schema changes made by the
   699  	// parent executor.
   700  	if tcModifier != nil {
   701  		tcModifier.CopyModifiedObjects(&ex.extraTxnState.descCollection)
   702  	}
   703  	return ex
   704  }
   705  
   706  // SQLStatReset is the cluster setting that controls at what interval SQL
   707  // statement statistics should be reset.
   708  var SQLStatReset = settings.RegisterPublicNonNegativeDurationSettingWithMaximum(
   709  	"diagnostics.sql_stat_reset.interval",
   710  	"interval controlling how often SQL statement statistics should "+
   711  		"be reset (should be less than diagnostics.forced_sql_stat_reset.interval). It has a max value of 24H.",
   712  	time.Hour,
   713  	time.Hour*24,
   714  )
   715  
   716  // MaxSQLStatReset is the cluster setting that controls at what interval SQL
   717  // statement statistics must be flushed within.
   718  var MaxSQLStatReset = settings.RegisterPublicNonNegativeDurationSettingWithMaximum(
   719  	"diagnostics.forced_sql_stat_reset.interval",
   720  	"interval after which SQL statement statistics are refreshed even "+
   721  		"if not collected (should be more than diagnostics.sql_stat_reset.interval). It has a max value of 24H.",
   722  	time.Hour*2, // 2 x diagnostics.sql_stat_reset.interval
   723  	time.Hour*24,
   724  )
   725  
   726  // PeriodicallyClearSQLStats spawns a loop to reset stats based on the setting
   727  // of a given duration settings variable. We take in a function to actually do
   728  // the resetting, as some stats have extra work that needs to be performed
   729  // during the reset. For example, the SQL stats need to dump into the parent
   730  // stats before clearing data fully.
   731  func (s *Server) PeriodicallyClearSQLStats(
   732  	ctx context.Context,
   733  	stopper *stop.Stopper,
   734  	setting *settings.DurationSetting,
   735  	stats *sqlStats,
   736  	reset func(ctx context.Context),
   737  ) {
   738  	stopper.RunWorker(ctx, func(ctx context.Context) {
   739  		var timer timeutil.Timer
   740  		for {
   741  			s.sqlStats.Lock()
   742  			last := stats.lastReset
   743  			s.sqlStats.Unlock()
   744  
   745  			next := last.Add(setting.Get(&s.cfg.Settings.SV))
   746  			wait := next.Sub(timeutil.Now())
   747  			if wait < 0 {
   748  				reset(ctx)
   749  			} else {
   750  				timer.Reset(wait)
   751  				select {
   752  				case <-stopper.ShouldQuiesce():
   753  					return
   754  				case <-timer.C:
   755  					timer.Read = true
   756  				}
   757  			}
   758  		}
   759  	})
   760  }
   761  
// closeType tells connExecutor.close() under which circumstances the executor
// is being closed; close() adapts its cleanup accordingly.
type closeType int

const (
	// normalClose is used by closeWrapper() when no panic is being handled.
	// On a normalClose, close() cleans up the SQL txn by applying a
	// non-retriable error event to the state machine.
	normalClose closeType = iota
	// panicClose is used by closeWrapper() when closing while a panic is being
	// propagated. On a panicClose, close() does not release the prepared
	// statements and portals, and stops the memory monitors with
	// EmergencyStop().
	panicClose
	// externalTxnClose means that the connExecutor has been used within a
	// higher-level txn (through the InternalExecutor).
	externalTxnClose
)
   771  
   772  func (ex *connExecutor) closeWrapper(ctx context.Context, recovered interface{}) {
   773  	if recovered != nil {
   774  		panicErr := log.PanicAsError(1, recovered)
   775  
   776  		// If there's a statement currently being executed, we'll report
   777  		// on it.
   778  		if ex.curStmt != nil {
   779  			// A warning header guaranteed to go to stderr.
   780  			log.Shoutf(ctx, log.Severity_ERROR,
   781  				"a SQL panic has occurred while executing the following statement:\n%s",
   782  				// For the log message, the statement is not anonymized.
   783  				truncateStatementStringForTelemetry(ex.curStmt.String()))
   784  
   785  			// Embed the statement in the error object for the telemetry
   786  			// report below. The statement gets anonymized.
   787  			panicErr = WithAnonymizedStatement(panicErr, ex.curStmt)
   788  		}
   789  
   790  		// Report the panic to telemetry in any case.
   791  		log.ReportPanic(ctx, &ex.server.cfg.Settings.SV, panicErr, 1 /* depth */)
   792  
   793  		// Close the executor before propagating the panic further.
   794  		ex.close(ctx, panicClose)
   795  
   796  		// Propagate - this may be meant to stop the process.
   797  		panic(panicErr)
   798  	}
   799  	// Closing is not cancelable.
   800  	closeCtx := logtags.WithTags(context.Background(), logtags.FromContext(ctx))
   801  	ex.close(closeCtx, normalClose)
   802  }
   803  
   804  func (ex *connExecutor) close(ctx context.Context, closeType closeType) {
   805  	ex.sessionEventf(ctx, "finishing connExecutor")
   806  
   807  	if ex.hasCreatedTemporarySchema && !ex.server.cfg.TestingKnobs.DisableTempObjectsCleanupOnSessionExit {
   808  		ie := MakeInternalExecutor(ctx, ex.server, MemoryMetrics{}, ex.server.cfg.Settings)
   809  		err := cleanupSessionTempObjects(
   810  			ctx,
   811  			ex.server.cfg.Settings,
   812  			ex.server.cfg.DB,
   813  			ex.server.cfg.Codec,
   814  			&ie,
   815  			ex.sessionID,
   816  		)
   817  		if err != nil {
   818  			log.Errorf(
   819  				ctx,
   820  				"error deleting temporary objects at session close, "+
   821  					"the temp tables deletion job will retry periodically: %s",
   822  				err,
   823  			)
   824  		}
   825  	}
   826  
   827  	ev := noEvent
   828  	if _, noTxn := ex.machine.CurState().(stateNoTxn); !noTxn {
   829  		ev = txnRollback
   830  	}
   831  
   832  	if closeType == normalClose {
   833  		// We'll cleanup the SQL txn by creating a non-retriable (commit:true) event.
   834  		// This event is guaranteed to be accepted in every state.
   835  		ev := eventNonRetriableErr{IsCommit: fsm.True}
   836  		payload := eventNonRetriableErrPayload{err: pgerror.Newf(pgcode.AdminShutdown,
   837  			"connExecutor closing")}
   838  		if err := ex.machine.ApplyWithPayload(ctx, ev, payload); err != nil {
   839  			log.Warningf(ctx, "error while cleaning up connExecutor: %s", err)
   840  		}
   841  	} else if closeType == externalTxnClose {
   842  		ex.state.finishExternalTxn()
   843  	}
   844  
   845  	if err := ex.resetExtraTxnState(ctx, ex.server.dbCache, ev); err != nil {
   846  		log.Warningf(ctx, "error while cleaning up connExecutor: %s", err)
   847  	}
   848  
   849  	if closeType != panicClose {
   850  		// Close all statements and prepared portals.
   851  		ex.extraTxnState.prepStmtsNamespace.resetTo(ctx, prepStmtNamespace{})
   852  		ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos.resetTo(ctx, prepStmtNamespace{})
   853  	}
   854  
   855  	if ex.sessionTracing.Enabled() {
   856  		if err := ex.sessionTracing.StopTracing(); err != nil {
   857  			log.Warningf(ctx, "error stopping tracing: %s", err)
   858  		}
   859  	}
   860  
   861  	if ex.eventLog != nil {
   862  		ex.eventLog.Finish()
   863  		ex.eventLog = nil
   864  	}
   865  
   866  	if closeType != panicClose {
   867  		ex.state.mon.Stop(ctx)
   868  		ex.sessionMon.Stop(ctx)
   869  		ex.mon.Stop(ctx)
   870  	} else {
   871  		ex.state.mon.EmergencyStop(ctx)
   872  		ex.sessionMon.EmergencyStop(ctx)
   873  		ex.mon.EmergencyStop(ctx)
   874  	}
   875  }
   876  
// connExecutor bundles the state used to execute the commands of one SQL
// session: the buffer of incoming statements, the interface for sending
// results to the client, the txn state machine together with the state it
// controls, the memory monitors and the session data.
type connExecutor struct {
	_ util.NoCopy

	// The server to which this connExecutor is attached. The reference is used
	// for getting access to configuration settings.
	// Note: do not use server.Metrics directly. Use metrics below instead.
	server *Server

	// The metrics to which the statement metrics should be accounted.
	// This is different whether the executor is for regular client
	// queries or for "internal" queries.
	metrics *Metrics

	// mon tracks memory usage for SQL activity within this session. It
	// is not directly used, but rather indirectly used via sessionMon
	// and state.mon. sessionMon tracks session-bound objects like prepared
	// statements and result sets.
	//
	// The reason why state.mon and mon are split is to enable
	// separate reporting of statistics per transaction and per
	// session. This is because the "interesting" behavior w.r.t memory
	// is typically caused by transactions, not sessions. The reason why
	// sessionMon and mon are split is to enable separate reporting of
	// statistics for result sets (which escape transactions).
	mon        *mon.BytesMonitor
	sessionMon *mon.BytesMonitor
	// memMetrics contains the metrics that statements executed on this connection
	// will contribute to.
	memMetrics MemoryMetrics

	// The buffer with incoming statements to execute.
	stmtBuf *StmtBuf
	// The interface for communicating statement results to the client.
	clientComm ClientComm
	// Finity "the machine" Automaton is the state machine controlling the state
	// below.
	machine fsm.Machine
	// state encapsulates fields related to the ongoing SQL txn. It is mutated as
	// the machine's ExtendedState.
	state          txnState
	transitionCtx  transitionCtx
	sessionTracing SessionTracing

	// eventLog for SQL statements and other important session events. Will be set
	// if traceSessionEventLogEnabled; it is used by ex.sessionEventf()
	eventLog trace.EventLog

	// extraTxnState groups fields scoped to a SQL txn that are not handled by
	// ex.state, above. The rule of thumb is that, if the state influences state
	// transitions, it should live in state, otherwise it can live here.
	// This is only used in the Open state. extraTxnState is reset whenever a
	// transaction finishes or gets retried.
	extraTxnState struct {
		// descCollection collects descriptors used by the current transaction.
		descCollection descs.Collection

		// jobs accumulates jobs staged for execution inside the transaction.
		// Staging happens when executing statements that are implemented with a
		// job. The jobs are staged via the function QueueJob in
		// pkg/sql/planner.go. The staged jobs are executed once the transaction
		// that staged them commits.
		jobs jobsCollection

		// autoRetryCounter keeps track of which iteration of a transaction
		// auto-retry we're currently in. It's 0 whenever the transaction state is not
		// stateOpen.
		autoRetryCounter int

		// numDDL keeps track of how many DDL statements have been
		// executed so far.
		numDDL int

		// txnRewindPos is the position within stmtBuf to which we'll rewind when
		// performing automatic retries. This is more or less the position where the
		// current transaction started.
		// This field is only defined while in stateOpen.
		//
		// Set via setTxnRewindPos().
		txnRewindPos CmdPos

		// prepStmtsNamespace contains the prepared statements and portals that the
		// session currently has access to.
		// Portals are bound to a transaction and they're all destroyed once the
		// transaction finishes.
		// Prepared statements are not transactional and so it's a bit weird that
		// they're part of extraTxnState, but it's convenient to put them here
		// because they need the same kind of "snapshotting" as the portals (see
		// prepStmtsNamespaceAtTxnRewindPos).
		prepStmtsNamespace prepStmtNamespace

		// prepStmtsNamespaceAtTxnRewindPos is a snapshot of the prep stmts/portals
		// (ex.prepStmtsNamespace) before processing the command at position
		// txnRewindPos.
		// Here's the deal: prepared statements are not transactional, but they do
		// need to interact properly with automatic retries (i.e. rewinding the
		// command buffer). When doing a rewind, we need to be able to restore the
		// prep stmts as they were. We do this by taking a snapshot every time
		// txnRewindPos is advanced. Prepared statements are shared between the two
		// collections, but these collections are periodically reconciled.
		prepStmtsNamespaceAtTxnRewindPos prepStmtNamespace

		// onTxnFinish (if non-nil) will be called when txn is finished (either
		// committed or aborted). It is set when txn is started but can remain
		// unset when txn is executed within another higher-level txn.
		onTxnFinish func(txnEvent)

		// savepoints maintains the stack of savepoints currently open.
		savepoints savepointStack
		// savepointsAtTxnRewindPos is a snapshot of the savepoints stack before
		// processing the command at position txnRewindPos. When rewinding, we're
		// going to restore this snapshot.
		savepointsAtTxnRewindPos savepointStack
	}

	// sessionData contains the user-configurable connection variables.
	sessionData *sessiondata.SessionData
	// dataMutator is nil for session-bound internal executors; we shouldn't issue
	// statements that manipulate session state to an internal executor.
	dataMutator *sessionDataMutator
	// appStats tracks per-application SQL usage statistics. It is maintained to
	// represent statistics for the application currently identified by
	// sessiondata.ApplicationName.
	appStats *appStats
	// applicationName is the same as sessionData.ApplicationName. It's copied
	// here as an atomic so that it can be read concurrently by serialize().
	applicationName atomic.Value

	// ctxHolder contains the connection's context in which all commands executed
	// on the connection are running. This generally should not be used directly,
	// but through the Ctx() method; if we're inside a transaction, Ctx() is going
	// to return a derived context. See the Context Management comments at the top
	// of the file.
	ctxHolder ctxHolder

	// onCancelSession is called when the SessionRegistry cancels this session.
	// For pgwire connections, this is hooked up to canceling the connection's
	// context.
	// If nil, canceling this session will be a no-op.
	onCancelSession context.CancelFunc

	// planner is the "default planner" on a session, to save planner allocations
	// during serial execution. Since planners are not threadsafe, this is only
	// safe to use when a statement is not being parallelized. It must be reset
	// before using.
	planner planner
	// phaseTimes tracks session- and transaction-level phase times. It is
	// copied-by-value when resetting statsCollector before executing each
	// statement.
	phaseTimes phaseTimes

	// statsCollector is used to collect statistics about SQL statements and
	// transactions.
	statsCollector *sqlStatsCollector

	// mu contains all the elements of the struct that can be changed
	// after initialization, and may be accessed from another thread.
	mu struct {
		syncutil.RWMutex

		// ActiveQueries contains all queries in flight.
		ActiveQueries map[ClusterWideID]*queryMeta

		// LastActiveQuery contains a reference to the AST of the last
		// query that ran on this session.
		LastActiveQuery tree.Statement
	}

	// curStmt is the statement that's currently being prepared or executed, if
	// any. This is printed by high-level panic recovery.
	curStmt tree.Statement

	// sessionID is the ID under which run() registers this executor with the
	// SessionRegistry.
	sessionID ClusterWideID

	// activated determines whether activate() was called already.
	// When this is set, close() must be called to release resources.
	activated bool

	// draining is set if we've received a DrainRequest. Once this is set, we're
	// going to find a suitable time to close the connection.
	draining bool

	// executorType is set to whether this executor is an ordinary executor which
	// responds to user queries or an internal one.
	executorType executorType

	// hasCreatedTemporarySchema is set if the executor has created a
	// temporary schema, which requires special cleanup on close.
	hasCreatedTemporarySchema bool

	// stmtDiagnosticsRecorder is used to track which queries need to have
	// information collected.
	stmtDiagnosticsRecorder StmtDiagnosticsRecorder
}
  1070  
  1071  // ctxHolder contains a connection's context and, while session tracing is
  1072  // enabled, a derived context with a recording span. The connExecutor should use
  1073  // the latter while session tracing is active, or the former otherwise; that's
  1074  // what the ctx() method returns.
  1075  type ctxHolder struct {
  1076  	connCtx           context.Context
  1077  	sessionTracingCtx context.Context
  1078  }
  1079  
  1080  func (ch *ctxHolder) ctx() context.Context {
  1081  	if ch.sessionTracingCtx != nil {
  1082  		return ch.sessionTracingCtx
  1083  	}
  1084  	return ch.connCtx
  1085  }
  1086  
  1087  func (ch *ctxHolder) hijack(sessionTracingCtx context.Context) {
  1088  	if ch.sessionTracingCtx != nil {
  1089  		panic("hijack already in effect")
  1090  	}
  1091  	ch.sessionTracingCtx = sessionTracingCtx
  1092  }
  1093  
  1094  func (ch *ctxHolder) unhijack() {
  1095  	if ch.sessionTracingCtx == nil {
  1096  		panic("hijack not in effect")
  1097  	}
  1098  	ch.sessionTracingCtx = nil
  1099  }
  1100  
// prepStmtNamespace groups the prepared statements and the portals available
// on a session, each keyed by name.
type prepStmtNamespace struct {
	// prepStmts contains the prepared statements currently available on the
	// session.
	prepStmts map[string]*PreparedStatement
	// portals contains the portals currently available on the session.
	portals map[string]*PreparedPortal
}
  1108  
  1109  func (ns prepStmtNamespace) String() string {
  1110  	var sb strings.Builder
  1111  	sb.WriteString("Prep stmts: ")
  1112  	for name := range ns.prepStmts {
  1113  		sb.WriteString(name + " ")
  1114  	}
  1115  	sb.WriteString("Portals: ")
  1116  	for name := range ns.portals {
  1117  		sb.WriteString(name + " ")
  1118  	}
  1119  	return sb.String()
  1120  }
  1121  
  1122  // resetTo resets a namespace to equate another one (`to`). All the receiver's
  1123  // references are release and all the to's references are duplicated.
  1124  //
  1125  // An empty `to` can be passed in to deallocate everything.
  1126  func (ns *prepStmtNamespace) resetTo(ctx context.Context, to prepStmtNamespace) {
  1127  	for name, p := range ns.prepStmts {
  1128  		p.decRef(ctx)
  1129  		delete(ns.prepStmts, name)
  1130  	}
  1131  	for name, p := range ns.portals {
  1132  		p.decRef(ctx)
  1133  		delete(ns.portals, name)
  1134  	}
  1135  
  1136  	for name, ps := range to.prepStmts {
  1137  		ps.incRef(ctx)
  1138  		ns.prepStmts[name] = ps
  1139  	}
  1140  	for name, p := range to.portals {
  1141  		p.incRef(ctx)
  1142  		ns.portals[name] = p
  1143  	}
  1144  }
  1145  
  1146  // resetExtraTxnState resets the fields of ex.extraTxnState when a transaction
  1147  // commits, rolls back or restarts.
  1148  func (ex *connExecutor) resetExtraTxnState(
  1149  	ctx context.Context, dbCacheHolder *databaseCacheHolder, ev txnEvent,
  1150  ) error {
  1151  	ex.extraTxnState.jobs = nil
  1152  
  1153  	ex.extraTxnState.descCollection.ReleaseAll(ctx)
  1154  
  1155  	ex.extraTxnState.descCollection.ResetDatabaseCache(dbCacheHolder.getDatabaseCache())
  1156  
  1157  	// Close all portals.
  1158  	for name, p := range ex.extraTxnState.prepStmtsNamespace.portals {
  1159  		p.decRef(ctx)
  1160  		delete(ex.extraTxnState.prepStmtsNamespace.portals, name)
  1161  	}
  1162  
  1163  	switch ev {
  1164  	case txnCommit, txnRollback:
  1165  		ex.extraTxnState.savepoints.clear()
  1166  		// After txn is finished, we need to call onTxnFinish (if it's non-nil).
  1167  		if ex.extraTxnState.onTxnFinish != nil {
  1168  			ex.extraTxnState.onTxnFinish(ev)
  1169  			ex.extraTxnState.onTxnFinish = nil
  1170  		}
  1171  	}
  1172  	// NOTE: on txnRestart we don't need to muck with the savepoints stack. It's either a
  1173  	// a ROLLBACK TO SAVEPOINT that generated the event, and that statement deals with the
  1174  	// savepoints, or it's a rewind which also deals with them.
  1175  
  1176  	return nil
  1177  }
  1178  
  1179  // Ctx returns the transaction's ctx, if we're inside a transaction, or the
  1180  // session's context otherwise.
  1181  func (ex *connExecutor) Ctx() context.Context {
  1182  	if _, ok := ex.machine.CurState().(stateNoTxn); ok {
  1183  		return ex.ctxHolder.ctx()
  1184  	}
  1185  	// stateInternalError is used by the InternalExecutor.
  1186  	if _, ok := ex.machine.CurState().(stateInternalError); ok {
  1187  		return ex.ctxHolder.ctx()
  1188  	}
  1189  	return ex.state.Ctx
  1190  }
  1191  
  1192  // activate engages the use of resources that must be cleaned up
  1193  // afterwards. after activate() completes, the close() method must be
  1194  // called.
  1195  //
  1196  // Args:
  1197  // parentMon: The root monitor.
  1198  // reserved: Memory reserved for the connection. The connExecutor takes
  1199  //   ownership of this memory.
  1200  func (ex *connExecutor) activate(
  1201  	ctx context.Context, parentMon *mon.BytesMonitor, reserved mon.BoundAccount,
  1202  ) {
  1203  	// Note: we pass `reserved` to sessionRootMon where it causes it to act as a
  1204  	// buffer. This is not done for sessionMon nor state.mon: these monitors don't
  1205  	// start with any buffer, so they'll need to ask their "parent" for memory as
  1206  	// soon as the first allocation. This is acceptable because the session is
  1207  	// single threaded, and the point of buffering is just to avoid contention.
  1208  	ex.mon.Start(ctx, parentMon, reserved)
  1209  	ex.sessionMon.Start(ctx, ex.mon, mon.BoundAccount{})
  1210  
  1211  	// Enable the trace if configured.
  1212  	if traceSessionEventLogEnabled.Get(&ex.server.cfg.Settings.SV) {
  1213  		remoteStr := "<admin>"
  1214  		if ex.sessionData.RemoteAddr != nil {
  1215  			remoteStr = ex.sessionData.RemoteAddr.String()
  1216  		}
  1217  		ex.eventLog = trace.NewEventLog(
  1218  			fmt.Sprintf("sql session [%s]", ex.sessionData.User), remoteStr)
  1219  	}
  1220  
  1221  	ex.activated = true
  1222  }
  1223  
  1224  // run implements the run loop for a connExecutor. Commands are read one by one
  1225  // from the input buffer; they are executed and the resulting state transitions
  1226  // are performed.
  1227  //
  1228  // run returns when either the stmtBuf is closed by someone else or when an
  1229  // error is propagated from query execution. Note that query errors are not
  1230  // propagated as errors to this layer; only things that are supposed to
  1231  // terminate the session are (e.g. client communication errors and ctx
  1232  // cancelations).
  1233  // run() is expected to react on ctx cancelation, but the caller needs to also
  1234  // close the stmtBuf at the same time as canceling the ctx. If cancelation
  1235  // happens in the middle of a query execution, that's expected to interrupt the
  1236  // execution and generate an error. run() is then supposed to return because the
  1237  // buffer is closed and no further commands can be read.
  1238  //
  1239  // When this returns, ex.close() needs to be called and  the connection to the
  1240  // client needs to be terminated. If it returns with an error, that error may
  1241  // represent a communication error (in which case the connection might already
  1242  // also have an error from the reading side), or some other unexpected failure.
  1243  // Returned errors have not been communicated to the client: it's up to the
  1244  // caller to do that if it wants.
  1245  //
  1246  // If not nil, reserved represents Memory reserved for the connection. The
  1247  // connExecutor takes ownership of this memory.
  1248  //
  1249  // onCancel, if not nil, will be called when the SessionRegistry cancels the
  1250  // session. TODO(andrei): This is hooked up to canceling the pgwire connection's
  1251  // context (of which ctx is also a child). It seems uncouth for the connExecutor
  1252  // to cancel a higher-level task. A better design would probably be for pgwire
  1253  // to own the SessionRegistry, instead of it being owned by the sql.Server -
  1254  // then pgwire would directly cancel its own tasks; the sessions also more
  1255  // naturally belong there. There is a problem, however, as query cancelation (as
  1256  // opposed to session cancelation) is done through the SessionRegistry and that
  1257  // does belong with the connExecutor. Introducing a query registry, separate
  1258  // from the session registry, might be too costly - the way query cancelation
  1259  // works is that every session is asked to cancel a given query until the right
  1260  // one is found. That seems like a good performance trade-off.
  1261  func (ex *connExecutor) run(
  1262  	ctx context.Context,
  1263  	parentMon *mon.BytesMonitor,
  1264  	reserved mon.BoundAccount,
  1265  	onCancel context.CancelFunc,
  1266  ) error {
  1267  	if !ex.activated {
  1268  		ex.activate(ctx, parentMon, reserved)
  1269  	}
  1270  	ex.ctxHolder.connCtx = ctx
  1271  	ex.onCancelSession = onCancel
  1272  
  1273  	ex.sessionID = ex.generateID()
  1274  	ex.server.cfg.SessionRegistry.register(ex.sessionID, ex)
  1275  	ex.planner.extendedEvalCtx.setSessionID(ex.sessionID)
  1276  	defer ex.server.cfg.SessionRegistry.deregister(ex.sessionID)
  1277  
  1278  	for {
  1279  		ex.curStmt = nil
  1280  		if err := ctx.Err(); err != nil {
  1281  			return err
  1282  		}
  1283  
  1284  		var err error
  1285  		if err = ex.execCmd(ex.Ctx()); err != nil {
  1286  			if errors.IsAny(err, io.EOF, errDrainingComplete) {
  1287  				return nil
  1288  			}
  1289  			return err
  1290  		}
  1291  	}
  1292  }
  1293  
// errDrainingComplete is returned by execCmd when the connExecutor previously
// got a DrainRequest and the time is ripe to finish this session (i.e. we're no
// longer in a transaction). run() treats it as a clean termination and returns
// nil to its caller.
var errDrainingComplete = fmt.Errorf("draining done. this is a good time to finish this session")
  1298  
// execCmd reads the current command from the stmtBuf and executes it. The
// transaction state is modified accordingly, and the stmtBuf is advanced or
// rewound accordingly.
//
// Returns an error if communication of results to the client has failed and the
// session should be terminated. Returns io.EOF if the stmtBuf has been closed.
// Returns errDrainingComplete if the session should finish because draining is
// complete (i.e. we received a DrainRequest - possibly previously - and the
// connection is found to be idle).
  1308  func (ex *connExecutor) execCmd(ctx context.Context) error {
  1309  	cmd, pos, err := ex.stmtBuf.CurCmd()
  1310  	if err != nil {
  1311  		return err // err could be io.EOF
  1312  	}
  1313  
  1314  	ctx, sp := tracing.EnsureChildSpan(
  1315  		ctx, ex.server.cfg.AmbientCtx.Tracer,
  1316  		// We print the type of command, not the String() which includes long
  1317  		// statements.
  1318  		cmd.command())
  1319  	defer sp.Finish()
  1320  
  1321  	if log.ExpensiveLogEnabled(ctx, 2) || ex.eventLog != nil {
  1322  		ex.sessionEventf(ctx, "[%s pos:%d] executing %s",
  1323  			ex.machine.CurState(), pos, cmd)
  1324  	}
  1325  
  1326  	var ev fsm.Event
  1327  	var payload fsm.EventPayload
  1328  	var res ResultBase
  1329  
  1330  	switch tcmd := cmd.(type) {
  1331  	case ExecStmt:
  1332  		if tcmd.AST == nil {
  1333  			res = ex.clientComm.CreateEmptyQueryResult(pos)
  1334  			break
  1335  		}
  1336  		ex.curStmt = tcmd.AST
  1337  
  1338  		stmtRes := ex.clientComm.CreateStatementResult(
  1339  			tcmd.AST,
  1340  			NeedRowDesc,
  1341  			pos,
  1342  			nil, /* formatCodes */
  1343  			ex.sessionData.DataConversion,
  1344  			0,  /* limit */
  1345  			"", /* portalName */
  1346  			ex.implicitTxn(),
  1347  		)
  1348  		res = stmtRes
  1349  		curStmt := Statement{Statement: tcmd.Statement}
  1350  
  1351  		ex.phaseTimes[sessionQueryReceived] = tcmd.TimeReceived
  1352  		ex.phaseTimes[sessionStartParse] = tcmd.ParseStart
  1353  		ex.phaseTimes[sessionEndParse] = tcmd.ParseEnd
  1354  
  1355  		stmtCtx := withStatement(ctx, ex.curStmt)
  1356  		ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, nil /* pinfo */)
  1357  		if err != nil {
  1358  			return err
  1359  		}
  1360  	case ExecPortal:
  1361  		// ExecPortal is handled like ExecStmt, except that the placeholder info
  1362  		// is taken from the portal.
  1363  
  1364  		portal, ok := ex.extraTxnState.prepStmtsNamespace.portals[tcmd.Name]
  1365  		if !ok {
  1366  			err := pgerror.Newf(
  1367  				pgcode.InvalidCursorName, "unknown portal %q", tcmd.Name)
  1368  			ev = eventNonRetriableErr{IsCommit: fsm.False}
  1369  			payload = eventNonRetriableErrPayload{err: err}
  1370  			res = ex.clientComm.CreateErrorResult(pos)
  1371  			break
  1372  		}
  1373  		if portal.Stmt.AST == nil {
  1374  			res = ex.clientComm.CreateEmptyQueryResult(pos)
  1375  			break
  1376  		}
  1377  
  1378  		if log.ExpensiveLogEnabled(ctx, 2) {
  1379  			log.VEventf(ctx, 2, "portal resolved to: %s", portal.Stmt.AST.String())
  1380  		}
  1381  		ex.curStmt = portal.Stmt.AST
  1382  
  1383  		pinfo := &tree.PlaceholderInfo{
  1384  			PlaceholderTypesInfo: tree.PlaceholderTypesInfo{
  1385  				TypeHints: portal.Stmt.TypeHints,
  1386  				Types:     portal.Stmt.Types,
  1387  			},
  1388  			Values: portal.Qargs,
  1389  		}
  1390  
  1391  		ex.phaseTimes[sessionQueryReceived] = tcmd.TimeReceived
  1392  		// When parsing has been done earlier, via a separate parse
  1393  		// message, it is not any more part of the statistics collected
  1394  		// for this execution. In that case, we simply report that
  1395  		// parsing took no time.
  1396  		ex.phaseTimes[sessionStartParse] = time.Time{}
  1397  		ex.phaseTimes[sessionEndParse] = time.Time{}
  1398  
  1399  		stmtRes := ex.clientComm.CreateStatementResult(
  1400  			portal.Stmt.AST,
  1401  			// The client is using the extended protocol, so no row description is
  1402  			// needed.
  1403  			DontNeedRowDesc,
  1404  			pos, portal.OutFormats,
  1405  			ex.sessionData.DataConversion,
  1406  			tcmd.Limit,
  1407  			tcmd.Name,
  1408  			ex.implicitTxn(),
  1409  		)
  1410  		res = stmtRes
  1411  		curStmt := Statement{
  1412  			Statement:     portal.Stmt.Statement,
  1413  			Prepared:      portal.Stmt,
  1414  			ExpectedTypes: portal.Stmt.Columns,
  1415  			AnonymizedStr: portal.Stmt.AnonymizedStr,
  1416  		}
  1417  		stmtCtx := withStatement(ctx, ex.curStmt)
  1418  		ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, pinfo)
  1419  		if err != nil {
  1420  			return err
  1421  		}
  1422  	case PrepareStmt:
  1423  		ex.curStmt = tcmd.AST
  1424  		res = ex.clientComm.CreatePrepareResult(pos)
  1425  		stmtCtx := withStatement(ctx, ex.curStmt)
  1426  		ev, payload = ex.execPrepare(stmtCtx, tcmd)
  1427  	case DescribeStmt:
  1428  		descRes := ex.clientComm.CreateDescribeResult(pos)
  1429  		res = descRes
  1430  		ev, payload = ex.execDescribe(ctx, tcmd, descRes)
  1431  	case BindStmt:
  1432  		res = ex.clientComm.CreateBindResult(pos)
  1433  		ev, payload = ex.execBind(ctx, tcmd)
  1434  	case DeletePreparedStmt:
  1435  		res = ex.clientComm.CreateDeleteResult(pos)
  1436  		ev, payload = ex.execDelPrepStmt(ctx, tcmd)
  1437  	case SendError:
  1438  		res = ex.clientComm.CreateErrorResult(pos)
  1439  		ev = eventNonRetriableErr{IsCommit: fsm.False}
  1440  		payload = eventNonRetriableErrPayload{err: tcmd.Err}
  1441  	case Sync:
  1442  		// Note that the Sync result will flush results to the network connection.
  1443  		res = ex.clientComm.CreateSyncResult(pos)
  1444  		if ex.draining {
  1445  			// If we're draining, check whether this is a good time to finish the
  1446  			// connection. If we're not inside a transaction, we stop processing
  1447  			// now. If we are inside a transaction, we'll check again the next time
  1448  			// a Sync is processed.
  1449  			if ex.idleConn() {
  1450  				// If we're about to close the connection, close res in order to flush
  1451  				// now, as we won't have an opportunity to do it later.
  1452  				res.Close(ctx, stateToTxnStatusIndicator(ex.machine.CurState()))
  1453  				return errDrainingComplete
  1454  			}
  1455  		}
  1456  	case CopyIn:
  1457  		res = ex.clientComm.CreateCopyInResult(pos)
  1458  		var err error
  1459  		ev, payload, err = ex.execCopyIn(ctx, tcmd)
  1460  		if err != nil {
  1461  			return err
  1462  		}
  1463  	case DrainRequest:
  1464  		// We received a drain request. We terminate immediately if we're not in a
  1465  		// transaction. If we are in a transaction, we'll finish as soon as a Sync
  1466  		// command (i.e. the end of a batch) is processed outside of a
  1467  		// transaction.
  1468  		ex.draining = true
  1469  		res = ex.clientComm.CreateDrainResult(pos)
  1470  		if ex.idleConn() {
  1471  			return errDrainingComplete
  1472  		}
  1473  	case Flush:
  1474  		// Closing the res will flush the connection's buffer.
  1475  		res = ex.clientComm.CreateFlushResult(pos)
  1476  	default:
  1477  		panic(fmt.Sprintf("unsupported command type: %T", cmd))
  1478  	}
  1479  
  1480  	var advInfo advanceInfo
  1481  
  1482  	// If an event was generated, feed it to the state machine.
  1483  	if ev != nil {
  1484  		var err error
  1485  		advInfo, err = ex.txnStateTransitionsApplyWrapper(ev, payload, res, pos)
  1486  		if err != nil {
  1487  			return err
  1488  		}
  1489  	} else {
  1490  		// If no event was generated synthesize an advance code.
  1491  		advInfo = advanceInfo{
  1492  			code: advanceOne,
  1493  		}
  1494  	}
  1495  
  1496  	// Decide if we need to close the result or not. We don't need to do it if
  1497  	// we're staying in place or rewinding - the statement will be executed
  1498  	// again.
  1499  	if advInfo.code != stayInPlace && advInfo.code != rewind {
  1500  		// Close the result. In case of an execution error, the result might have
  1501  		// its error set already or it might not.
  1502  		resErr := res.Err()
  1503  
  1504  		pe, ok := payload.(payloadWithError)
  1505  		if ok {
  1506  			ex.sessionEventf(ctx, "execution error: %s", pe.errorCause())
  1507  			if resErr == nil {
  1508  				res.SetError(pe.errorCause())
  1509  			}
  1510  		}
  1511  		res.Close(ctx, stateToTxnStatusIndicator(ex.machine.CurState()))
  1512  	} else {
  1513  		res.Discard()
  1514  	}
  1515  
  1516  	// Move the cursor according to what the state transition told us to do.
  1517  	switch advInfo.code {
  1518  	case advanceOne:
  1519  		ex.stmtBuf.AdvanceOne()
  1520  	case skipBatch:
  1521  		// We'll flush whatever results we have to the network. The last one must
  1522  		// be an error. This flush may seem unnecessary, as we generally only
  1523  		// flush when the client requests it through a Sync or a Flush but without
  1524  		// it the Node.js driver isn't happy. That driver likes to send "flush"
  1525  		// command and only sends Syncs once it received some data. But we ignore
  1526  		// flush commands (just like we ignore any other commands) when skipping
  1527  		// to the next batch.
  1528  		if err := ex.clientComm.Flush(pos); err != nil {
  1529  			return err
  1530  		}
  1531  		if err := ex.stmtBuf.seekToNextBatch(); err != nil {
  1532  			return err
  1533  		}
  1534  	case rewind:
  1535  		ex.rewindPrepStmtNamespace(ctx)
  1536  		ex.extraTxnState.savepoints = ex.extraTxnState.savepointsAtTxnRewindPos
  1537  		advInfo.rewCap.rewindAndUnlock(ctx)
  1538  	case stayInPlace:
  1539  		// Nothing to do. The same statement will be executed again.
  1540  	default:
  1541  		panic(fmt.Sprintf("unexpected advance code: %s", advInfo.code))
  1542  	}
  1543  
  1544  	if err := ex.updateTxnRewindPosMaybe(ctx, cmd, pos, advInfo); err != nil {
  1545  		return err
  1546  	}
  1547  
  1548  	if rewindCapability, canRewind := ex.getRewindTxnCapability(); !canRewind {
  1549  		// Trim statements that cannot be retried to reclaim memory.
  1550  		ex.stmtBuf.ltrim(ctx, pos)
  1551  	} else {
  1552  		rewindCapability.close()
  1553  	}
  1554  
  1555  	if ex.server.cfg.TestingKnobs.AfterExecCmd != nil {
  1556  		ex.server.cfg.TestingKnobs.AfterExecCmd(ctx, cmd, ex.stmtBuf)
  1557  	}
  1558  
  1559  	return nil
  1560  }
  1561  
  1562  func (ex *connExecutor) idleConn() bool {
  1563  	switch ex.machine.CurState().(type) {
  1564  	case stateNoTxn:
  1565  		return true
  1566  	case stateInternalError:
  1567  		return true
  1568  	default:
  1569  		return false
  1570  	}
  1571  }
  1572  
  1573  // updateTxnRewindPosMaybe checks whether the ex.extraTxnState.txnRewindPos
  1574  // should be advanced, based on the advInfo produced by running cmd at position
  1575  // pos.
  1576  func (ex *connExecutor) updateTxnRewindPosMaybe(
  1577  	ctx context.Context, cmd Command, pos CmdPos, advInfo advanceInfo,
  1578  ) error {
  1579  	// txnRewindPos is only maintained while in stateOpen.
  1580  	if _, ok := ex.machine.CurState().(stateOpen); !ok {
  1581  		return nil
  1582  	}
  1583  	if advInfo.txnEvent == txnStart || advInfo.txnEvent == txnRestart {
  1584  		var nextPos CmdPos
  1585  		switch advInfo.code {
  1586  		case stayInPlace:
  1587  			nextPos = pos
  1588  		case advanceOne:
  1589  			// Future rewinds will refer to the next position; the statement that
  1590  			// started the transaction (i.e. BEGIN) will not be itself be executed
  1591  			// again.
  1592  			nextPos = pos + 1
  1593  		case rewind:
  1594  			if advInfo.rewCap.rewindPos != ex.extraTxnState.txnRewindPos {
  1595  				return errors.AssertionFailedf(
  1596  					"unexpected rewind position: %d when txn start is: %d",
  1597  					errors.Safe(advInfo.rewCap.rewindPos),
  1598  					errors.Safe(ex.extraTxnState.txnRewindPos))
  1599  			}
  1600  			// txnRewindPos stays unchanged.
  1601  			return nil
  1602  		default:
  1603  			return errors.AssertionFailedf(
  1604  				"unexpected advance code when starting a txn: %s",
  1605  				errors.Safe(advInfo.code))
  1606  		}
  1607  		ex.setTxnRewindPos(ctx, nextPos)
  1608  	} else {
  1609  		// See if we can advance the rewind point even if this is not the point
  1610  		// where the transaction started. We can do that after running a special
  1611  		// statement (e.g. SET TRANSACTION or SAVEPOINT) or after most commands that
  1612  		// don't execute statements.
  1613  		// The idea is that, for example, we don't want the following sequence to
  1614  		// disable retries for what comes after the sequence:
  1615  		// 1: PrepareStmt BEGIN
  1616  		// 2: BindStmt
  1617  		// 3: ExecutePortal
  1618  		// 4: Sync
  1619  
  1620  		// Note that the current command cannot influence the rewind point if
  1621  		// if the rewind point is not current set to the command's position
  1622  		// (i.e. we don't do anything if txnRewindPos != pos).
  1623  
  1624  		if advInfo.code != advanceOne {
  1625  			panic(fmt.Sprintf("unexpected advanceCode: %s", advInfo.code))
  1626  		}
  1627  
  1628  		var canAdvance bool
  1629  		_, inOpen := ex.machine.CurState().(stateOpen)
  1630  		if inOpen && (ex.extraTxnState.txnRewindPos == pos) {
  1631  			switch tcmd := cmd.(type) {
  1632  			case ExecStmt:
  1633  				canAdvance = ex.stmtDoesntNeedRetry(tcmd.AST)
  1634  			case ExecPortal:
  1635  				portal := ex.extraTxnState.prepStmtsNamespace.portals[tcmd.Name]
  1636  				canAdvance = ex.stmtDoesntNeedRetry(portal.Stmt.AST)
  1637  			case PrepareStmt:
  1638  				canAdvance = true
  1639  			case DescribeStmt:
  1640  				canAdvance = true
  1641  			case BindStmt:
  1642  				canAdvance = true
  1643  			case DeletePreparedStmt:
  1644  				canAdvance = true
  1645  			case SendError:
  1646  				canAdvance = true
  1647  			case Sync:
  1648  				canAdvance = true
  1649  			case CopyIn:
  1650  				// Can't advance.
  1651  			case DrainRequest:
  1652  				canAdvance = true
  1653  			case Flush:
  1654  				canAdvance = true
  1655  			default:
  1656  				panic(fmt.Sprintf("unsupported cmd: %T", cmd))
  1657  			}
  1658  			if canAdvance {
  1659  				ex.setTxnRewindPos(ctx, pos+1)
  1660  			}
  1661  		}
  1662  	}
  1663  	return nil
  1664  }
  1665  
  1666  // setTxnRewindPos updates the position to which future rewinds will refer.
  1667  //
  1668  // All statements with lower position in stmtBuf (if any) are removed, as we
  1669  // won't ever need them again.
  1670  func (ex *connExecutor) setTxnRewindPos(ctx context.Context, pos CmdPos) {
  1671  	if pos <= ex.extraTxnState.txnRewindPos {
  1672  		panic(fmt.Sprintf("can only move the  txnRewindPos forward. "+
  1673  			"Was: %d; new value: %d", ex.extraTxnState.txnRewindPos, pos))
  1674  	}
  1675  	ex.extraTxnState.txnRewindPos = pos
  1676  	ex.stmtBuf.ltrim(ctx, pos)
  1677  	ex.commitPrepStmtNamespace(ctx)
  1678  	ex.extraTxnState.savepointsAtTxnRewindPos = ex.extraTxnState.savepoints.clone()
  1679  }
  1680  
  1681  // stmtDoesntNeedRetry returns true if the given statement does not need to be
  1682  // retried when performing automatic retries. This means that the results of the
  1683  // statement do not change with retries.
  1684  func (ex *connExecutor) stmtDoesntNeedRetry(stmt tree.Statement) bool {
  1685  	wrap := Statement{Statement: parser.Statement{AST: stmt}}
  1686  	return isSavepoint(wrap) || isSetTransaction(wrap)
  1687  }
  1688  
  1689  func stateToTxnStatusIndicator(s fsm.State) TransactionStatusIndicator {
  1690  	switch s.(type) {
  1691  	case stateOpen:
  1692  		return InTxnBlock
  1693  	case stateAborted:
  1694  		return InFailedTxnBlock
  1695  	case stateNoTxn:
  1696  		return IdleTxnBlock
  1697  	case stateCommitWait:
  1698  		return InTxnBlock
  1699  	case stateInternalError:
  1700  		return InTxnBlock
  1701  	default:
  1702  		panic(fmt.Sprintf("unknown state: %T", s))
  1703  	}
  1704  }
  1705  
// We handle the CopyFrom statement by creating a copyMachine and handing it
// control over the connection until the copying is done. The contract is that,
// when this is called, the pgwire.conn is not reading from the network
// connection any more until this returns. The copyMachine will do the reading
// and writing up to the CommandComplete message.
//
// Failures to create or run the copy machine are not returned as errors;
// they are reported as an eventNonRetriableErr event with the error in its
// payload. On success, a nil event and a nil payload are returned. In the code
// visible here the error return value is always nil.
func (ex *connExecutor) execCopyIn(
	ctx context.Context, cmd CopyIn,
) (fsm.Event, fsm.EventPayload, error) {

	// When we're done, unblock the network connection.
	defer cmd.CopyDone.Done()

	// COPY is only accepted outside of a transaction or inside an open one; in
	// any other state we report a "transaction aborted" error.
	state := ex.machine.CurState()
	_, isNoTxn := state.(stateNoTxn)
	_, isOpen := state.(stateOpen)
	if !isNoTxn && !isOpen {
		ev := eventNonRetriableErr{IsCommit: fsm.False}
		payload := eventNonRetriableErrPayload{
			err: sqlbase.NewTransactionAbortedError("" /* customMsg */)}
		return ev, payload, nil
	}

	// If we're in an explicit txn, then the copying will be done within that
	// txn. Otherwise, we tell the copyMachine to manage its own transactions.
	var txnOpt copyTxnOpt
	if isOpen {
		txnOpt = copyTxnOpt{
			txn:           ex.state.mu.txn,
			txnTimestamp:  ex.state.sqlTimestamp,
			stmtTimestamp: ex.server.cfg.Clock.PhysicalTime(),
		}
	}

	// monToStop is only set when we start the monitor ourselves below; the
	// deferred function then stops it when this function returns.
	var monToStop *mon.BytesMonitor
	defer func() {
		if monToStop != nil {
			monToStop.Stop(ctx)
		}
	}()
	if isNoTxn {
		// HACK: We're reaching inside ex.state and starting the monitor. Normally
		// that's driven by the state machine, but we're bypassing the state machine
		// here.
		ex.state.mon.Start(ctx, ex.sessionMon, mon.BoundAccount{} /* reserved */)
		monToStop = ex.state.mon
	}
	txnOpt.resetPlanner = func(ctx context.Context, p *planner, txn *kv.Txn, txnTS time.Time, stmtTS time.Time) {
		// HACK: We're reaching inside ex.state and changing sqlTimestamp by hand.
		// It is used by resetPlanner. Normally sqlTimestamp is updated by the
		// state machine, but the copyMachine manages its own transactions without
		// going through the state machine.
		ex.state.sqlTimestamp = txnTS
		ex.statsCollector = ex.newStatsCollector()
		ex.statsCollector.reset(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
		ex.initPlanner(ctx, p)
		ex.resetPlanner(ctx, p, txn, stmtTS)
	}
	var cm copyMachineInterface
	var err error
	// A COPY targeting the file upload table in the crdb_internal schema is
	// served by a file upload machine; everything else by a regular copy
	// machine.
	if table := cmd.Stmt.Table; table.Table() == fileUploadTable && table.Schema() == crdbInternalName {
		cm, err = newFileUploadMachine(ctx, cmd.Conn, cmd.Stmt, txnOpt, ex.server.cfg)
	} else {
		cm, err = newCopyMachine(
			ctx, cmd.Conn, cmd.Stmt, txnOpt, ex.server.cfg,
			// execInsertPlan
			func(ctx context.Context, p *planner, res RestrictedCommandResult) error {
				_, _, err := ex.execWithDistSQLEngine(ctx, p, tree.RowsAffected, res, false /* distribute */, nil /* progressAtomic */)
				return err
			},
		)
	}
	if err != nil {
		ev := eventNonRetriableErr{IsCommit: fsm.False}
		payload := eventNonRetriableErrPayload{err: err}
		return ev, payload, nil
	}
	if err := cm.run(ctx); err != nil {
		// TODO(andrei): We don't have a retriable error story for the copy machine.
		// When running outside of a txn, the copyMachine should probably do retries
		// internally. When not, it's unclear what we should do. For now, we abort
		// the txn (if any).
		// We also don't have a story for distinguishing communication errors (which
		// should terminate the connection) from query errors. For now, we treat all
		// errors as query errors.
		ev := eventNonRetriableErr{IsCommit: fsm.False}
		payload := eventNonRetriableErrPayload{err: err}
		return ev, payload, nil
	}
	return nil, nil, nil
}
  1796  
  1797  // stmtHasNoData returns true if describing a result of the input statement
  1798  // type should return NoData.
  1799  func stmtHasNoData(stmt tree.Statement) bool {
  1800  	return stmt == nil || stmt.StatementType() != tree.Rows
  1801  }
  1802  
  1803  // generateID generates a unique ID based on the SQL instance ID and its current
  1804  // HLC timestamp. These IDs are either scoped at the query level or at the
  1805  // session level.
  1806  func (ex *connExecutor) generateID() ClusterWideID {
  1807  	return GenerateClusterWideID(ex.server.cfg.Clock.Now(), ex.server.cfg.NodeID.SQLInstanceID())
  1808  }
  1809  
  1810  // commitPrepStmtNamespace deallocates everything in
  1811  // prepStmtsNamespaceAtTxnRewindPos that's not part of prepStmtsNamespace.
  1812  func (ex *connExecutor) commitPrepStmtNamespace(ctx context.Context) {
  1813  	ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos.resetTo(
  1814  		ctx, ex.extraTxnState.prepStmtsNamespace)
  1815  }
  1816  
  1817  // commitPrepStmtNamespace deallocates everything in prepStmtsNamespace that's
  1818  // not part of prepStmtsNamespaceAtTxnRewindPos.
  1819  func (ex *connExecutor) rewindPrepStmtNamespace(ctx context.Context) {
  1820  	ex.extraTxnState.prepStmtsNamespace.resetTo(
  1821  		ctx, ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos)
  1822  }
  1823  
  1824  // getRewindTxnCapability checks whether rewinding to the position previously
  1825  // set through setTxnRewindPos() is possible and, if it is, returns a
  1826  // rewindCapability bound to that position. The returned bool is true if the
  1827  // rewind is possible. If it is, client communication is blocked until the
  1828  // rewindCapability is exercised.
  1829  func (ex *connExecutor) getRewindTxnCapability() (rewindCapability, bool) {
  1830  	cl := ex.clientComm.LockCommunication()
  1831  
  1832  	// If we already delivered results at or past the start position, we can't
  1833  	// rewind.
  1834  	if cl.ClientPos() >= ex.extraTxnState.txnRewindPos {
  1835  		cl.Close()
  1836  		return rewindCapability{}, false
  1837  	}
  1838  	return rewindCapability{
  1839  		cl:        cl,
  1840  		buf:       ex.stmtBuf,
  1841  		rewindPos: ex.extraTxnState.txnRewindPos,
  1842  	}, true
  1843  }
  1844  
  1845  // isCommit returns true if stmt is a "COMMIT" statement.
  1846  func isCommit(stmt tree.Statement) bool {
  1847  	_, ok := stmt.(*tree.CommitTransaction)
  1848  	return ok
  1849  }
  1850  
  1851  func errIsRetriable(err error) bool {
  1852  	return errors.HasType(err, (*roachpb.TransactionRetryWithProtoRefreshError)(nil))
  1853  }
  1854  
  1855  // makeErrEvent takes an error and returns either an eventRetriableErr or an
  1856  // eventNonRetriableErr, depending on the error type.
  1857  func (ex *connExecutor) makeErrEvent(err error, stmt tree.Statement) (fsm.Event, fsm.EventPayload) {
  1858  	retriable := errIsRetriable(err)
  1859  	if retriable {
  1860  		rc, canAutoRetry := ex.getRewindTxnCapability()
  1861  		ev := eventRetriableErr{
  1862  			IsCommit:     fsm.FromBool(isCommit(stmt)),
  1863  			CanAutoRetry: fsm.FromBool(canAutoRetry),
  1864  		}
  1865  		payload := eventRetriableErrPayload{
  1866  			err:    err,
  1867  			rewCap: rc,
  1868  		}
  1869  		return ev, payload
  1870  	}
  1871  	ev := eventNonRetriableErr{
  1872  		IsCommit: fsm.FromBool(isCommit(stmt)),
  1873  	}
  1874  	payload := eventNonRetriableErrPayload{err: err}
  1875  	return ev, payload
  1876  }
  1877  
  1878  // setTransactionModes implements the txnModesSetter interface.
  1879  func (ex *connExecutor) setTransactionModes(
  1880  	modes tree.TransactionModes, asOfTs hlc.Timestamp,
  1881  ) error {
  1882  	// This method cheats and manipulates ex.state directly, not through an event.
  1883  	// The alternative would be to create a special event, but it's unclear how
  1884  	// that'd work given that this method is called while executing a statement.
  1885  
  1886  	// Transform the transaction options into the types needed by the state
  1887  	// machine.
  1888  	if modes.UserPriority != tree.UnspecifiedUserPriority {
  1889  		pri := txnPriorityToProto(modes.UserPriority)
  1890  		if err := ex.state.setPriority(pri); err != nil {
  1891  			return err
  1892  		}
  1893  	}
  1894  	if modes.Isolation != tree.UnspecifiedIsolation && modes.Isolation != tree.SerializableIsolation {
  1895  		return errors.AssertionFailedf(
  1896  			"unknown isolation level: %s", errors.Safe(modes.Isolation))
  1897  	}
  1898  	rwMode := modes.ReadWriteMode
  1899  	if modes.AsOf.Expr != nil && (asOfTs == hlc.Timestamp{}) {
  1900  		return errors.AssertionFailedf("expected an evaluated AS OF timestamp")
  1901  	}
  1902  	if (asOfTs != hlc.Timestamp{}) {
  1903  		ex.state.setHistoricalTimestamp(ex.Ctx(), asOfTs)
  1904  		ex.state.sqlTimestamp = asOfTs.GoTime()
  1905  		if rwMode == tree.UnspecifiedReadWriteMode {
  1906  			rwMode = tree.ReadOnly
  1907  		}
  1908  	}
  1909  	return ex.state.setReadOnlyMode(rwMode)
  1910  }
  1911  
  1912  func txnPriorityToProto(mode tree.UserPriority) roachpb.UserPriority {
  1913  	var pri roachpb.UserPriority
  1914  	switch mode {
  1915  	case tree.UnspecifiedUserPriority:
  1916  		pri = roachpb.NormalUserPriority
  1917  	case tree.Low:
  1918  		pri = roachpb.MinUserPriority
  1919  	case tree.Normal:
  1920  		pri = roachpb.NormalUserPriority
  1921  	case tree.High:
  1922  		pri = roachpb.MaxUserPriority
  1923  	default:
  1924  		log.Fatalf(context.Background(), "unknown user priority: %s", mode)
  1925  	}
  1926  	return pri
  1927  }
  1928  
  1929  func (ex *connExecutor) txnPriorityWithSessionDefault(mode tree.UserPriority) roachpb.UserPriority {
  1930  	if mode == tree.UnspecifiedUserPriority {
  1931  		mode = tree.UserPriority(ex.sessionData.DefaultTxnPriority)
  1932  	}
  1933  	return txnPriorityToProto(mode)
  1934  }
  1935  
  1936  func (ex *connExecutor) readWriteModeWithSessionDefault(
  1937  	mode tree.ReadWriteMode,
  1938  ) tree.ReadWriteMode {
  1939  	if mode == tree.UnspecifiedReadWriteMode {
  1940  		if ex.sessionData.DefaultReadOnly {
  1941  			return tree.ReadOnly
  1942  		}
  1943  		return tree.ReadWrite
  1944  	}
  1945  	return mode
  1946  }
  1947  
  1948  // initEvalCtx initializes the fields of an extendedEvalContext that stay the
  1949  // same across multiple statements. resetEvalCtx must also be called before each
  1950  // statement, to reinitialize other fields.
  1951  func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalContext, p *planner) {
  1952  	scInterface := newSchemaInterface(&ex.extraTxnState.descCollection, ex.server.cfg.VirtualSchemas)
  1953  
  1954  	ie := MakeInternalExecutor(
  1955  		ctx,
  1956  		ex.server,
  1957  		ex.memMetrics,
  1958  		ex.server.cfg.Settings,
  1959  	)
  1960  	ie.SetSessionData(ex.sessionData)
  1961  
  1962  	*evalCtx = extendedEvalContext{
  1963  		EvalContext: tree.EvalContext{
  1964  			Planner:            p,
  1965  			PrivilegedAccessor: p,
  1966  			SessionAccessor:    p,
  1967  			ClientNoticeSender: p,
  1968  			Sequence:           p,
  1969  			Tenant:             p,
  1970  			SessionData:        ex.sessionData,
  1971  			Settings:           ex.server.cfg.Settings,
  1972  			TestingKnobs:       ex.server.cfg.EvalContextTestingKnobs,
  1973  			ClusterID:          ex.server.cfg.ClusterID(),
  1974  			ClusterName:        ex.server.cfg.RPCContext.ClusterName(),
  1975  			NodeID:             ex.server.cfg.NodeID,
  1976  			Codec:              ex.server.cfg.Codec,
  1977  			Locality:           ex.server.cfg.Locality,
  1978  			ReCache:            ex.server.reCache,
  1979  			InternalExecutor:   &ie,
  1980  			DB:                 ex.server.cfg.DB,
  1981  		},
  1982  		SessionMutator:    ex.dataMutator,
  1983  		VirtualSchemas:    ex.server.cfg.VirtualSchemas,
  1984  		Tracing:           &ex.sessionTracing,
  1985  		StatusServer:      ex.server.cfg.StatusServer,
  1986  		MemMetrics:        &ex.memMetrics,
  1987  		Descs:             &ex.extraTxnState.descCollection,
  1988  		ExecCfg:           ex.server.cfg,
  1989  		DistSQLPlanner:    ex.server.cfg.DistSQLPlanner,
  1990  		TxnModesSetter:    ex,
  1991  		Jobs:              &ex.extraTxnState.jobs,
  1992  		schemaAccessors:   scInterface,
  1993  		sqlStatsCollector: ex.statsCollector,
  1994  	}
  1995  }
  1996  
  1997  // resetEvalCtx initializes the fields of evalCtx that can change
  1998  // during a session (i.e. the fields not set by initEvalCtx).
  1999  //
  2000  // stmtTS is the timestamp that the statement_timestamp() SQL builtin will
  2001  // return for statements executed with this evalCtx. Since generally each
  2002  // statement is supposed to have a different timestamp, the evalCtx generally
  2003  // shouldn't be reused across statements.
  2004  func (ex *connExecutor) resetEvalCtx(evalCtx *extendedEvalContext, txn *kv.Txn, stmtTS time.Time) {
  2005  	evalCtx.TxnState = ex.getTransactionState()
  2006  	evalCtx.TxnReadOnly = ex.state.readOnly
  2007  	evalCtx.TxnImplicit = ex.implicitTxn()
  2008  	evalCtx.StmtTimestamp = stmtTS
  2009  	evalCtx.TxnTimestamp = ex.state.sqlTimestamp
  2010  	evalCtx.Placeholders = nil
  2011  	evalCtx.Annotations = nil
  2012  	evalCtx.IVarContainer = nil
  2013  	evalCtx.Context = ex.Ctx()
  2014  	evalCtx.Txn = txn
  2015  	evalCtx.Mon = ex.state.mon
  2016  	evalCtx.PrepareOnly = false
  2017  	evalCtx.SkipNormalize = false
  2018  }
  2019  
  2020  // getTransactionState retrieves a text representation of the given state.
  2021  func (ex *connExecutor) getTransactionState() string {
  2022  	state := ex.machine.CurState()
  2023  	if ex.implicitTxn() {
  2024  		// If the statement reading the state is in an implicit transaction, then we
  2025  		// want to tell NoTxn to the client.
  2026  		state = stateNoTxn{}
  2027  	}
  2028  	return state.(fmt.Stringer).String()
  2029  }
  2030  
  2031  func (ex *connExecutor) implicitTxn() bool {
  2032  	state := ex.machine.CurState()
  2033  	os, ok := state.(stateOpen)
  2034  	return ok && os.ImplicitTxn.Get()
  2035  }
  2036  
// initPlanner initializes a planner so it can be used for planning a
// query in the context of this session.
//
// It installs a fresh cancel checker, initializes the planner's extended eval
// context through initEvalCtx, wires in the session's data mutator and
// prepared statements accessor, clears the notice sender, and initializes the
// query cache session and the optimizer planning context.
func (ex *connExecutor) initPlanner(ctx context.Context, p *planner) {
	p.cancelChecker = sqlbase.NewCancelChecker(ctx)

	ex.initEvalCtx(ctx, &p.extendedEvalCtx, p)

	p.sessionDataMutator = ex.dataMutator
	p.noticeSender = nil
	p.preparedStatements = ex.getPrepStmtsAccessor()

	p.queryCacheSession.Init()
	p.optPlanningCtx.init(p)
}
  2051  
// resetPlanner resets the state of p that is specific to a single statement.
// It binds the planner to txn, clears the current statement, resets the
// cancel checker, rebuilds the semantic context from the session data,
// refreshes the eval context through resetEvalCtx (using stmtTS as the
// statement timestamp), and clears the planner's per-statement flags.
func (ex *connExecutor) resetPlanner(
	ctx context.Context, p *planner, txn *kv.Txn, stmtTS time.Time,
) {
	p.txn = txn
	p.stmt = nil

	p.cancelChecker.Reset(ctx)

	// Start from a fresh semantic context and fill in the session-dependent
	// fields.
	p.semaCtx = tree.MakeSemaContext()
	p.semaCtx.Location = &ex.sessionData.DataConversion.Location
	p.semaCtx.SearchPath = ex.sessionData.SearchPath
	p.semaCtx.AsOfTimestamp = nil
	p.semaCtx.Annotations = nil
	p.semaCtx.TypeResolver = p

	ex.resetEvalCtx(&p.extendedEvalCtx, txn, stmtTS)

	p.autoCommit = false
	p.isPreparing = false
	p.avoidCachedDescriptors = false
	p.discardRows = false
	p.collectBundle = false
}
  2075  
// txnStateTransitionsApplyWrapper is a wrapper on top of Machine built with the
// TxnStateTransitions above. Its point is to detect when we go in and out of
// transactions and update some state.
//
// It applies ev (together with payload) to ex.machine, consumes the
// advanceInfo left in ex.state by the transition, and then reacts to the
// transaction event recorded in it:
//   - txnStart resets the auto-retry counter and records the transaction
//     start.
//   - txnCommit notifies the stats refresher, runs the jobs accumulated in
//     ex.extraTxnState.jobs, waits for dropped databases to be reflected in
//     the cache and then falls through to the reset performed for
//     txnRestart/txnRollback.
//   - txnRestart and txnRollback reset the extra transaction state.
//
// res is inspected on commit and may have its error set if running the jobs
// fails. pos is not referenced by the body of this function.
//
// A transition error of type *fsm.TransitionNotFoundError causes a panic.
//
// Any returned error indicates an unrecoverable error for the session;
// execution on this connection should be interrupted.
func (ex *connExecutor) txnStateTransitionsApplyWrapper(
	ev fsm.Event, payload fsm.EventPayload, res ResultBase, pos CmdPos,
) (advanceInfo, error) {
	// Remember whether the state we are in *before* applying the event is an
	// open implicit transaction; the answer is needed below, after the machine
	// has already moved on.
	var implicitTxn bool
	if os, ok := ex.machine.CurState().(stateOpen); ok {
		implicitTxn = os.ImplicitTxn.Get()
	}

	// The current statement is attached to the context used for the
	// transition.
	err := ex.machine.ApplyWithPayload(withStatement(ex.Ctx(), ex.curStmt), ev, payload)
	if err != nil {
		if errors.HasType(err, (*fsm.TransitionNotFoundError)(nil)) {
			panic(err)
		}
		return advanceInfo{}, err
	}

	advInfo := ex.state.consumeAdvanceInfo()

	// A rewind counts as one more automatic retry.
	if advInfo.code == rewind {
		ex.extraTxnState.autoRetryCounter++
	}

	// Handle transaction events which cause updates to txnState.
	switch advInfo.txnEvent {
	case noEvent:
	case txnStart:
		ex.extraTxnState.autoRetryCounter = 0
		ex.extraTxnState.onTxnFinish = ex.recordTransactionStart()
	case txnCommit:
		// A commit event is not expected to coexist with an error already set
		// on the result; treat that as a programming error, log and report it,
		// and fail the session.
		if res.Err() != nil {
			err := errorutil.UnexpectedWithIssueErrorf(
				26687,
				"programming error: non-error event %s generated even though res.Err() has been set to: %s",
				errors.Safe(advInfo.txnEvent.String()),
				res.Err())
			log.Errorf(ex.Ctx(), "%v", err)
			errorutil.SendReport(ex.Ctx(), &ex.server.cfg.Settings.SV, err)
			return advanceInfo{}, err
		}

		// handleErr records on res a failure of the post-commit jobs. How the
		// error is surfaced depends on whether the transaction was implicit.
		handleErr := func(err error) {
			if implicitTxn {
				// The schema change/job failed but it was also the only
				// operation in the transaction. In this case, the transaction's
				// error is the schema change error.
				// TODO (lucy): I'm not sure the above is true. What about DROP TABLE
				// with multiple tables?
				res.SetError(err)
			} else {
				// The schema change/job failed but everything else in the
				// transaction was actually committed successfully already. At
				// this point, it is too late to cancel the transaction. In
				// effect, we have violated the "A" of ACID.
				//
				// This situation is sufficiently serious that we cannot let the
				// error that caused the schema change to fail flow back to the
				// client as-is. We replace it by a custom code dedicated to
				// this situation. Replacement occurs because this error code is
				// a "serious error" and the code computation logic will give it
				// a higher priority.
				//
				// We also print out the original error code as prefix of the
				// error message, in case it was a serious error.
				newErr := pgerror.Wrapf(err,
					pgcode.TransactionCommittedWithSchemaChangeFailure,
					"transaction committed but schema change aborted with error: (%s)",
					pgerror.GetPGCode(err))
				newErr = errors.WithHint(newErr,
					"Some of the non-DDL statements may have committed successfully, "+
						"but some of the DDL statement(s) failed.\nManual inspection may be "+
						"required to determine the actual state of the database.")
				newErr = errors.WithIssueLink(newErr,
					errors.IssueLink{IssueURL: "https://github.com/cockroachdb/cockroach/issues/42061"})
				res.SetError(newErr)
			}
		}
		ex.notifyStatsRefresherOfNewTables(ex.Ctx())

		// Run the jobs queued up by the transaction. A failure here is reported
		// through res (see handleErr) and does not abort this function.
		// NOTE(review): this uses the connection context (ex.ctxHolder.connCtx)
		// rather than ex.Ctx(); presumably so the jobs are not tied to the
		// transaction's context - confirm.
		if err := ex.server.cfg.JobRegistry.Run(
			ex.ctxHolder.connCtx,
			ex.server.cfg.InternalExecutor,
			ex.extraTxnState.jobs); err != nil {
			handleErr(err)
		}

		// Wait for the cache to reflect the dropped databases if any.
		ex.extraTxnState.descCollection.WaitForCacheToDropDatabases(ex.Ctx())

		// A commit also needs the per-transaction state reset performed below.
		fallthrough
	case txnRestart, txnRollback:
		if err := ex.resetExtraTxnState(ex.Ctx(), ex.server.dbCache, advInfo.txnEvent); err != nil {
			return advanceInfo{}, err
		}
	default:
		return advanceInfo{}, errors.AssertionFailedf(
			"unexpected event: %v", errors.Safe(advInfo.txnEvent))
	}

	return advInfo, nil
}
  2182  
  2183  // initStatementResult initializes res according to a query.
  2184  //
  2185  // cols represents the columns of the result rows. Should be nil if
  2186  // stmt.AST.StatementType() != tree.Rows.
  2187  //
  2188  // If an error is returned, it is to be considered a query execution error.
  2189  func (ex *connExecutor) initStatementResult(
  2190  	ctx context.Context, res RestrictedCommandResult, stmt *Statement, cols sqlbase.ResultColumns,
  2191  ) error {
  2192  	for _, c := range cols {
  2193  		if err := checkResultType(c.Typ); err != nil {
  2194  			return err
  2195  		}
  2196  	}
  2197  	if stmt.AST.StatementType() == tree.Rows {
  2198  		// Note that this call is necessary even if cols is nil.
  2199  		res.SetColumns(ctx, cols)
  2200  	}
  2201  	return nil
  2202  }
  2203  
  2204  // newStatsCollector returns a sqlStatsCollector that will record stats in the
  2205  // session's stats containers.
  2206  func (ex *connExecutor) newStatsCollector() *sqlStatsCollector {
  2207  	return newSQLStatsCollector(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
  2208  }
  2209  
  2210  // cancelQuery is part of the registrySession interface.
  2211  func (ex *connExecutor) cancelQuery(queryID ClusterWideID) bool {
  2212  	ex.mu.Lock()
  2213  	defer ex.mu.Unlock()
  2214  	if queryMeta, exists := ex.mu.ActiveQueries[queryID]; exists {
  2215  		queryMeta.cancel()
  2216  		return true
  2217  	}
  2218  	return false
  2219  }
  2220  
  2221  // cancelSession is part of the registrySession interface.
  2222  func (ex *connExecutor) cancelSession() {
  2223  	if ex.onCancelSession == nil {
  2224  		return
  2225  	}
  2226  	// TODO(abhimadan): figure out how to send a nice error message to the client.
  2227  	ex.onCancelSession()
  2228  }
  2229  
  2230  // user is part of the registrySession interface.
  2231  func (ex *connExecutor) user() string {
  2232  	return ex.sessionData.User
  2233  }
  2234  
  2235  // serialize is part of the registrySession interface.
  2236  func (ex *connExecutor) serialize() serverpb.Session {
  2237  	ex.mu.RLock()
  2238  	defer ex.mu.RUnlock()
  2239  	ex.state.mu.RLock()
  2240  	defer ex.state.mu.RUnlock()
  2241  
  2242  	var kvTxnID *uuid.UUID
  2243  	var activeTxnInfo *serverpb.TxnInfo
  2244  	txn := ex.state.mu.txn
  2245  	if txn != nil {
  2246  		id := txn.ID()
  2247  		kvTxnID = &id
  2248  		activeTxnInfo = &serverpb.TxnInfo{
  2249  			ID:             id,
  2250  			Start:          ex.state.mu.txnStart,
  2251  			TxnDescription: txn.String(),
  2252  		}
  2253  	}
  2254  
  2255  	activeQueries := make([]serverpb.ActiveQuery, 0, len(ex.mu.ActiveQueries))
  2256  	truncateSQL := func(sql string) string {
  2257  		if len(sql) > MaxSQLBytes {
  2258  			sql = sql[:MaxSQLBytes-utf8.RuneLen('…')]
  2259  			// Ensure the resulting string is valid utf8.
  2260  			for {
  2261  				if r, _ := utf8.DecodeLastRuneInString(sql); r != utf8.RuneError {
  2262  					break
  2263  				}
  2264  				sql = sql[:len(sql)-1]
  2265  			}
  2266  			sql += "…"
  2267  		}
  2268  		return sql
  2269  	}
  2270  
  2271  	for id, query := range ex.mu.ActiveQueries {
  2272  		if query.hidden {
  2273  			continue
  2274  		}
  2275  		sql := truncateSQL(query.getStatement())
  2276  		progress := math.Float64frombits(atomic.LoadUint64(&query.progressAtomic))
  2277  		activeQueries = append(activeQueries, serverpb.ActiveQuery{
  2278  			TxnID:         query.txnID,
  2279  			ID:            id.String(),
  2280  			Start:         query.start.UTC(),
  2281  			Sql:           sql,
  2282  			IsDistributed: query.isDistributed,
  2283  			Phase:         (serverpb.ActiveQuery_Phase)(query.phase),
  2284  			Progress:      float32(progress),
  2285  		})
  2286  	}
  2287  	lastActiveQuery := ""
  2288  	if ex.mu.LastActiveQuery != nil {
  2289  		lastActiveQuery = truncateSQL(ex.mu.LastActiveQuery.String())
  2290  	}
  2291  
  2292  	remoteStr := "<admin>"
  2293  	if ex.sessionData.RemoteAddr != nil {
  2294  		remoteStr = ex.sessionData.RemoteAddr.String()
  2295  	}
  2296  
  2297  	return serverpb.Session{
  2298  		Username:        ex.sessionData.User,
  2299  		ClientAddress:   remoteStr,
  2300  		ApplicationName: ex.applicationName.Load().(string),
  2301  		Start:           ex.phaseTimes[sessionInit].UTC(),
  2302  		ActiveQueries:   activeQueries,
  2303  		ActiveTxn:       activeTxnInfo,
  2304  		KvTxnID:         kvTxnID,
  2305  		LastActiveQuery: lastActiveQuery,
  2306  		ID:              ex.sessionID.GetBytes(),
  2307  		AllocBytes:      ex.mon.AllocBytes(),
  2308  		MaxAllocBytes:   ex.mon.MaximumBytes(),
  2309  	}
  2310  }
  2311  
  2312  func (ex *connExecutor) getPrepStmtsAccessor() preparedStatementsAccessor {
  2313  	return connExPrepStmtsAccessor{
  2314  		ex: ex,
  2315  	}
  2316  }
  2317  
  2318  // sessionEventf logs a message to the session event log (if any).
  2319  func (ex *connExecutor) sessionEventf(ctx context.Context, format string, args ...interface{}) {
  2320  	if log.ExpensiveLogEnabled(ctx, 2) {
  2321  		log.VEventfDepth(ctx, 1 /* depth */, 2 /* level */, format, args...)
  2322  	}
  2323  	if ex.eventLog != nil {
  2324  		ex.eventLog.Printf(format, args...)
  2325  	}
  2326  }
  2327  
  2328  // notifyStatsRefresherOfNewTables is called on txn commit to inform
  2329  // the stats refresher that new tables exist and should have their stats
  2330  // collected now.
  2331  func (ex *connExecutor) notifyStatsRefresherOfNewTables(ctx context.Context) {
  2332  	for _, desc := range ex.extraTxnState.descCollection.GetTableDescsWithNewVersion() {
  2333  		// The CREATE STATISTICS run for an async CTAS query is initiated by the
  2334  		// SchemaChanger, so we don't do it here.
  2335  		if desc.IsTable() && !desc.IsAs() {
  2336  			// Initiate a run of CREATE STATISTICS. We use a large number
  2337  			// for rowsAffected because we want to make sure that stats always get
  2338  			// created/refreshed here.
  2339  			ex.planner.execCfg.StatsRefresher.
  2340  				NotifyMutation(desc.ID, math.MaxInt32 /* rowsAffected */)
  2341  		}
  2342  	}
  2343  }
  2344  
// StatementCounters groups metrics for counting different types of
// statements.
//
// Every statement passed to incrementCount bumps QueryCount plus exactly one
// of the more specific counters below.
type StatementCounters struct {
	// QueryCount includes all statements and it is therefore the sum of
	// all the below metrics.
	QueryCount telemetry.CounterWithMetric

	// Basic CRUD statements.
	SelectCount telemetry.CounterWithMetric
	UpdateCount telemetry.CounterWithMetric
	InsertCount telemetry.CounterWithMetric
	DeleteCount telemetry.CounterWithMetric

	// Transaction operations.
	TxnBeginCount    telemetry.CounterWithMetric
	TxnCommitCount   telemetry.CounterWithMetric
	TxnRollbackCount telemetry.CounterWithMetric

	// Savepoint operations. SavepointCount is for real SQL savepoints;
	// the RestartSavepoint variants are for the
	// cockroach-specific client-side retry protocol.
	SavepointCount                  telemetry.CounterWithMetric
	ReleaseSavepointCount           telemetry.CounterWithMetric
	RollbackToSavepointCount        telemetry.CounterWithMetric
	RestartSavepointCount           telemetry.CounterWithMetric
	ReleaseRestartSavepointCount    telemetry.CounterWithMetric
	RollbackToRestartSavepointCount telemetry.CounterWithMetric

	// DdlCount counts all statements whose StatementType is DDL.
	// NOTE(review): incrementCount selects this counter with
	// tree.CanModifySchema and only for statements not matched by a more
	// specific counter above - confirm this agrees with the sentence above.
	DdlCount telemetry.CounterWithMetric

	// MiscCount counts all statements not covered by a more specific stat above.
	MiscCount telemetry.CounterWithMetric
}
  2379  
  2380  func makeStartedStatementCounters(internal bool) StatementCounters {
  2381  	return StatementCounters{
  2382  		TxnBeginCount: telemetry.NewCounterWithMetric(
  2383  			getMetricMeta(MetaTxnBeginStarted, internal)),
  2384  		TxnCommitCount: telemetry.NewCounterWithMetric(
  2385  			getMetricMeta(MetaTxnCommitStarted, internal)),
  2386  		TxnRollbackCount: telemetry.NewCounterWithMetric(
  2387  			getMetricMeta(MetaTxnRollbackStarted, internal)),
  2388  		RestartSavepointCount: telemetry.NewCounterWithMetric(
  2389  			getMetricMeta(MetaRestartSavepointStarted, internal)),
  2390  		ReleaseRestartSavepointCount: telemetry.NewCounterWithMetric(
  2391  			getMetricMeta(MetaReleaseRestartSavepointStarted, internal)),
  2392  		RollbackToRestartSavepointCount: telemetry.NewCounterWithMetric(
  2393  			getMetricMeta(MetaRollbackToRestartSavepointStarted, internal)),
  2394  		SavepointCount: telemetry.NewCounterWithMetric(
  2395  			getMetricMeta(MetaSavepointStarted, internal)),
  2396  		ReleaseSavepointCount: telemetry.NewCounterWithMetric(
  2397  			getMetricMeta(MetaReleaseSavepointStarted, internal)),
  2398  		RollbackToSavepointCount: telemetry.NewCounterWithMetric(
  2399  			getMetricMeta(MetaRollbackToSavepointStarted, internal)),
  2400  		SelectCount: telemetry.NewCounterWithMetric(
  2401  			getMetricMeta(MetaSelectStarted, internal)),
  2402  		UpdateCount: telemetry.NewCounterWithMetric(
  2403  			getMetricMeta(MetaUpdateStarted, internal)),
  2404  		InsertCount: telemetry.NewCounterWithMetric(
  2405  			getMetricMeta(MetaInsertStarted, internal)),
  2406  		DeleteCount: telemetry.NewCounterWithMetric(
  2407  			getMetricMeta(MetaDeleteStarted, internal)),
  2408  		DdlCount: telemetry.NewCounterWithMetric(
  2409  			getMetricMeta(MetaDdlStarted, internal)),
  2410  		MiscCount: telemetry.NewCounterWithMetric(
  2411  			getMetricMeta(MetaMiscStarted, internal)),
  2412  		QueryCount: telemetry.NewCounterWithMetric(
  2413  			getMetricMeta(MetaQueryStarted, internal)),
  2414  	}
  2415  }
  2416  
  2417  func makeExecutedStatementCounters(internal bool) StatementCounters {
  2418  	return StatementCounters{
  2419  		TxnBeginCount: telemetry.NewCounterWithMetric(
  2420  			getMetricMeta(MetaTxnBeginExecuted, internal)),
  2421  		TxnCommitCount: telemetry.NewCounterWithMetric(
  2422  			getMetricMeta(MetaTxnCommitExecuted, internal)),
  2423  		TxnRollbackCount: telemetry.NewCounterWithMetric(
  2424  			getMetricMeta(MetaTxnRollbackExecuted, internal)),
  2425  		RestartSavepointCount: telemetry.NewCounterWithMetric(
  2426  			getMetricMeta(MetaRestartSavepointExecuted, internal)),
  2427  		ReleaseRestartSavepointCount: telemetry.NewCounterWithMetric(
  2428  			getMetricMeta(MetaReleaseRestartSavepointExecuted, internal)),
  2429  		RollbackToRestartSavepointCount: telemetry.NewCounterWithMetric(
  2430  			getMetricMeta(MetaRollbackToRestartSavepointExecuted, internal)),
  2431  		SavepointCount: telemetry.NewCounterWithMetric(
  2432  			getMetricMeta(MetaSavepointExecuted, internal)),
  2433  		ReleaseSavepointCount: telemetry.NewCounterWithMetric(
  2434  			getMetricMeta(MetaReleaseSavepointExecuted, internal)),
  2435  		RollbackToSavepointCount: telemetry.NewCounterWithMetric(
  2436  			getMetricMeta(MetaRollbackToSavepointExecuted, internal)),
  2437  		SelectCount: telemetry.NewCounterWithMetric(
  2438  			getMetricMeta(MetaSelectExecuted, internal)),
  2439  		UpdateCount: telemetry.NewCounterWithMetric(
  2440  			getMetricMeta(MetaUpdateExecuted, internal)),
  2441  		InsertCount: telemetry.NewCounterWithMetric(
  2442  			getMetricMeta(MetaInsertExecuted, internal)),
  2443  		DeleteCount: telemetry.NewCounterWithMetric(
  2444  			getMetricMeta(MetaDeleteExecuted, internal)),
  2445  		DdlCount: telemetry.NewCounterWithMetric(
  2446  			getMetricMeta(MetaDdlExecuted, internal)),
  2447  		MiscCount: telemetry.NewCounterWithMetric(
  2448  			getMetricMeta(MetaMiscExecuted, internal)),
  2449  		QueryCount: telemetry.NewCounterWithMetric(
  2450  			getMetricMeta(MetaQueryExecuted, internal)),
  2451  	}
  2452  }
  2453  
  2454  func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statement) {
  2455  	sc.QueryCount.Inc()
  2456  	switch t := stmt.(type) {
  2457  	case *tree.BeginTransaction:
  2458  		sc.TxnBeginCount.Inc()
  2459  	case *tree.Select:
  2460  		sc.SelectCount.Inc()
  2461  	case *tree.Update:
  2462  		sc.UpdateCount.Inc()
  2463  	case *tree.Insert:
  2464  		sc.InsertCount.Inc()
  2465  	case *tree.Delete:
  2466  		sc.DeleteCount.Inc()
  2467  	case *tree.CommitTransaction:
  2468  		sc.TxnCommitCount.Inc()
  2469  	case *tree.RollbackTransaction:
  2470  		sc.TxnRollbackCount.Inc()
  2471  	case *tree.Savepoint:
  2472  		if ex.isCommitOnReleaseSavepoint(t.Name) {
  2473  			sc.RestartSavepointCount.Inc()
  2474  		} else {
  2475  			sc.SavepointCount.Inc()
  2476  		}
  2477  	case *tree.ReleaseSavepoint:
  2478  		if ex.isCommitOnReleaseSavepoint(t.Savepoint) {
  2479  			sc.ReleaseRestartSavepointCount.Inc()
  2480  		} else {
  2481  			sc.ReleaseSavepointCount.Inc()
  2482  		}
  2483  	case *tree.RollbackToSavepoint:
  2484  		if ex.isCommitOnReleaseSavepoint(t.Savepoint) {
  2485  			sc.RollbackToRestartSavepointCount.Inc()
  2486  		} else {
  2487  			sc.RollbackToSavepointCount.Inc()
  2488  		}
  2489  	default:
  2490  		if tree.CanModifySchema(stmt) {
  2491  			sc.DdlCount.Inc()
  2492  		} else {
  2493  			sc.MiscCount.Inc()
  2494  		}
  2495  	}
  2496  }
  2497  
// connExPrepStmtsAccessor is an implementation of preparedStatementsAccessor
// that gives access to a connExecutor's prepared statements.
type connExPrepStmtsAccessor struct {
	// ex is the connExecutor whose prepared statements are exposed.
	ex *connExecutor
}

// Compile-time check that connExPrepStmtsAccessor satisfies
// preparedStatementsAccessor.
var _ preparedStatementsAccessor = connExPrepStmtsAccessor{}
  2505  
  2506  // List is part of the preparedStatementsAccessor interface.
  2507  func (ps connExPrepStmtsAccessor) List() map[string]*PreparedStatement {
  2508  	// Return a copy of the data, to prevent modification of the map.
  2509  	stmts := ps.ex.extraTxnState.prepStmtsNamespace.prepStmts
  2510  	ret := make(map[string]*PreparedStatement, len(stmts))
  2511  	for key, stmt := range stmts {
  2512  		ret[key] = stmt
  2513  	}
  2514  	return ret
  2515  }
  2516  
  2517  // Get is part of the preparedStatementsAccessor interface.
  2518  func (ps connExPrepStmtsAccessor) Get(name string) (*PreparedStatement, bool) {
  2519  	s, ok := ps.ex.extraTxnState.prepStmtsNamespace.prepStmts[name]
  2520  	return s, ok
  2521  }
  2522  
  2523  // Delete is part of the preparedStatementsAccessor interface.
  2524  func (ps connExPrepStmtsAccessor) Delete(ctx context.Context, name string) bool {
  2525  	_, ok := ps.Get(name)
  2526  	if !ok {
  2527  		return false
  2528  	}
  2529  	ps.ex.deletePreparedStmt(ctx, name)
  2530  	return true
  2531  }
  2532  
  2533  // DeleteAll is part of the preparedStatementsAccessor interface.
  2534  func (ps connExPrepStmtsAccessor) DeleteAll(ctx context.Context) {
  2535  	ps.ex.extraTxnState.prepStmtsNamespace.resetTo(ctx, prepStmtNamespace{})
  2536  }
  2537  
// contextStatementKey is an empty type for the handle associated with the
// statement value (see context.Value). Using a dedicated unexported type as
// the key keeps it from colliding with context keys defined by other packages.
type contextStatementKey struct{}
  2541  
  2542  // withStatement adds a SQL statement to the provided context. The statement
  2543  // will then be included in crash reports which use that context.
  2544  func withStatement(ctx context.Context, stmt tree.Statement) context.Context {
  2545  	return context.WithValue(ctx, contextStatementKey{}, stmt)
  2546  }
  2547  
  2548  // statementFromCtx returns the statement value from a context, or nil if unset.
  2549  func statementFromCtx(ctx context.Context) tree.Statement {
  2550  	stmt := ctx.Value(contextStatementKey{})
  2551  	if stmt == nil {
  2552  		return nil
  2553  	}
  2554  	return stmt.(tree.Statement)
  2555  }
  2556  
  2557  func init() {
  2558  	// Register a function to include the anonymized statement in crash reports.
  2559  	log.RegisterTagFn("statement", func(ctx context.Context) string {
  2560  		stmt := statementFromCtx(ctx)
  2561  		if stmt == nil {
  2562  			return ""
  2563  		}
  2564  		// Anonymize the statement for reporting.
  2565  		return anonymizeStmtAndConstants(stmt)
  2566  	})
  2567  }