github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/conn_executor.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "fmt" 16 "io" 17 "math" 18 "strings" 19 "sync/atomic" 20 "time" 21 "unicode/utf8" 22 23 "github.com/cockroachdb/cockroach/pkg/config" 24 "github.com/cockroachdb/cockroach/pkg/kv" 25 "github.com/cockroachdb/cockroach/pkg/roachpb" 26 "github.com/cockroachdb/cockroach/pkg/server/serverpb" 27 "github.com/cockroachdb/cockroach/pkg/server/telemetry" 28 "github.com/cockroachdb/cockroach/pkg/settings" 29 "github.com/cockroachdb/cockroach/pkg/sql/catalog/database" 30 "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" 31 "github.com/cockroachdb/cockroach/pkg/sql/parser" 32 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 33 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 34 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 35 "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" 36 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 37 "github.com/cockroachdb/cockroach/pkg/sql/types" 38 "github.com/cockroachdb/cockroach/pkg/util" 39 "github.com/cockroachdb/cockroach/pkg/util/envutil" 40 "github.com/cockroachdb/cockroach/pkg/util/errorutil" 41 "github.com/cockroachdb/cockroach/pkg/util/fsm" 42 "github.com/cockroachdb/cockroach/pkg/util/hlc" 43 "github.com/cockroachdb/cockroach/pkg/util/log" 44 "github.com/cockroachdb/cockroach/pkg/util/metric" 45 "github.com/cockroachdb/cockroach/pkg/util/mon" 46 "github.com/cockroachdb/cockroach/pkg/util/stop" 47 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 48 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 49 "github.com/cockroachdb/cockroach/pkg/util/tracing" 50 "github.com/cockroachdb/cockroach/pkg/util/uuid" 51 "github.com/cockroachdb/errors" 52 "github.com/cockroachdb/logtags" 53 "golang.org/x/net/trace" 54 ) 55 56 // noteworthyMemoryUsageBytes is the minimum size tracked by a 57 // transaction or session monitor before the monitor starts explicitly 58 // logging overall usage growth in the log. 59 var noteworthyMemoryUsageBytes = envutil.EnvOrDefaultInt64("COCKROACH_NOTEWORTHY_SESSION_MEMORY_USAGE", 1024*1024) 60 61 // A connExecutor is in charge of executing queries received on a given client 62 // connection. The connExecutor implements a state machine (dictated by the 63 // Postgres/pgwire session semantics). The state machine is supposed to run 64 // asynchronously wrt the client connection: it receives input statements 65 // through a stmtBuf and produces results through a clientComm interface. The 66 // connExecutor maintains a cursor over the statementBuffer and executes 67 // statements / produces results for one statement at a time. The cursor points 68 // at all times to the statement that the connExecutor is currently executing. 69 // Results for statements before the cursor have already been produced (but not 70 // necessarily delivered to the client). Statements after the cursor are queued 71 // for future execution. 
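// To make the cursor mechanics concrete, here is a minimal, self-contained
// sketch (illustrative only; cmdBuf and its methods are hypothetical stand-ins
// for the real StmtBuf/connExecutor machinery described in this file):
//
//	type cmdBuf struct {
//		cmds   []string // queued statements, oldest first
//		cursor int      // index of the statement currently being executed
//	}
//
//	func (b *cmdBuf) advance()       { b.cursor++ }     // result produced; move on
//	func (b *cmdBuf) rewind(pos int) { b.cursor = pos } // retry: only if results were not delivered
//	func (b *cmdBuf) trim(n int)     { b.cmds = b.cmds[n:]; b.cursor -= n } // drop stmts no longer needed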
Keeping already executed statements in the buffer is 72 // useful in case of automatic retries (in which case statements from the 73 // retried transaction have to be executed again); the connExecutor is in charge 74 // of removing old statements that are no longer needed for retries from the 75 // (head of the) buffer. Separately, the implementer of the clientComm interface 76 // (e.g. the pgwire module) is in charge of keeping track of what results have 77 // been delivered to the client and what results haven't (yet). 78 // 79 // The connExecutor has two main responsibilities: to dispatch queries to the 80 // execution engine(s) and relay their results to the clientComm, and to 81 // implement the state machine maintaining the various aspects of a connection's 82 // state. The state machine implementation is further divided into two aspects: 83 // maintaining the transaction status of the connection (outside of a txn, 84 // inside a txn, in an aborted txn, in a txn awaiting client restart, etc.) and 85 // maintaining the cursor position (i.e. correctly jumping to whatever the 86 // "next" statement to execute is in various situations). 87 // 88 // The cursor normally advances one statement at a time, but it can also skip 89 // some statements (remaining statements in a query string are skipped once an 90 // error is encountered) and it can sometimes be rewound when performing 91 // automatic retries. Rewinding can only be done if results for the rewound 92 // statements have not actually been delivered to the client; see below. 93 // 94 // +---------------------+ 95 // |connExecutor | 96 // | | 97 // +->execution+--------------+ 98 // || + | | 99 // || |fsm.Event | | 100 // || | | | 101 // || v | | 102 // || fsm.Machine(TxnStateTransitions) 103 // || + +--------+ | | 104 // +--------------------+ || | |txnState| | | 105 // |stmtBuf | || | +--------+ | | 106 // | | statements are read || | | | 107 // | +-+-+ +-+-+ +-+-+ +------------------------+ | | | 108 // | | | | | | | | | | | | | +-------------+ | 109 // +---> +-+-+ +++-+ +-+-+ | | | |session data | | 110 // | | ^ | | | +-------------+ | 111 // | | | +-----------------------------------+ | | 112 // | | + v | cursor is advanced | advanceInfo | | 113 // | | cursor | | | | 114 // | +--------------------+ +---------------------+ | 115 // | | 116 // | | 117 // +-------------+ | 118 // +--------+ | 119 // | parser | | 120 // +--------+ | 121 // | | 122 // | | 123 // | +----------------+ | 124 // +-------+------+ |execution engine<--------+ 125 // | pgwire conn | +------------+(local/DistSQL) | 126 // | | | +----------------+ 127 // | +----------+ | 128 // | |clientComm<---------------+ 129 // | +----------+ results are produced 130 // | | 131 // +-------^------+ 132 // | 133 // | 134 // +-------+------+ 135 // | SQL client | 136 // +--------------+ 137 // 138 // The connExecutor is disconnected from client communication (i.e. generally 139 // network communication - i.e. pgwire.conn); the module doing client 140 // communication is responsible for pushing statements into the buffer and for 141 // providing an implementation of the clientComm interface (and thus sending 142 // results to the client). The connExecutor does not control when 143 // results are delivered to the client, but it still has some influence 144 // over that; this is because the possibility of doing 145 // automatic retries goes away the moment results for the transaction in 146 // question are delivered to the client.
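// To make the retry decision concrete, here is a sketch of how these pieces
// fit together (illustrative only; it reuses getRewindTxnCapability,
// rewindAndUnlock and close from later in this file, but the surrounding
// control flow is hypothetical):
//
//	if rc, canRewind := ex.getRewindTxnCapability(); canRewind {
//		// No results have reached the client and communication is now
//		// locked, so it is safe to retry: discard the buffered results and
//		// jump back to the start of the transaction.
//		rc.rewindAndUnlock(ctx)
//	} else {
//		// Results were already delivered; the retriable error has to be
//		// surfaced to the client instead.
//	}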
The communication module has full 147 // freedom in sending results whenever it sees fit; however the connExecutor 148 // influences communication in the following ways: 149 // 150 // a) When deciding whether an automatic retry can be performed for a 151 // transaction, the connExecutor needs to: 152 // 153 // 1) query the communication status to check that no results for the txn have 154 // been delivered to the client and, if this check passes: 155 // 2) lock the communication so that no further results are delivered to the 156 // client, and, eventually: 157 // 3) rewind the clientComm to a certain position corresponding to the start 158 // of the transaction, thereby discarding all the results that had been 159 // accumulated for the previous attempt to run the transaction in question. 160 // 161 // These steps are all orchestrated through clientComm.lockCommunication() and 162 // rewindCapability{}. 163 // 164 // b) The connExecutor sometimes asks the clientComm to deliver everything 165 // (most commonly in response to a Sync command). 166 // 167 // As of Feb 2018, the pgwire.conn delivers results synchronously to the client 168 // when its internal buffer overflows. In principle, delivery of results could be 169 // done asynchronously wrt the processing of commands (e.g. we could have a 170 // timing policy in addition to the buffer size). The first implementation of 171 // that showed a performance impact of involving a channel communication in the 172 // Sync processing path. 173 // 174 // 175 // Implementation notes: 176 // 177 // --- Error handling --- 178 // 179 // The key to understanding how the connExecutor handles errors is understanding 180 // the fact that there are two distinct categories of errors to speak of. There 181 // are "query execution errors" and there are the rest. Most things fall in the 182 // former category: invalid queries, queries that fail constraints at runtime, 183 // data unavailability errors, retriable errors (i.e. serializability 184 // violations), "internal errors" (e.g. connection problems in the cluster). This 185 // category of errors doesn't represent dramatic events as far as the connExecutor 186 // is concerned: they produce "results" for the query to be passed to the client 187 // just like successful queries do and they produce Events for the 188 // state machine just like the successful queries (the events in question 189 // are generally event{non}RetriableErr and they generally cause the 190 // state machine to move to the Aborted state, but the connExecutor doesn't 191 // concern itself with this). The way the connExecutor reacts to these errors is 192 // the same as how it reacts to a successful query completing: it moves the 193 // cursor over the incoming statements as instructed by the state machine and 194 // continues running statements. 195 // 196 // And then there are other errors that don't have anything to do with a 197 // particular query, but with the connExecutor itself. In other languages, these 198 // would perhaps be modeled as Exceptions: we want them to unwind the stack 199 // significantly. These errors cause the connExecutor.run() to break out of its 200 // loop and return an error. Examples of such errors include errors in 201 // communication with the client (e.g. the network connection is broken) or the 202 // connection's context being canceled. 203 // 204 // All of connExecutor's methods only return errors for the 2nd category. Query 205 // execution errors are written to a CommandResult.
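// Sketched side by side (illustrative; queryErr and connErr are hypothetical
// placeholders, while ev/payload/res mirror the real variables in execCmd
// below):
//
//	// 1st category: becomes a client-visible result plus a state machine event.
//	ev = eventNonRetriableErr{IsCommit: fsm.False}
//	payload = eventNonRetriableErrPayload{err: queryErr}
//	res = ex.clientComm.CreateErrorResult(pos)
//
//	// 2nd category: unwinds connExecutor.run().
//	return connErr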
Low-level methods don't 206 // operate on a CommandResult directly; instead they operate on a wrapper 207 // (resultWithStoredErr), which provides access to the query error for purposes 208 // of building the correct state machine event. 209 // 210 // --- Context management --- 211 // 212 // At the highest level, there's connExecutor.run() that takes a context. That 213 // context is supposed to represent "the connection's context": its lifetime is 214 // the client connection's lifetime and it is assigned to 215 // connEx.ctxHolder.connCtx. Below that, every SQL transaction has its own 216 // derived context because that's the level at which we trace operations. The 217 // lifetime of SQL transactions is determined by the txnState: the state machine 218 // decides when transactions start and end in txnState.performStateTransition(). 219 // When we're inside a SQL transaction, most operations are considered to happen 220 // in the context of that txn. When there's no SQL transaction (i.e. 221 // stateNoTxn), everything happens in the connection's context. 222 // 223 // High-level code in connExecutor is agnostic of whether it currently is inside 224 // a txn or not. To deal with both cases, such methods don't explicitly take a 225 // context; instead they use connEx.Ctx(), which returns the appropriate ctx 226 // based on the current state. 227 // Lower-level code (everything from connEx.execStmt() and below which runs in 228 // between state transitions) knows what state it's running in, and so the usual 229 // pattern of explicitly taking a context as an argument is used. 230 231 // Server is the top level singleton for handling SQL connections. It creates 232 // connExecutors to serve every incoming connection. 233 type Server struct { 234 _ util.NoCopy 235 236 cfg *ExecutorConfig 237 238 // sqlStats tracks per-application statistics for all applications on each 239 // node. Newly collected statistics flow into sqlStats. 240 sqlStats sqlStats 241 // reportedStats is a pool of stats that is held for reporting, and is 242 // cleared on a lower interval than sqlStats. Stats from sqlStats flow 243 // into reported stats when sqlStats is cleared. 244 reportedStats sqlStats 245 246 reCache *tree.RegexpCache 247 248 // pool is the parent monitor for all session monitors except "internal" ones. 249 pool *mon.BytesMonitor 250 251 // Metrics is used to account for normal queries. 252 Metrics Metrics 253 254 // InternalMetrics is used to account for internal queries. 255 InternalMetrics Metrics 256 257 // dbCache is a cache for database descriptors, maintained through Gossip 258 // updates. 259 dbCache *databaseCacheHolder 260 } 261 262 // Metrics collects timeseries data about SQL activity. 263 type Metrics struct { 264 // EngineMetrics is exported as required by the metrics.Struct magic we use 265 // for metrics registration. 266 EngineMetrics EngineMetrics 267 268 // StartedStatementCounters contains metrics for statements initiated by 269 // users. These metrics count user-initiated operations, regardless of 270 // success (in particular, TxnCommitCount is the number of COMMIT statements 271 // attempted, not the number of transactions that successfully commit). 272 StartedStatementCounters StatementCounters 273 274 // ExecutedStatementCounters contains metrics for successfully executed 275 // statements. 276 ExecutedStatementCounters StatementCounters 277 } 278 279 // NewServer creates a new Server. Start() needs to be called before the Server 280 // is used.
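// A typical construction sequence, for illustration (cfg, pool, ctx and
// stopper are assumed to come from the surrounding server initialization):
//
//	srv := NewServer(cfg, pool)
//	srv.Start(ctx, stopper)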
281 func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { 282 systemCfg := config.NewSystemConfig(cfg.DefaultZoneConfig) 283 return &Server{ 284 cfg: cfg, 285 Metrics: makeMetrics(false /*internal*/), 286 InternalMetrics: makeMetrics(true /*internal*/), 287 // dbCache will be updated on Start(). 288 dbCache: newDatabaseCacheHolder(database.NewCache(cfg.Codec, systemCfg)), 289 pool: pool, 290 sqlStats: sqlStats{st: cfg.Settings, apps: make(map[string]*appStats)}, 291 reportedStats: sqlStats{st: cfg.Settings, apps: make(map[string]*appStats)}, 292 reCache: tree.NewRegexpCache(512), 293 } 294 } 295 296 func makeMetrics(internal bool) Metrics { 297 return Metrics{ 298 EngineMetrics: EngineMetrics{ 299 DistSQLSelectCount: metric.NewCounter(getMetricMeta(MetaDistSQLSelect, internal)), 300 SQLOptFallbackCount: metric.NewCounter(getMetricMeta(MetaSQLOptFallback, internal)), 301 SQLOptPlanCacheHits: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheHits, internal)), 302 SQLOptPlanCacheMisses: metric.NewCounter(getMetricMeta(MetaSQLOptPlanCacheMisses, internal)), 303 304 // TODO(mrtracy): See HistogramWindowInterval in server/config.go for the 6x factor. 305 DistSQLExecLatency: metric.NewLatency(getMetricMeta(MetaDistSQLExecLatency, internal), 306 6*metricsSampleInterval), 307 SQLExecLatency: metric.NewLatency(getMetricMeta(MetaSQLExecLatency, internal), 308 6*metricsSampleInterval), 309 DistSQLServiceLatency: metric.NewLatency(getMetricMeta(MetaDistSQLServiceLatency, internal), 310 6*metricsSampleInterval), 311 SQLServiceLatency: metric.NewLatency(getMetricMeta(MetaSQLServiceLatency, internal), 312 6*metricsSampleInterval), 313 SQLTxnLatency: metric.NewLatency(getMetricMeta(MetaSQLTxnLatency, internal), 314 6*metricsSampleInterval), 315 316 TxnAbortCount: metric.NewCounter(getMetricMeta(MetaTxnAbort, internal)), 317 FailureCount: metric.NewCounter(getMetricMeta(MetaFailure, internal)), 318 }, 319 StartedStatementCounters: makeStartedStatementCounters(internal), 320 ExecutedStatementCounters: makeExecutedStatementCounters(internal), 321 } 322 } 323 324 // Start starts the Server's background processing. 325 func (s *Server) Start(ctx context.Context, stopper *stop.Stopper) { 326 gossipUpdateC := s.cfg.Gossip.DeprecatedRegisterSystemConfigChannel(47150) 327 stopper.RunWorker(ctx, func(ctx context.Context) { 328 for { 329 select { 330 case <-gossipUpdateC: 331 sysCfg := s.cfg.Gossip.DeprecatedSystemConfig(47150) 332 s.dbCache.updateSystemConfig(sysCfg) 333 case <-stopper.ShouldStop(): 334 return 335 } 336 } 337 }) 338 // Start a loop to clear SQL stats at the max reset interval. This is 339 // to ensure that we always have some worker clearing SQL stats to avoid 340 // continually allocating space for the SQL stats. Additionally, spawn 341 // a loop to clear the reported stats at the same large interval just 342 // in case the telemetry worker fails. 343 s.PeriodicallyClearSQLStats(ctx, stopper, MaxSQLStatReset, &s.sqlStats, s.ResetSQLStats) 344 s.PeriodicallyClearSQLStats(ctx, stopper, MaxSQLStatReset, &s.reportedStats, s.ResetReportedStats) 345 // Start a second loop to clear SQL stats at the requested interval. 346 s.PeriodicallyClearSQLStats(ctx, stopper, SQLStatReset, &s.sqlStats, s.ResetSQLStats) 347 } 348 349 // ResetSQLStats resets the executor's collected sql statistics. 350 func (s *Server) ResetSQLStats(ctx context.Context) { 351 // Dump the SQL stats into the reported stats before clearing the SQL stats. 
352 s.sqlStats.resetAndMaybeDumpStats(ctx, &s.reportedStats) 353 } 354 355 // ResetReportedStats resets the executor's collected reported stats. 356 func (s *Server) ResetReportedStats(ctx context.Context) { 357 s.reportedStats.resetAndMaybeDumpStats(ctx, nil /* target */) 358 } 359 360 // GetScrubbedStmtStats returns the statement statistics by app, with the 361 // queries scrubbed of their identifiers. Any statements which cannot be 362 // scrubbed will be omitted from the returned slice. 363 func (s *Server) GetScrubbedStmtStats() []roachpb.CollectedStatementStatistics { 364 return s.sqlStats.getScrubbedStmtStats(s.cfg.VirtualSchemas) 365 } 366 367 // Avoid lint errors. 368 var _ = (*Server).GetScrubbedStmtStats 369 370 // GetUnscrubbedStmtStats returns the same thing as GetScrubbedStmtStats, except 371 // identifiers (e.g. table and column names) aren't scrubbed from the statements. 372 func (s *Server) GetUnscrubbedStmtStats() []roachpb.CollectedStatementStatistics { 373 return s.sqlStats.getUnscrubbedStmtStats(s.cfg.VirtualSchemas) 374 } 375 376 // GetScrubbedReportingStats does the same thing as GetScrubbedStmtStats but 377 // returns statistics from the reported stats pool. 378 func (s *Server) GetScrubbedReportingStats() []roachpb.CollectedStatementStatistics { 379 return s.reportedStats.getScrubbedStmtStats(s.cfg.VirtualSchemas) 380 } 381 382 // GetUnscrubbedReportingStats does the same thing as GetUnscrubbedStmtStats but 383 // returns statistics from the reported stats pool. 384 func (s *Server) GetUnscrubbedReportingStats() []roachpb.CollectedStatementStatistics { 385 return s.reportedStats.getUnscrubbedStmtStats(s.cfg.VirtualSchemas) 386 } 387 388 // GetStmtStatsLastReset returns the time at which the statement statistics were 389 // last cleared. 390 func (s *Server) GetStmtStatsLastReset() time.Time { 391 return s.sqlStats.getLastReset() 392 } 393 394 // GetExecutorConfig returns this server's executor config. 395 func (s *Server) GetExecutorConfig() *ExecutorConfig { 396 return s.cfg 397 } 398 399 // SetupConn creates a connExecutor for the client connection. 400 // 401 // When this method returns there are no resources allocated yet that 402 // need to be close()d. 403 // 404 // Args: 405 // args: The initial session parameters. They are validated by SetupConn 406 // and an error is returned if this validation fails. 407 // stmtBuf: The incoming statements for the new connExecutor. 408 // clientComm: The interface through which the new connExecutor is going to 409 // produce results for the client. 410 // memMetrics: The metrics that statements executed on this connection will 411 // contribute to. 412 func (s *Server) SetupConn( 413 ctx context.Context, 414 args SessionArgs, 415 stmtBuf *StmtBuf, 416 clientComm ClientComm, 417 memMetrics MemoryMetrics, 418 ) (ConnectionHandler, error) { 419 sd := s.newSessionData(args) 420 421 // Set the SessionData from args.SessionDefaults. This also validates the 422 // respective values. 423 sdMut := s.makeSessionDataMutator(sd, args.SessionDefaults) 424 if err := resetSessionVars(ctx, &sdMut); err != nil { 425 log.Errorf(ctx, "error setting up client session: %s", err) 426 return ConnectionHandler{}, err 427 } 428 429 ex := s.newConnExecutor( 430 ctx, sd, args.SessionDefaults, stmtBuf, clientComm, memMetrics, &s.Metrics, 431 s.sqlStats.getStatsForApplication(sd.ApplicationName), 432 ) 433 return ConnectionHandler{ex}, nil 434 } 435 436 // ConnectionHandler is the interface between the result of SetupConn 437 // and the ServeConn below.
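// A typical pairing, for illustration (args, stmtBuf, clientComm, memMetrics,
// reserved and cancel are assumed to come from the pgwire layer):
//
//	connHandler, err := srv.SetupConn(ctx, args, stmtBuf, clientComm, memMetrics)
//	if err != nil {
//		return err
//	}
//	return srv.ServeConn(ctx, connHandler, reserved, cancel)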
ConnectionHandler encapsulates the connExecutor and hides 438 // it away from other packages. 439 type ConnectionHandler struct { 440 ex *connExecutor 441 } 442 443 // GetUnqualifiedIntSize implements pgwire.sessionDataProvider and returns 444 // the type that INT should be parsed as. 445 func (h ConnectionHandler) GetUnqualifiedIntSize() *types.T { 446 var size int 447 if h.ex != nil { 448 // The executor will be nil in certain testing situations where 449 // no server is actually present. 450 size = h.ex.sessionData.DefaultIntSize 451 } 452 switch size { 453 case 4, 32: 454 return types.Int4 455 default: 456 return types.Int 457 } 458 } 459 460 // GetParamStatus retrieves the configured value of the session 461 // variable identified by varName. This is used for the initial 462 // message sent to a client during a session set-up. 463 func (h ConnectionHandler) GetParamStatus(ctx context.Context, varName string) string { 464 name := strings.ToLower(varName) 465 v, ok := varGen[name] 466 if !ok { 467 log.Fatalf(ctx, "programming error: status param %q must be a defined session var", varName) 468 return "" 469 } 470 hasDefault, defVal := getSessionVarDefaultString(name, v, h.ex.dataMutator) 471 if !hasDefault { 472 log.Fatalf(ctx, "programming error: status param %q must have a default value", varName) 473 return "" 474 } 475 return defVal 476 } 477 478 // ServeConn serves a client connection by reading commands from the stmtBuf 479 // embedded in the ConnectionHandler. 480 // 481 // If not nil, reserved represents memory reserved for the connection. The 482 // connExecutor takes ownership of this memory. 483 func (s *Server) ServeConn( 484 ctx context.Context, h ConnectionHandler, reserved mon.BoundAccount, cancel context.CancelFunc, 485 ) error { 486 defer func() { 487 r := recover() 488 h.ex.closeWrapper(ctx, r) 489 }() 490 return h.ex.run(ctx, s.pool, reserved, cancel) 491 } 492 493 // newSessionData returns a SessionData that can be passed to newConnExecutor. 494 func (s *Server) newSessionData(args SessionArgs) *sessiondata.SessionData { 495 sd := &sessiondata.SessionData{ 496 User: args.User, 497 RemoteAddr: args.RemoteAddr, 498 ResultsBufferSize: args.ConnResultsBufferSize, 499 } 500 s.populateMinimalSessionData(sd) 501 return sd 502 } 503 504 func (s *Server) makeSessionDataMutator( 505 sd *sessiondata.SessionData, defaults SessionDefaults, 506 ) sessionDataMutator { 507 return sessionDataMutator{ 508 data: sd, 509 defaults: defaults, 510 settings: s.cfg.Settings, 511 paramStatusUpdater: &noopParamStatusUpdater{}, 512 } 513 } 514 515 // populateMinimalSessionData populates sd with the minimal values needed to 516 // avoid crashing. Fields of sd that are already set are not overwritten. 517 func (s *Server) populateMinimalSessionData(sd *sessiondata.SessionData) { 518 if sd.SequenceState == nil { 519 sd.SequenceState = sessiondata.NewSequenceState() 520 } 521 if sd.DataConversion == (sessiondata.DataConversionConfig{}) { 522 sd.DataConversion = sessiondata.DataConversionConfig{ 523 Location: time.UTC, 524 } 525 } 526 if len(sd.SearchPath.GetPathArray()) == 0 { 527 sd.SearchPath = sqlbase.DefaultSearchPath 528 } 529 } 530 531 // newConnExecutor creates a new connExecutor. 532 // 533 // sd is expected to be fully initialized with the values of all the session 534 // vars. 535 // sdDefaults controls what the session vars will be reset to through 536 // RESET statements.
537 func (s *Server) newConnExecutor( 538 ctx context.Context, 539 sd *sessiondata.SessionData, 540 sdDefaults SessionDefaults, 541 stmtBuf *StmtBuf, 542 clientComm ClientComm, 543 memMetrics MemoryMetrics, 544 srvMetrics *Metrics, 545 appStats *appStats, 546 ) *connExecutor { 547 // Create the various monitors. 548 // The session monitors are started in activate(). 549 sessionRootMon := mon.MakeMonitor( 550 "session root", 551 mon.MemoryResource, 552 memMetrics.CurBytesCount, 553 memMetrics.MaxBytesHist, 554 -1 /* increment */, math.MaxInt64, s.cfg.Settings, 555 ) 556 sessionMon := mon.MakeMonitor( 557 "session", 558 mon.MemoryResource, 559 memMetrics.SessionCurBytesCount, 560 memMetrics.SessionMaxBytesHist, 561 -1 /* increment */, noteworthyMemoryUsageBytes, s.cfg.Settings, 562 ) 563 // The txn monitor is started in txnState.resetForNewSQLTxn(). 564 txnMon := mon.MakeMonitor( 565 "txn", 566 mon.MemoryResource, 567 memMetrics.TxnCurBytesCount, 568 memMetrics.TxnMaxBytesHist, 569 -1 /* increment */, noteworthyMemoryUsageBytes, s.cfg.Settings, 570 ) 571 572 nodeIDOrZero, _ := s.cfg.NodeID.OptionalNodeID() 573 sdMutator := new(sessionDataMutator) 574 *sdMutator = s.makeSessionDataMutator(sd, sdDefaults) 575 576 ex := &connExecutor{ 577 server: s, 578 metrics: srvMetrics, 579 stmtBuf: stmtBuf, 580 clientComm: clientComm, 581 mon: &sessionRootMon, 582 sessionMon: &sessionMon, 583 sessionData: sd, 584 dataMutator: sdMutator, 585 state: txnState{ 586 mon: &txnMon, 587 connCtx: ctx, 588 }, 589 transitionCtx: transitionCtx{ 590 db: s.cfg.DB, 591 nodeIDOrZero: nodeIDOrZero, 592 clock: s.cfg.Clock, 593 // Future transactions' monitors will inherit from sessionRootMon. 594 connMon: &sessionRootMon, 595 tracer: s.cfg.AmbientCtx.Tracer, 596 settings: s.cfg.Settings, 597 }, 598 memMetrics: memMetrics, 599 planner: planner{execCfg: s.cfg, alloc: &sqlbase.DatumAlloc{}}, 600 601 // ctxHolder will be reset at the start of run(). We only define 602 // it here so that an early call to close() doesn't panic. 603 ctxHolder: ctxHolder{connCtx: ctx}, 604 executorType: executorTypeExec, 605 hasCreatedTemporarySchema: false, 606 stmtDiagnosticsRecorder: s.cfg.StmtDiagnosticsRecorder, 607 } 608 609 ex.state.txnAbortCount = ex.metrics.EngineMetrics.TxnAbortCount 610 611 // The transaction_read_only variable is special; its updates need to be 612 // hooked up to the executor.
613 sdMutator.setCurTxnReadOnly = func(val bool) { 614 ex.state.readOnly = val 615 } 616 617 sdMutator.onTempSchemaCreation = func() { 618 ex.hasCreatedTemporarySchema = true 619 } 620 621 ex.applicationName.Store(ex.sessionData.ApplicationName) 622 ex.appStats = appStats 623 sdMutator.RegisterOnSessionDataChange("application_name", func(newName string) { 624 ex.applicationName.Store(newName) 625 ex.appStats = ex.server.sqlStats.getStatsForApplication(newName) 626 }) 627 628 ex.phaseTimes[sessionInit] = timeutil.Now() 629 ex.extraTxnState.prepStmtsNamespace = prepStmtNamespace{ 630 prepStmts: make(map[string]*PreparedStatement), 631 portals: make(map[string]*PreparedPortal), 632 } 633 ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos = prepStmtNamespace{ 634 prepStmts: make(map[string]*PreparedStatement), 635 portals: make(map[string]*PreparedPortal), 636 } 637 ex.extraTxnState.descCollection = descs.MakeCollection(s.cfg.LeaseManager, 638 s.cfg.Settings, s.dbCache.getDatabaseCache(), s.dbCache) 639 ex.extraTxnState.txnRewindPos = -1 640 ex.mu.ActiveQueries = make(map[ClusterWideID]*queryMeta) 641 ex.machine = fsm.MakeMachine(TxnStateTransitions, stateNoTxn{}, &ex.state) 642 643 ex.sessionTracing.ex = ex 644 ex.transitionCtx.sessionTracing = &ex.sessionTracing 645 ex.statsCollector = ex.newStatsCollector() 646 ex.initPlanner(ctx, &ex.planner) 647 648 return ex 649 } 650 651 // newConnExecutorWithTxn creates a connExecutor that will execute statements 652 // under a higher-level txn. This connExecutor runs with a different state 653 // machine, much reduced from the regular one. It cannot initiate or end 654 // transactions (so, no BEGIN, COMMIT, ROLLBACK, no auto-commit, no automatic 655 // retries). 656 // 657 // If there is no error, this function also activate()s the returned 658 // executor, so the caller does not need to run the 659 // activation. However this means that run() or close() must be called 660 // to release resources. 661 func (s *Server) newConnExecutorWithTxn( 662 ctx context.Context, 663 sd *sessiondata.SessionData, 664 sdDefaults SessionDefaults, 665 stmtBuf *StmtBuf, 666 clientComm ClientComm, 667 parentMon *mon.BytesMonitor, 668 memMetrics MemoryMetrics, 669 srvMetrics *Metrics, 670 txn *kv.Txn, 671 tcModifier descs.ModifiedCollectionCopier, 672 appStats *appStats, 673 ) *connExecutor { 674 ex := s.newConnExecutor(ctx, sd, sdDefaults, stmtBuf, clientComm, memMetrics, srvMetrics, appStats) 675 676 // The new transaction stuff below requires active monitors and traces, so 677 // we need to activate the executor now. 678 ex.activate(ctx, parentMon, mon.BoundAccount{}) 679 680 // Perform some surgery on the executor - replace its state machine and 681 // initialize the state. 682 ex.machine = fsm.MakeMachine( 683 BoundTxnStateTransitions, 684 stateOpen{ImplicitTxn: fsm.False}, 685 &ex.state, 686 ) 687 ex.state.resetForNewSQLTxn( 688 ctx, 689 explicitTxn, 690 txn.ReadTimestamp().GoTime(), 691 nil, /* historicalTimestamp */ 692 txn.UserPriority(), 693 tree.ReadWrite, 694 txn, 695 ex.transitionCtx) 696 697 // Modify the Collection to match the parent executor's Collection. 698 // This allows the InternalExecutor to see schema changes made by the 699 // parent executor. 700 if tcModifier != nil { 701 tcModifier.CopyModifiedObjects(&ex.extraTxnState.descCollection) 702 } 703 return ex 704 } 705 706 // SQLStatReset is the cluster setting that controls at what interval SQL 707 // statement statistics should be reset. 
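//
// For illustration, an operator can shorten the reset interval with:
//
//	SET CLUSTER SETTING diagnostics.sql_stat_reset.interval = '30m';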
708 var SQLStatReset = settings.RegisterPublicNonNegativeDurationSettingWithMaximum( 709 "diagnostics.sql_stat_reset.interval", 710 "interval controlling how often SQL statement statistics should "+ 711 "be reset (should be less than diagnostics.forced_sql_stat_reset.interval). It has a max value of 24H.", 712 time.Hour, 713 time.Hour*24, 714 ) 715 716 // MaxSQLStatReset is the cluster setting that controls at what interval SQL 717 // statement statistics must be flushed within. 718 var MaxSQLStatReset = settings.RegisterPublicNonNegativeDurationSettingWithMaximum( 719 "diagnostics.forced_sql_stat_reset.interval", 720 "interval after which SQL statement statistics are refreshed even "+ 721 "if not collected (should be more than diagnostics.sql_stat_reset.interval). It has a max value of 24H.", 722 time.Hour*2, // 2 x diagnostics.sql_stat_reset.interval 723 time.Hour*24, 724 ) 725 726 // PeriodicallyClearSQLStats spawns a loop to reset stats based on the setting 727 // of a given duration settings variable. We take in a function to actually do 728 // the resetting, as some stats have extra work that needs to be performed 729 // during the reset. For example, the SQL stats need to dump into the parent 730 // stats before clearing data fully. 731 func (s *Server) PeriodicallyClearSQLStats( 732 ctx context.Context, 733 stopper *stop.Stopper, 734 setting *settings.DurationSetting, 735 stats *sqlStats, 736 reset func(ctx context.Context), 737 ) { 738 stopper.RunWorker(ctx, func(ctx context.Context) { 739 var timer timeutil.Timer 740 for { 741 s.sqlStats.Lock() 742 last := stats.lastReset 743 s.sqlStats.Unlock() 744 745 next := last.Add(setting.Get(&s.cfg.Settings.SV)) 746 wait := next.Sub(timeutil.Now()) 747 if wait < 0 { 748 reset(ctx) 749 } else { 750 timer.Reset(wait) 751 select { 752 case <-stopper.ShouldQuiesce(): 753 return 754 case <-timer.C: 755 timer.Read = true 756 } 757 } 758 } 759 }) 760 } 761 762 type closeType int 763 764 const ( 765 normalClose closeType = iota 766 panicClose 767 // externalTxnClose means that the connExecutor has been used within a 768 // higher-level txn (through the InternalExecutor). 769 externalTxnClose 770 ) 771 772 func (ex *connExecutor) closeWrapper(ctx context.Context, recovered interface{}) { 773 if recovered != nil { 774 panicErr := log.PanicAsError(1, recovered) 775 776 // If there's a statement currently being executed, we'll report 777 // on it. 778 if ex.curStmt != nil { 779 // A warning header guaranteed to go to stderr. 780 log.Shoutf(ctx, log.Severity_ERROR, 781 "a SQL panic has occurred while executing the following statement:\n%s", 782 // For the log message, the statement is not anonymized. 783 truncateStatementStringForTelemetry(ex.curStmt.String())) 784 785 // Embed the statement in the error object for the telemetry 786 // report below. The statement gets anonymized. 787 panicErr = WithAnonymizedStatement(panicErr, ex.curStmt) 788 } 789 790 // Report the panic to telemetry in any case. 791 log.ReportPanic(ctx, &ex.server.cfg.Settings.SV, panicErr, 1 /* depth */) 792 793 // Close the executor before propagating the panic further. 794 ex.close(ctx, panicClose) 795 796 // Propagate - this may be meant to stop the process. 797 panic(panicErr) 798 } 799 // Closing is not cancelable. 
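// Use a fresh background context (carrying over only ctx's log tags) so that
// cleanup still runs even if ctx has already been canceled.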
800 closeCtx := logtags.WithTags(context.Background(), logtags.FromContext(ctx)) 801 ex.close(closeCtx, normalClose) 802 } 803 804 func (ex *connExecutor) close(ctx context.Context, closeType closeType) { 805 ex.sessionEventf(ctx, "finishing connExecutor") 806 807 if ex.hasCreatedTemporarySchema && !ex.server.cfg.TestingKnobs.DisableTempObjectsCleanupOnSessionExit { 808 ie := MakeInternalExecutor(ctx, ex.server, MemoryMetrics{}, ex.server.cfg.Settings) 809 err := cleanupSessionTempObjects( 810 ctx, 811 ex.server.cfg.Settings, 812 ex.server.cfg.DB, 813 ex.server.cfg.Codec, 814 &ie, 815 ex.sessionID, 816 ) 817 if err != nil { 818 log.Errorf( 819 ctx, 820 "error deleting temporary objects at session close, "+ 821 "the temp tables deletion job will retry periodically: %s", 822 err, 823 ) 824 } 825 } 826 827 ev := noEvent 828 if _, noTxn := ex.machine.CurState().(stateNoTxn); !noTxn { 829 ev = txnRollback 830 } 831 832 if closeType == normalClose { 833 // We'll cleanup the SQL txn by creating a non-retriable (commit:true) event. 834 // This event is guaranteed to be accepted in every state. 835 ev := eventNonRetriableErr{IsCommit: fsm.True} 836 payload := eventNonRetriableErrPayload{err: pgerror.Newf(pgcode.AdminShutdown, 837 "connExecutor closing")} 838 if err := ex.machine.ApplyWithPayload(ctx, ev, payload); err != nil { 839 log.Warningf(ctx, "error while cleaning up connExecutor: %s", err) 840 } 841 } else if closeType == externalTxnClose { 842 ex.state.finishExternalTxn() 843 } 844 845 if err := ex.resetExtraTxnState(ctx, ex.server.dbCache, ev); err != nil { 846 log.Warningf(ctx, "error while cleaning up connExecutor: %s", err) 847 } 848 849 if closeType != panicClose { 850 // Close all statements and prepared portals. 851 ex.extraTxnState.prepStmtsNamespace.resetTo(ctx, prepStmtNamespace{}) 852 ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos.resetTo(ctx, prepStmtNamespace{}) 853 } 854 855 if ex.sessionTracing.Enabled() { 856 if err := ex.sessionTracing.StopTracing(); err != nil { 857 log.Warningf(ctx, "error stopping tracing: %s", err) 858 } 859 } 860 861 if ex.eventLog != nil { 862 ex.eventLog.Finish() 863 ex.eventLog = nil 864 } 865 866 if closeType != panicClose { 867 ex.state.mon.Stop(ctx) 868 ex.sessionMon.Stop(ctx) 869 ex.mon.Stop(ctx) 870 } else { 871 ex.state.mon.EmergencyStop(ctx) 872 ex.sessionMon.EmergencyStop(ctx) 873 ex.mon.EmergencyStop(ctx) 874 } 875 } 876 877 type connExecutor struct { 878 _ util.NoCopy 879 880 // The server to which this connExecutor is attached. The reference is used 881 // for getting access to configuration settings. 882 // Note: do not use server.Metrics directly. Use metrics below instead. 883 server *Server 884 885 // The metrics to which the statement metrics should be accounted. 886 // This is different whether the executor is for regular client 887 // queries or for "internal" queries. 888 metrics *Metrics 889 890 // mon tracks memory usage for SQL activity within this session. It 891 // is not directly used, but rather indirectly used via sessionMon 892 // and state.mon. sessionMon tracks session-bound objects like prepared 893 // statements and result sets. 894 // 895 // The reason why state.mon and mon are split is to enable 896 // separate reporting of statistics per transaction and per 897 // session. This is because the "interesting" behavior w.r.t memory 898 // is typically caused by transactions, not sessions. 
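// Concretely, the monitor hierarchy wired up here and in activate() looks
// like this (sketch):
//
//	pool (Server-wide)
//	└── mon ("session root")
//	    ├── sessionMon ("session": prepared stmts, result sets)
//	    └── state.mon ("txn": started per transaction)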
The reason why 899 // sessionMon and mon are split is to enable separate reporting of 900 // statistics for result sets (which escape transactions). 901 mon *mon.BytesMonitor 902 sessionMon *mon.BytesMonitor 903 // memMetrics contains the metrics that statements executed on this connection 904 // will contribute to. 905 memMetrics MemoryMetrics 906 907 // The buffer with incoming statements to execute. 908 stmtBuf *StmtBuf 909 // The interface for communicating statement results to the client. 910 clientComm ClientComm 911 // Finity "the machine" Automaton is the state machine controlling the state 912 // below. 913 machine fsm.Machine 914 // state encapsulates fields related to the ongoing SQL txn. It is mutated as 915 // the machine's ExtendedState. 916 state txnState 917 transitionCtx transitionCtx 918 sessionTracing SessionTracing 919 920 // eventLog for SQL statements and other important session events. Will be set 921 // if traceSessionEventLogEnabled; it is used by ex.sessionEventf() 922 eventLog trace.EventLog 923 924 // extraTxnState groups fields scoped to a SQL txn that are not handled by 925 // ex.state, above. The rule of thumb is that, if the state influences state 926 // transitions, it should live in state, otherwise it can live here. 927 // This is only used in the Open state. extraTxnState is reset whenever a 928 // transaction finishes or gets retried. 929 extraTxnState struct { 930 // descCollection collects descriptors used by the current transaction. 931 descCollection descs.Collection 932 933 // jobs accumulates jobs staged for execution inside the transaction. 934 // Staging happens when executing statements that are implemented with a 935 // job. The jobs are staged via the function QueueJob in 936 // pkg/sql/planner.go. The staged jobs are executed once the transaction 937 // that staged them commits. 938 jobs jobsCollection 939 940 // autoRetryCounter keeps track of which iteration of a transaction 941 // auto-retry we're currently in. It's 0 whenever the transaction state is not 942 // stateOpen. 943 autoRetryCounter int 944 945 // numDDL keeps track of how many DDL statements have been 946 // executed so far. 947 numDDL int 948 949 // txnRewindPos is the position within stmtBuf to which we'll rewind when 950 // performing automatic retries. This is more or less the position where the 951 // current transaction started. 952 // This field is only defined while in stateOpen. 953 // 954 // Set via setTxnRewindPos(). 955 txnRewindPos CmdPos 956 957 // prepStmtsNamespace contains the prepared statements and portals that the 958 // session currently has access to. 959 // Portals are bound to a transaction and they're all destroyed once the 960 // transaction finishes. 961 // Prepared statements are not transactional and so it's a bit weird that 962 // they're part of extraTxnState, but it's convenient to put them here 963 // because they need the same kind of "snapshotting" as the portals (see 964 // prepStmtsNamespaceAtTxnRewindPos). 965 prepStmtsNamespace prepStmtNamespace 966 967 // prepStmtsNamespaceAtTxnRewindPos is a snapshot of the prep stmts/portals 968 // (ex.prepStmtsNamespace) before processing the command at position 969 // txnRewindPos. 970 // Here's the deal: prepared statements are not transactional, but they do 971 // need to interact properly with automatic retries (i.e. rewinding the 972 // command buffer). When doing a rewind, we need to be able to restore the 973 // prep stmts as they were. We do this by taking a snapshot every time 974 // txnRewindPos is advanced. Prepared statements are shared between the two 975 // collections, but these collections are periodically reconciled.
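// In code, the snapshot and the restore are both expressed through resetTo
// (a sketch; the actual call sites are helpers elsewhere in the package, e.g.
// rewindPrepStmtNamespace used in execCmd below):
//
//	// When txnRewindPos advances: snapshot the current namespace.
//	ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos.resetTo(ctx, ex.extraTxnState.prepStmtsNamespace)
//	// When rewinding: restore the snapshot.
//	ex.extraTxnState.prepStmtsNamespace.resetTo(ctx, ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos)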
976 prepStmtsNamespaceAtTxnRewindPos prepStmtNamespace 977 978 // onTxnFinish (if non-nil) will be called when the txn is finished (either 979 // committed or aborted). It is set when the txn is started but can remain 980 // unset when the txn is executed within another higher-level txn. 981 onTxnFinish func(txnEvent) 982 983 // savepoints maintains the stack of savepoints currently open. 984 savepoints savepointStack 985 // savepointsAtTxnRewindPos is a snapshot of the savepoints stack before 986 // processing the command at position txnRewindPos. When rewinding, we're 987 // going to restore this snapshot. 988 savepointsAtTxnRewindPos savepointStack 989 } 990 991 // sessionData contains the user-configurable connection variables. 992 sessionData *sessiondata.SessionData 993 // dataMutator is nil for session-bound internal executors; we shouldn't issue 994 // statements that manipulate session state to an internal executor. 995 dataMutator *sessionDataMutator 996 // appStats tracks per-application SQL usage statistics. It is maintained to 997 // represent statistics for the application currently identified by 998 // sessiondata.ApplicationName. 999 appStats *appStats 1000 // applicationName is the same as sessionData.ApplicationName. It's copied 1001 // here as an atomic so that it can be read concurrently by serialize(). 1002 applicationName atomic.Value 1003 1004 // ctxHolder contains the connection's context in which all commands executed 1005 // on the connection run. This generally should not be used directly, 1006 // but through the Ctx() method; if we're inside a transaction, Ctx() is going 1007 // to return a derived context. See the Context Management comments at the top 1008 // of the file. 1009 ctxHolder ctxHolder 1010 1011 // onCancelSession is called when the SessionRegistry cancels this session. 1012 // For pgwire connections, this is hooked up to canceling the connection's 1013 // context. 1014 // If nil, canceling this session will be a no-op. 1015 onCancelSession context.CancelFunc 1016 1017 // planner is the "default planner" on a session, to save planner allocations 1018 // during serial execution. Since planners are not threadsafe, this is only 1019 // safe to use when a statement is not being parallelized. It must be reset 1020 // before use. 1021 planner planner 1022 // phaseTimes tracks session- and transaction-level phase times. It is 1023 // copied-by-value when resetting statsCollector before executing each 1024 // statement. 1025 phaseTimes phaseTimes 1026 1027 // statsCollector is used to collect statistics about SQL statements and 1028 // transactions. 1029 statsCollector *sqlStatsCollector 1030 1031 // mu contains all the elements of the struct that can be changed 1032 // after initialization, and may be accessed from another thread. 1033 mu struct { 1034 syncutil.RWMutex 1035 1036 // ActiveQueries contains all queries in flight. 1037 ActiveQueries map[ClusterWideID]*queryMeta 1038 1039 // LastActiveQuery contains a reference to the AST of the last 1040 // query that ran on this session. 1041 LastActiveQuery tree.Statement 1042 } 1043 1044 // curStmt is the statement that's currently being prepared or executed, if 1045 // any. This is printed by high-level panic recovery.
1046 curStmt tree.Statement 1047 1048 sessionID ClusterWideID 1049 1050 // activated determines whether activate() was called already. 1051 // When this is set, close() must be called to release resources. 1052 activated bool 1053 1054 // draining is set if we've received a DrainRequest. Once this is set, we're 1055 // going to find a suitable time to close the connection. 1056 draining bool 1057 1058 // executorType indicates whether this executor is an ordinary executor which 1059 // responds to user queries or an internal one. 1060 executorType executorType 1061 1062 // hasCreatedTemporarySchema is set if the executor has created a 1063 // temporary schema, which requires special cleanup on close. 1064 hasCreatedTemporarySchema bool 1065 1066 // stmtDiagnosticsRecorder is used to track which queries need to have 1067 // information collected. 1068 stmtDiagnosticsRecorder StmtDiagnosticsRecorder 1069 } 1070 1071 // ctxHolder contains a connection's context and, while session tracing is 1072 // enabled, a derived context with a recording span. The connExecutor should use 1073 // the latter while session tracing is active, or the former otherwise; that's 1074 // what the ctx() method returns. 1075 type ctxHolder struct { 1076 connCtx context.Context 1077 sessionTracingCtx context.Context 1078 } 1079 1080 func (ch *ctxHolder) ctx() context.Context { 1081 if ch.sessionTracingCtx != nil { 1082 return ch.sessionTracingCtx 1083 } 1084 return ch.connCtx 1085 } 1086 1087 func (ch *ctxHolder) hijack(sessionTracingCtx context.Context) { 1088 if ch.sessionTracingCtx != nil { 1089 panic("hijack already in effect") 1090 } 1091 ch.sessionTracingCtx = sessionTracingCtx 1092 } 1093 1094 func (ch *ctxHolder) unhijack() { 1095 if ch.sessionTracingCtx == nil { 1096 panic("hijack not in effect") 1097 } 1098 ch.sessionTracingCtx = nil 1099 } 1100 1101 type prepStmtNamespace struct { 1102 // prepStmts contains the prepared statements currently available on the 1103 // session. 1104 prepStmts map[string]*PreparedStatement 1105 // portals contains the portals currently available on the session. 1106 portals map[string]*PreparedPortal 1107 } 1108 1109 func (ns prepStmtNamespace) String() string { 1110 var sb strings.Builder 1111 sb.WriteString("Prep stmts: ") 1112 for name := range ns.prepStmts { 1113 sb.WriteString(name + " ") 1114 } 1115 sb.WriteString("Portals: ") 1116 for name := range ns.portals { 1117 sb.WriteString(name + " ") 1118 } 1119 return sb.String() 1120 } 1121 1122 // resetTo resets a namespace to match another one (`to`). All the receiver's 1123 // references are released and all of to's references are duplicated. 1124 // 1125 // An empty `to` can be passed in to deallocate everything. 1126 func (ns *prepStmtNamespace) resetTo(ctx context.Context, to prepStmtNamespace) { 1127 for name, p := range ns.prepStmts { 1128 p.decRef(ctx) 1129 delete(ns.prepStmts, name) 1130 } 1131 for name, p := range ns.portals { 1132 p.decRef(ctx) 1133 delete(ns.portals, name) 1134 } 1135 1136 for name, ps := range to.prepStmts { 1137 ps.incRef(ctx) 1138 ns.prepStmts[name] = ps 1139 } 1140 for name, p := range to.portals { 1141 p.incRef(ctx) 1142 ns.portals[name] = p 1143 } 1144 } 1145 1146 // resetExtraTxnState resets the fields of ex.extraTxnState when a transaction 1147 // commits, rolls back or restarts.
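//
// For illustration, the close() path above invokes it like this (this mirrors
// the actual call in close()):
//
//	if err := ex.resetExtraTxnState(ctx, ex.server.dbCache, ev); err != nil {
//		log.Warningf(ctx, "error while cleaning up connExecutor: %s", err)
//	}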
1148 func (ex *connExecutor) resetExtraTxnState( 1149 ctx context.Context, dbCacheHolder *databaseCacheHolder, ev txnEvent, 1150 ) error { 1151 ex.extraTxnState.jobs = nil 1152 1153 ex.extraTxnState.descCollection.ReleaseAll(ctx) 1154 1155 ex.extraTxnState.descCollection.ResetDatabaseCache(dbCacheHolder.getDatabaseCache()) 1156 1157 // Close all portals. 1158 for name, p := range ex.extraTxnState.prepStmtsNamespace.portals { 1159 p.decRef(ctx) 1160 delete(ex.extraTxnState.prepStmtsNamespace.portals, name) 1161 } 1162 1163 switch ev { 1164 case txnCommit, txnRollback: 1165 ex.extraTxnState.savepoints.clear() 1166 // After the txn is finished, we need to call onTxnFinish (if it's non-nil). 1167 if ex.extraTxnState.onTxnFinish != nil { 1168 ex.extraTxnState.onTxnFinish(ev) 1169 ex.extraTxnState.onTxnFinish = nil 1170 } 1171 } 1172 // NOTE: on txnRestart we don't need to muck with the savepoints stack. It's either 1173 // a ROLLBACK TO SAVEPOINT that generated the event, and that statement deals with the 1174 // savepoints, or it's a rewind which also deals with them. 1175 1176 return nil 1177 } 1178 1179 // Ctx returns the transaction's ctx, if we're inside a transaction, or the 1180 // session's context otherwise. 1181 func (ex *connExecutor) Ctx() context.Context { 1182 if _, ok := ex.machine.CurState().(stateNoTxn); ok { 1183 return ex.ctxHolder.ctx() 1184 } 1185 // stateInternalError is used by the InternalExecutor. 1186 if _, ok := ex.machine.CurState().(stateInternalError); ok { 1187 return ex.ctxHolder.ctx() 1188 } 1189 return ex.state.Ctx 1190 } 1191 1192 // activate engages the use of resources that must be cleaned up 1193 // afterwards. After activate() completes, the close() method must be 1194 // called. 1195 // 1196 // Args: 1197 // parentMon: The root monitor. 1198 // reserved: Memory reserved for the connection. The connExecutor takes 1199 // ownership of this memory. 1200 func (ex *connExecutor) activate( 1201 ctx context.Context, parentMon *mon.BytesMonitor, reserved mon.BoundAccount, 1202 ) { 1203 // Note: we pass `reserved` to sessionRootMon where it causes it to act as a 1204 // buffer. This is not done for sessionMon nor state.mon: these monitors don't 1205 // start with any buffer, so they'll need to ask their "parent" for memory at 1206 // the first allocation. This is acceptable because the session is 1207 // single threaded, and the point of buffering is just to avoid contention. 1208 ex.mon.Start(ctx, parentMon, reserved) 1209 ex.sessionMon.Start(ctx, ex.mon, mon.BoundAccount{}) 1210 1211 // Enable the trace if configured. 1212 if traceSessionEventLogEnabled.Get(&ex.server.cfg.Settings.SV) { 1213 remoteStr := "<admin>" 1214 if ex.sessionData.RemoteAddr != nil { 1215 remoteStr = ex.sessionData.RemoteAddr.String() 1216 } 1217 ex.eventLog = trace.NewEventLog( 1218 fmt.Sprintf("sql session [%s]", ex.sessionData.User), remoteStr) 1219 } 1220 1221 ex.activated = true 1222 } 1223 1224 // run implements the run loop for a connExecutor. Commands are read one by one 1225 // from the input buffer; they are executed and the resulting state transitions 1226 // are performed. 1227 // 1228 // run returns when either the stmtBuf is closed by someone else or when an 1229 // error is propagated from query execution. Note that query errors are not 1230 // propagated as errors to this layer; only things that are supposed to 1231 // terminate the session are (e.g. client communication errors and ctx 1232 // cancelations).
run() is expected to react to ctx cancelation, but the caller needs to also 1234 // close the stmtBuf at the same time as canceling the ctx. If cancelation 1235 // happens in the middle of a query execution, that's expected to interrupt the 1236 // execution and generate an error. run() is then supposed to return because the 1237 // buffer is closed and no further commands can be read. 1238 // 1239 // When this returns, ex.close() needs to be called and the connection to the 1240 // client needs to be terminated. If it returns with an error, that error may 1241 // represent a communication error (in which case the connection might already 1242 // also have an error from the reading side), or some other unexpected failure. 1243 // Returned errors have not been communicated to the client: it's up to the 1244 // caller to do that if it wants. 1245 // 1246 // If not nil, reserved represents memory reserved for the connection. The 1247 // connExecutor takes ownership of this memory. 1248 // 1249 // onCancel, if not nil, will be called when the SessionRegistry cancels the 1250 // session. TODO(andrei): This is hooked up to canceling the pgwire connection's 1251 // context (of which ctx is also a child). It seems uncouth for the connExecutor 1252 // to cancel a higher-level task. A better design would probably be for pgwire 1253 // to own the SessionRegistry, instead of it being owned by the sql.Server - 1254 // then pgwire would directly cancel its own tasks; the sessions also more 1255 // naturally belong there. There is a problem, however, as query cancelation (as 1256 // opposed to session cancelation) is done through the SessionRegistry and that 1257 // does belong with the connExecutor. Introducing a query registry, separate 1258 // from the session registry, might be too costly - the way query cancelation 1259 // works is that every session is asked to cancel a given query until the right 1260 // one is found. That seems like a good performance trade-off. 1261 func (ex *connExecutor) run( 1262 ctx context.Context, 1263 parentMon *mon.BytesMonitor, 1264 reserved mon.BoundAccount, 1265 onCancel context.CancelFunc, 1266 ) error { 1267 if !ex.activated { 1268 ex.activate(ctx, parentMon, reserved) 1269 } 1270 ex.ctxHolder.connCtx = ctx 1271 ex.onCancelSession = onCancel 1272 1273 ex.sessionID = ex.generateID() 1274 ex.server.cfg.SessionRegistry.register(ex.sessionID, ex) 1275 ex.planner.extendedEvalCtx.setSessionID(ex.sessionID) 1276 defer ex.server.cfg.SessionRegistry.deregister(ex.sessionID) 1277 1278 for { 1279 ex.curStmt = nil 1280 if err := ctx.Err(); err != nil { 1281 return err 1282 } 1283 1284 var err error 1285 if err = ex.execCmd(ex.Ctx()); err != nil { 1286 if errors.IsAny(err, io.EOF, errDrainingComplete) { 1287 return nil 1288 } 1289 return err 1290 } 1291 } 1292 } 1293 1294 // errDrainingComplete is returned by execCmd when the connExecutor previously got 1295 // a DrainRequest and the time is ripe to finish this session (i.e. we're no 1296 // longer in a transaction). 1297 var errDrainingComplete = fmt.Errorf("draining done. this is a good time to finish this session") 1298 1299 // execCmd reads the current command from the stmtBuf and executes it. The 1300 // transaction state is modified accordingly, and the stmtBuf is advanced or 1301 // rewound accordingly. 1302 // 1303 // Returns an error if communication of results to the client has failed and the 1304 // session should be terminated. Returns io.EOF if the stmtBuf has been closed.
1305 // Returns drainingComplete if the session should finish because draining is 1306 // complete (i.e. we received a DrainRequest - possibly previously - and the 1307 // connection is found to be idle). 1308 func (ex *connExecutor) execCmd(ctx context.Context) error { 1309 cmd, pos, err := ex.stmtBuf.CurCmd() 1310 if err != nil { 1311 return err // err could be io.EOF 1312 } 1313 1314 ctx, sp := tracing.EnsureChildSpan( 1315 ctx, ex.server.cfg.AmbientCtx.Tracer, 1316 // We print the type of command, not the String() which includes long 1317 // statements. 1318 cmd.command()) 1319 defer sp.Finish() 1320 1321 if log.ExpensiveLogEnabled(ctx, 2) || ex.eventLog != nil { 1322 ex.sessionEventf(ctx, "[%s pos:%d] executing %s", 1323 ex.machine.CurState(), pos, cmd) 1324 } 1325 1326 var ev fsm.Event 1327 var payload fsm.EventPayload 1328 var res ResultBase 1329 1330 switch tcmd := cmd.(type) { 1331 case ExecStmt: 1332 if tcmd.AST == nil { 1333 res = ex.clientComm.CreateEmptyQueryResult(pos) 1334 break 1335 } 1336 ex.curStmt = tcmd.AST 1337 1338 stmtRes := ex.clientComm.CreateStatementResult( 1339 tcmd.AST, 1340 NeedRowDesc, 1341 pos, 1342 nil, /* formatCodes */ 1343 ex.sessionData.DataConversion, 1344 0, /* limit */ 1345 "", /* portalName */ 1346 ex.implicitTxn(), 1347 ) 1348 res = stmtRes 1349 curStmt := Statement{Statement: tcmd.Statement} 1350 1351 ex.phaseTimes[sessionQueryReceived] = tcmd.TimeReceived 1352 ex.phaseTimes[sessionStartParse] = tcmd.ParseStart 1353 ex.phaseTimes[sessionEndParse] = tcmd.ParseEnd 1354 1355 stmtCtx := withStatement(ctx, ex.curStmt) 1356 ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, nil /* pinfo */) 1357 if err != nil { 1358 return err 1359 } 1360 case ExecPortal: 1361 // ExecPortal is handled like ExecStmt, except that the placeholder info 1362 // is taken from the portal. 1363 1364 portal, ok := ex.extraTxnState.prepStmtsNamespace.portals[tcmd.Name] 1365 if !ok { 1366 err := pgerror.Newf( 1367 pgcode.InvalidCursorName, "unknown portal %q", tcmd.Name) 1368 ev = eventNonRetriableErr{IsCommit: fsm.False} 1369 payload = eventNonRetriableErrPayload{err: err} 1370 res = ex.clientComm.CreateErrorResult(pos) 1371 break 1372 } 1373 if portal.Stmt.AST == nil { 1374 res = ex.clientComm.CreateEmptyQueryResult(pos) 1375 break 1376 } 1377 1378 if log.ExpensiveLogEnabled(ctx, 2) { 1379 log.VEventf(ctx, 2, "portal resolved to: %s", portal.Stmt.AST.String()) 1380 } 1381 ex.curStmt = portal.Stmt.AST 1382 1383 pinfo := &tree.PlaceholderInfo{ 1384 PlaceholderTypesInfo: tree.PlaceholderTypesInfo{ 1385 TypeHints: portal.Stmt.TypeHints, 1386 Types: portal.Stmt.Types, 1387 }, 1388 Values: portal.Qargs, 1389 } 1390 1391 ex.phaseTimes[sessionQueryReceived] = tcmd.TimeReceived 1392 // When parsing has been done earlier, via a separate parse 1393 // message, it is not any more part of the statistics collected 1394 // for this execution. In that case, we simply report that 1395 // parsing took no time. 1396 ex.phaseTimes[sessionStartParse] = time.Time{} 1397 ex.phaseTimes[sessionEndParse] = time.Time{} 1398 1399 stmtRes := ex.clientComm.CreateStatementResult( 1400 portal.Stmt.AST, 1401 // The client is using the extended protocol, so no row description is 1402 // needed. 
1403 DontNeedRowDesc,
1404 pos, portal.OutFormats,
1405 ex.sessionData.DataConversion,
1406 tcmd.Limit,
1407 tcmd.Name,
1408 ex.implicitTxn(),
1409 )
1410 res = stmtRes
1411 curStmt := Statement{
1412 Statement: portal.Stmt.Statement,
1413 Prepared: portal.Stmt,
1414 ExpectedTypes: portal.Stmt.Columns,
1415 AnonymizedStr: portal.Stmt.AnonymizedStr,
1416 }
1417 stmtCtx := withStatement(ctx, ex.curStmt)
1418 ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, pinfo)
1419 if err != nil {
1420 return err
1421 }
1422 case PrepareStmt:
1423 ex.curStmt = tcmd.AST
1424 res = ex.clientComm.CreatePrepareResult(pos)
1425 stmtCtx := withStatement(ctx, ex.curStmt)
1426 ev, payload = ex.execPrepare(stmtCtx, tcmd)
1427 case DescribeStmt:
1428 descRes := ex.clientComm.CreateDescribeResult(pos)
1429 res = descRes
1430 ev, payload = ex.execDescribe(ctx, tcmd, descRes)
1431 case BindStmt:
1432 res = ex.clientComm.CreateBindResult(pos)
1433 ev, payload = ex.execBind(ctx, tcmd)
1434 case DeletePreparedStmt:
1435 res = ex.clientComm.CreateDeleteResult(pos)
1436 ev, payload = ex.execDelPrepStmt(ctx, tcmd)
1437 case SendError:
1438 res = ex.clientComm.CreateErrorResult(pos)
1439 ev = eventNonRetriableErr{IsCommit: fsm.False}
1440 payload = eventNonRetriableErrPayload{err: tcmd.Err}
1441 case Sync:
1442 // Note that the Sync result will flush results to the network connection.
1443 res = ex.clientComm.CreateSyncResult(pos)
1444 if ex.draining {
1445 // If we're draining, check whether this is a good time to finish the
1446 // connection. If we're not inside a transaction, we stop processing
1447 // now. If we are inside a transaction, we'll check again the next time
1448 // a Sync is processed.
1449 if ex.idleConn() {
1450 // If we're about to close the connection, close res in order to flush
1451 // now, as we won't have an opportunity to do it later.
1452 res.Close(ctx, stateToTxnStatusIndicator(ex.machine.CurState()))
1453 return errDrainingComplete
1454 }
1455 }
1456 case CopyIn:
1457 res = ex.clientComm.CreateCopyInResult(pos)
1458 var err error
1459 ev, payload, err = ex.execCopyIn(ctx, tcmd)
1460 if err != nil {
1461 return err
1462 }
1463 case DrainRequest:
1464 // We received a drain request. We terminate immediately if we're not in a
1465 // transaction. If we are in a transaction, we'll finish as soon as a Sync
1466 // command (i.e. the end of a batch) is processed outside of a
1467 // transaction.
1468 ex.draining = true
1469 res = ex.clientComm.CreateDrainResult(pos)
1470 if ex.idleConn() {
1471 return errDrainingComplete
1472 }
1473 case Flush:
1474 // Closing the res will flush the connection's buffer.
1475 res = ex.clientComm.CreateFlushResult(pos)
1476 default:
1477 panic(fmt.Sprintf("unsupported command type: %T", cmd))
1478 }
1479 
1480 var advInfo advanceInfo
1481 
1482 // If an event was generated, feed it to the state machine.
1483 if ev != nil {
1484 var err error
1485 advInfo, err = ex.txnStateTransitionsApplyWrapper(ev, payload, res, pos)
1486 if err != nil {
1487 return err
1488 }
1489 } else {
1490 // If no event was generated, synthesize an advance code.
1491 advInfo = advanceInfo{
1492 code: advanceOne,
1493 }
1494 }
1495 
1496 // Decide if we need to close the result or not. We don't need to do it if
1497 // we're staying in place or rewinding - the statement will be executed
1498 // again.
1499 if advInfo.code != stayInPlace && advInfo.code != rewind {
1500 // Close the result. In case of an execution error, the result might have
1501 // its error set already or it might not.
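// If it is not set yet, the error carried by the event payload is copied into
// the result below, so that the client still sees it.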
1502 resErr := res.Err()
1503 
1504 pe, ok := payload.(payloadWithError)
1505 if ok {
1506 ex.sessionEventf(ctx, "execution error: %s", pe.errorCause())
1507 if resErr == nil {
1508 res.SetError(pe.errorCause())
1509 }
1510 }
1511 res.Close(ctx, stateToTxnStatusIndicator(ex.machine.CurState()))
1512 } else {
1513 res.Discard()
1514 }
1515 
1516 // Move the cursor according to what the state transition told us to do.
1517 switch advInfo.code {
1518 case advanceOne:
1519 ex.stmtBuf.AdvanceOne()
1520 case skipBatch:
1521 // We'll flush whatever results we have to the network. The last one must
1522 // be an error. This flush may seem unnecessary, as we generally only
1523 // flush when the client requests it through a Sync or a Flush, but without
1524 // it the Node.js driver isn't happy. That driver likes to send a "flush"
1525 // command and only sends Syncs once it has received some data. But we ignore
1526 // flush commands (just like we ignore any other commands) when skipping
1527 // to the next batch.
1528 if err := ex.clientComm.Flush(pos); err != nil {
1529 return err
1530 }
1531 if err := ex.stmtBuf.seekToNextBatch(); err != nil {
1532 return err
1533 }
1534 case rewind:
1535 ex.rewindPrepStmtNamespace(ctx)
1536 ex.extraTxnState.savepoints = ex.extraTxnState.savepointsAtTxnRewindPos
1537 advInfo.rewCap.rewindAndUnlock(ctx)
1538 case stayInPlace:
1539 // Nothing to do. The same statement will be executed again.
1540 default:
1541 panic(fmt.Sprintf("unexpected advance code: %s", advInfo.code))
1542 }
1543 
1544 if err := ex.updateTxnRewindPosMaybe(ctx, cmd, pos, advInfo); err != nil {
1545 return err
1546 }
1547 
1548 if rewindCapability, canRewind := ex.getRewindTxnCapability(); !canRewind {
1549 // Trim statements that cannot be retried to reclaim memory.
1550 ex.stmtBuf.ltrim(ctx, pos)
1551 } else {
1552 rewindCapability.close()
1553 }
1554 
1555 if ex.server.cfg.TestingKnobs.AfterExecCmd != nil {
1556 ex.server.cfg.TestingKnobs.AfterExecCmd(ctx, cmd, ex.stmtBuf)
1557 }
1558 
1559 return nil
1560 }
1561 
1562 func (ex *connExecutor) idleConn() bool {
1563 switch ex.machine.CurState().(type) {
1564 case stateNoTxn:
1565 return true
1566 case stateInternalError:
1567 return true
1568 default:
1569 return false
1570 }
1571 }
1572 
1573 // updateTxnRewindPosMaybe checks whether the ex.extraTxnState.txnRewindPos
1574 // should be advanced, based on the advInfo produced by running cmd at position
1575 // pos.
1576 func (ex *connExecutor) updateTxnRewindPosMaybe(
1577 ctx context.Context, cmd Command, pos CmdPos, advInfo advanceInfo,
1578 ) error {
1579 // txnRewindPos is only maintained while in stateOpen.
1580 if _, ok := ex.machine.CurState().(stateOpen); !ok {
1581 return nil
1582 }
1583 if advInfo.txnEvent == txnStart || advInfo.txnEvent == txnRestart {
1584 var nextPos CmdPos
1585 switch advInfo.code {
1586 case stayInPlace:
1587 nextPos = pos
1588 case advanceOne:
1589 // Future rewinds will refer to the next position; the statement that
1590 // started the transaction (i.e. BEGIN) will not itself be executed
1591 // again.
1592 nextPos = pos + 1
1593 case rewind:
1594 if advInfo.rewCap.rewindPos != ex.extraTxnState.txnRewindPos {
1595 return errors.AssertionFailedf(
1596 "unexpected rewind position: %d when txn start is: %d",
1597 errors.Safe(advInfo.rewCap.rewindPos),
1598 errors.Safe(ex.extraTxnState.txnRewindPos))
1599 }
1600 // txnRewindPos stays unchanged.
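// (A rewind, by construction, returns to txnRewindPos, so there is nothing to advance.)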
1601 return nil
1602 default:
1603 return errors.AssertionFailedf(
1604 "unexpected advance code when starting a txn: %s",
1605 errors.Safe(advInfo.code))
1606 }
1607 ex.setTxnRewindPos(ctx, nextPos)
1608 } else {
1609 // See if we can advance the rewind point even if this is not the point
1610 // where the transaction started. We can do that after running a special
1611 // statement (e.g. SET TRANSACTION or SAVEPOINT) or after most commands that
1612 // don't execute statements.
1613 // The idea is that, for example, we don't want the following sequence to
1614 // disable retries for what comes after the sequence:
1615 // 1: PrepareStmt BEGIN
1616 // 2: BindStmt
1617 // 3: ExecutePortal
1618 // 4: Sync
1619 
1620 // Note that the current command cannot influence the rewind point if
1621 // the rewind point is not currently set to the command's position
1622 // (i.e. we don't do anything if txnRewindPos != pos).
1623 
1624 if advInfo.code != advanceOne {
1625 panic(fmt.Sprintf("unexpected advanceCode: %s", advInfo.code))
1626 }
1627 
1628 var canAdvance bool
1629 _, inOpen := ex.machine.CurState().(stateOpen)
1630 if inOpen && (ex.extraTxnState.txnRewindPos == pos) {
1631 switch tcmd := cmd.(type) {
1632 case ExecStmt:
1633 canAdvance = ex.stmtDoesntNeedRetry(tcmd.AST)
1634 case ExecPortal:
1635 portal := ex.extraTxnState.prepStmtsNamespace.portals[tcmd.Name]
1636 canAdvance = ex.stmtDoesntNeedRetry(portal.Stmt.AST)
1637 case PrepareStmt:
1638 canAdvance = true
1639 case DescribeStmt:
1640 canAdvance = true
1641 case BindStmt:
1642 canAdvance = true
1643 case DeletePreparedStmt:
1644 canAdvance = true
1645 case SendError:
1646 canAdvance = true
1647 case Sync:
1648 canAdvance = true
1649 case CopyIn:
1650 // Can't advance.
1651 case DrainRequest:
1652 canAdvance = true
1653 case Flush:
1654 canAdvance = true
1655 default:
1656 panic(fmt.Sprintf("unsupported cmd: %T", cmd))
1657 }
1658 if canAdvance {
1659 ex.setTxnRewindPos(ctx, pos+1)
1660 }
1661 }
1662 }
1663 return nil
1664 }
1665 
1666 // setTxnRewindPos updates the position to which future rewinds will refer.
1667 //
1668 // All statements with lower position in stmtBuf (if any) are removed, as we
1669 // won't ever need them again.
1670 func (ex *connExecutor) setTxnRewindPos(ctx context.Context, pos CmdPos) {
1671 if pos <= ex.extraTxnState.txnRewindPos {
1672 panic(fmt.Sprintf("can only move the txnRewindPos forward. "+
1673 "Was: %d; new value: %d", ex.extraTxnState.txnRewindPos, pos))
1674 }
1675 ex.extraTxnState.txnRewindPos = pos
1676 ex.stmtBuf.ltrim(ctx, pos)
1677 ex.commitPrepStmtNamespace(ctx)
1678 ex.extraTxnState.savepointsAtTxnRewindPos = ex.extraTxnState.savepoints.clone()
1679 }
1680 
1681 // stmtDoesntNeedRetry returns true if the given statement does not need to be
1682 // retried when performing automatic retries. This means that the results of the
1683 // statement do not change with retries.
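// Currently only SAVEPOINT and SET TRANSACTION statements qualify.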
1684 func (ex *connExecutor) stmtDoesntNeedRetry(stmt tree.Statement) bool {
1685 wrap := Statement{Statement: parser.Statement{AST: stmt}}
1686 return isSavepoint(wrap) || isSetTransaction(wrap)
1687 }
1688 
1689 func stateToTxnStatusIndicator(s fsm.State) TransactionStatusIndicator {
1690 switch s.(type) {
1691 case stateOpen:
1692 return InTxnBlock
1693 case stateAborted:
1694 return InFailedTxnBlock
1695 case stateNoTxn:
1696 return IdleTxnBlock
1697 case stateCommitWait:
1698 return InTxnBlock
1699 case stateInternalError:
1700 return InTxnBlock
1701 default:
1702 panic(fmt.Sprintf("unknown state: %T", s))
1703 }
1704 }
1705 
1706 // We handle the CopyFrom statement by creating a copyMachine and handing it
1707 // control over the connection until the copying is done. The contract is that,
1708 // when this is called, the pgwire.conn is not reading from the network
1709 // connection any more until this returns. The copyMachine will do the reading
1710 // and writing up to the CommandComplete message.
1711 func (ex *connExecutor) execCopyIn(
1712 ctx context.Context, cmd CopyIn,
1713 ) (fsm.Event, fsm.EventPayload, error) {
1714 
1715 // When we're done, unblock the network connection.
1716 defer cmd.CopyDone.Done()
1717 
1718 state := ex.machine.CurState()
1719 _, isNoTxn := state.(stateNoTxn)
1720 _, isOpen := state.(stateOpen)
1721 if !isNoTxn && !isOpen {
1722 ev := eventNonRetriableErr{IsCommit: fsm.False}
1723 payload := eventNonRetriableErrPayload{
1724 err: sqlbase.NewTransactionAbortedError("" /* customMsg */)}
1725 return ev, payload, nil
1726 }
1727 
1728 // If we're in an explicit txn, then the copying will be done within that
1729 // txn. Otherwise, we tell the copyMachine to manage its own transactions.
1730 var txnOpt copyTxnOpt
1731 if isOpen {
1732 txnOpt = copyTxnOpt{
1733 txn: ex.state.mu.txn,
1734 txnTimestamp: ex.state.sqlTimestamp,
1735 stmtTimestamp: ex.server.cfg.Clock.PhysicalTime(),
1736 }
1737 }
1738 
1739 var monToStop *mon.BytesMonitor
1740 defer func() {
1741 if monToStop != nil {
1742 monToStop.Stop(ctx)
1743 }
1744 }()
1745 if isNoTxn {
1746 // HACK: We're reaching inside ex.state and starting the monitor. Normally
1747 // that's driven by the state machine, but we're bypassing the state machine
1748 // here.
1749 ex.state.mon.Start(ctx, ex.sessionMon, mon.BoundAccount{} /* reserved */)
1750 monToStop = ex.state.mon
1751 }
1752 txnOpt.resetPlanner = func(ctx context.Context, p *planner, txn *kv.Txn, txnTS time.Time, stmtTS time.Time) {
1753 // HACK: We're reaching inside ex.state and changing sqlTimestamp by hand.
1754 // It is used by resetPlanner. Normally sqlTimestamp is updated by the
1755 // state machine, but the copyMachine manages its own transactions without
1756 // going through the state machine.
1757 ex.state.sqlTimestamp = txnTS
1758 ex.statsCollector = ex.newStatsCollector()
1759 ex.statsCollector.reset(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
1760 ex.initPlanner(ctx, p)
1761 ex.resetPlanner(ctx, p, txn, stmtTS)
1762 }
1763 var cm copyMachineInterface
1764 var err error
1765 if table := cmd.Stmt.Table; table.Table() == fileUploadTable && table.Schema() == crdbInternalName {
1766 cm, err = newFileUploadMachine(ctx, cmd.Conn, cmd.Stmt, txnOpt, ex.server.cfg)
1767 } else {
1768 cm, err = newCopyMachine(
1769 ctx, cmd.Conn, cmd.Stmt, txnOpt, ex.server.cfg,
1770 // execInsertPlan
1771 func(ctx context.Context, p *planner, res RestrictedCommandResult) error {
1772 _, _, err := ex.execWithDistSQLEngine(ctx, p, tree.RowsAffected, res, false /* distribute */, nil /* progressAtomic */)
1773 return err
1774 },
1775 )
1776 }
1777 if err != nil {
1778 ev := eventNonRetriableErr{IsCommit: fsm.False}
1779 payload := eventNonRetriableErrPayload{err: err}
1780 return ev, payload, nil
1781 }
1782 if err := cm.run(ctx); err != nil {
1783 // TODO(andrei): We don't have a retriable error story for the copy machine.
1784 // When running outside of a txn, the copyMachine should probably do retries
1785 // internally. When running inside a txn, it's unclear what we should do. For
1786 // now, we abort the txn (if any).
1787 // We also don't have a story for distinguishing communication errors (which
1788 // should terminate the connection) from query errors. For now, we treat all
1789 // errors as query errors.
1790 ev := eventNonRetriableErr{IsCommit: fsm.False}
1791 payload := eventNonRetriableErrPayload{err: err}
1792 return ev, payload, nil
1793 }
1794 return nil, nil, nil
1795 }
1796 
1797 // stmtHasNoData returns true if describing a result of the input statement
1798 // type should return NoData.
1799 func stmtHasNoData(stmt tree.Statement) bool {
1800 return stmt == nil || stmt.StatementType() != tree.Rows
1801 }
1802 
1803 // generateID generates a unique ID based on the SQL instance ID and its current
1804 // HLC timestamp. These IDs are either scoped at the query level or at the
1805 // session level.
1806 func (ex *connExecutor) generateID() ClusterWideID {
1807 return GenerateClusterWideID(ex.server.cfg.Clock.Now(), ex.server.cfg.NodeID.SQLInstanceID())
1808 }
1809 
1810 // commitPrepStmtNamespace deallocates everything in
1811 // prepStmtsNamespaceAtTxnRewindPos that's not part of prepStmtsNamespace.
1812 func (ex *connExecutor) commitPrepStmtNamespace(ctx context.Context) {
1813 ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos.resetTo(
1814 ctx, ex.extraTxnState.prepStmtsNamespace)
1815 }
1816 
1817 // rewindPrepStmtNamespace deallocates everything in prepStmtsNamespace that's
1818 // not part of prepStmtsNamespaceAtTxnRewindPos.
1819 func (ex *connExecutor) rewindPrepStmtNamespace(ctx context.Context) {
1820 ex.extraTxnState.prepStmtsNamespace.resetTo(
1821 ctx, ex.extraTxnState.prepStmtsNamespaceAtTxnRewindPos)
1822 }
1823 
1824 // getRewindTxnCapability checks whether rewinding to the position previously
1825 // set through setTxnRewindPos() is possible and, if it is, returns a
1826 // rewindCapability bound to that position. The returned bool is true if the
1827 // rewind is possible. If it is, client communication is blocked until the
1828 // rewindCapability is exercised.
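// A caller that ends up not rewinding must instead call close() on the returned
// rewindCapability to unblock client communication.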
1829 func (ex *connExecutor) getRewindTxnCapability() (rewindCapability, bool) { 1830 cl := ex.clientComm.LockCommunication() 1831 1832 // If we already delivered results at or past the start position, we can't 1833 // rewind. 1834 if cl.ClientPos() >= ex.extraTxnState.txnRewindPos { 1835 cl.Close() 1836 return rewindCapability{}, false 1837 } 1838 return rewindCapability{ 1839 cl: cl, 1840 buf: ex.stmtBuf, 1841 rewindPos: ex.extraTxnState.txnRewindPos, 1842 }, true 1843 } 1844 1845 // isCommit returns true if stmt is a "COMMIT" statement. 1846 func isCommit(stmt tree.Statement) bool { 1847 _, ok := stmt.(*tree.CommitTransaction) 1848 return ok 1849 } 1850 1851 func errIsRetriable(err error) bool { 1852 return errors.HasType(err, (*roachpb.TransactionRetryWithProtoRefreshError)(nil)) 1853 } 1854 1855 // makeErrEvent takes an error and returns either an eventRetriableErr or an 1856 // eventNonRetriableErr, depending on the error type. 1857 func (ex *connExecutor) makeErrEvent(err error, stmt tree.Statement) (fsm.Event, fsm.EventPayload) { 1858 retriable := errIsRetriable(err) 1859 if retriable { 1860 rc, canAutoRetry := ex.getRewindTxnCapability() 1861 ev := eventRetriableErr{ 1862 IsCommit: fsm.FromBool(isCommit(stmt)), 1863 CanAutoRetry: fsm.FromBool(canAutoRetry), 1864 } 1865 payload := eventRetriableErrPayload{ 1866 err: err, 1867 rewCap: rc, 1868 } 1869 return ev, payload 1870 } 1871 ev := eventNonRetriableErr{ 1872 IsCommit: fsm.FromBool(isCommit(stmt)), 1873 } 1874 payload := eventNonRetriableErrPayload{err: err} 1875 return ev, payload 1876 } 1877 1878 // setTransactionModes implements the txnModesSetter interface. 1879 func (ex *connExecutor) setTransactionModes( 1880 modes tree.TransactionModes, asOfTs hlc.Timestamp, 1881 ) error { 1882 // This method cheats and manipulates ex.state directly, not through an event. 1883 // The alternative would be to create a special event, but it's unclear how 1884 // that'd work given that this method is called while executing a statement. 1885 1886 // Transform the transaction options into the types needed by the state 1887 // machine. 
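// Priority is applied first; isolation is then validated, and the read/write
// mode and the AS OF timestamp are handled below.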
1888 if modes.UserPriority != tree.UnspecifiedUserPriority { 1889 pri := txnPriorityToProto(modes.UserPriority) 1890 if err := ex.state.setPriority(pri); err != nil { 1891 return err 1892 } 1893 } 1894 if modes.Isolation != tree.UnspecifiedIsolation && modes.Isolation != tree.SerializableIsolation { 1895 return errors.AssertionFailedf( 1896 "unknown isolation level: %s", errors.Safe(modes.Isolation)) 1897 } 1898 rwMode := modes.ReadWriteMode 1899 if modes.AsOf.Expr != nil && (asOfTs == hlc.Timestamp{}) { 1900 return errors.AssertionFailedf("expected an evaluated AS OF timestamp") 1901 } 1902 if (asOfTs != hlc.Timestamp{}) { 1903 ex.state.setHistoricalTimestamp(ex.Ctx(), asOfTs) 1904 ex.state.sqlTimestamp = asOfTs.GoTime() 1905 if rwMode == tree.UnspecifiedReadWriteMode { 1906 rwMode = tree.ReadOnly 1907 } 1908 } 1909 return ex.state.setReadOnlyMode(rwMode) 1910 } 1911 1912 func txnPriorityToProto(mode tree.UserPriority) roachpb.UserPriority { 1913 var pri roachpb.UserPriority 1914 switch mode { 1915 case tree.UnspecifiedUserPriority: 1916 pri = roachpb.NormalUserPriority 1917 case tree.Low: 1918 pri = roachpb.MinUserPriority 1919 case tree.Normal: 1920 pri = roachpb.NormalUserPriority 1921 case tree.High: 1922 pri = roachpb.MaxUserPriority 1923 default: 1924 log.Fatalf(context.Background(), "unknown user priority: %s", mode) 1925 } 1926 return pri 1927 } 1928 1929 func (ex *connExecutor) txnPriorityWithSessionDefault(mode tree.UserPriority) roachpb.UserPriority { 1930 if mode == tree.UnspecifiedUserPriority { 1931 mode = tree.UserPriority(ex.sessionData.DefaultTxnPriority) 1932 } 1933 return txnPriorityToProto(mode) 1934 } 1935 1936 func (ex *connExecutor) readWriteModeWithSessionDefault( 1937 mode tree.ReadWriteMode, 1938 ) tree.ReadWriteMode { 1939 if mode == tree.UnspecifiedReadWriteMode { 1940 if ex.sessionData.DefaultReadOnly { 1941 return tree.ReadOnly 1942 } 1943 return tree.ReadWrite 1944 } 1945 return mode 1946 } 1947 1948 // initEvalCtx initializes the fields of an extendedEvalContext that stay the 1949 // same across multiple statements. resetEvalCtx must also be called before each 1950 // statement, to reinitialize other fields. 
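// (Note that the planner p is installed below as the implementation behind
// several evaluator hooks: Planner, SessionAccessor, Sequence, and so on.)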
1951 func (ex *connExecutor) initEvalCtx(ctx context.Context, evalCtx *extendedEvalContext, p *planner) {
1952 scInterface := newSchemaInterface(&ex.extraTxnState.descCollection, ex.server.cfg.VirtualSchemas)
1953 
1954 ie := MakeInternalExecutor(
1955 ctx,
1956 ex.server,
1957 ex.memMetrics,
1958 ex.server.cfg.Settings,
1959 )
1960 ie.SetSessionData(ex.sessionData)
1961 
1962 *evalCtx = extendedEvalContext{
1963 EvalContext: tree.EvalContext{
1964 Planner: p,
1965 PrivilegedAccessor: p,
1966 SessionAccessor: p,
1967 ClientNoticeSender: p,
1968 Sequence: p,
1969 Tenant: p,
1970 SessionData: ex.sessionData,
1971 Settings: ex.server.cfg.Settings,
1972 TestingKnobs: ex.server.cfg.EvalContextTestingKnobs,
1973 ClusterID: ex.server.cfg.ClusterID(),
1974 ClusterName: ex.server.cfg.RPCContext.ClusterName(),
1975 NodeID: ex.server.cfg.NodeID,
1976 Codec: ex.server.cfg.Codec,
1977 Locality: ex.server.cfg.Locality,
1978 ReCache: ex.server.reCache,
1979 InternalExecutor: &ie,
1980 DB: ex.server.cfg.DB,
1981 },
1982 SessionMutator: ex.dataMutator,
1983 VirtualSchemas: ex.server.cfg.VirtualSchemas,
1984 Tracing: &ex.sessionTracing,
1985 StatusServer: ex.server.cfg.StatusServer,
1986 MemMetrics: &ex.memMetrics,
1987 Descs: &ex.extraTxnState.descCollection,
1988 ExecCfg: ex.server.cfg,
1989 DistSQLPlanner: ex.server.cfg.DistSQLPlanner,
1990 TxnModesSetter: ex,
1991 Jobs: &ex.extraTxnState.jobs,
1992 schemaAccessors: scInterface,
1993 sqlStatsCollector: ex.statsCollector,
1994 }
1995 }
1996 
1997 // resetEvalCtx initializes the fields of evalCtx that can change
1998 // during a session (i.e. the fields not set by initEvalCtx).
1999 //
2000 // stmtTS is the timestamp that the statement_timestamp() SQL builtin will
2001 // return for statements executed with this evalCtx. Since generally each
2002 // statement is supposed to have a different timestamp, the evalCtx generally
2003 // shouldn't be reused across statements.
2004 func (ex *connExecutor) resetEvalCtx(evalCtx *extendedEvalContext, txn *kv.Txn, stmtTS time.Time) {
2005 evalCtx.TxnState = ex.getTransactionState()
2006 evalCtx.TxnReadOnly = ex.state.readOnly
2007 evalCtx.TxnImplicit = ex.implicitTxn()
2008 evalCtx.StmtTimestamp = stmtTS
2009 evalCtx.TxnTimestamp = ex.state.sqlTimestamp
2010 evalCtx.Placeholders = nil
2011 evalCtx.Annotations = nil
2012 evalCtx.IVarContainer = nil
2013 evalCtx.Context = ex.Ctx()
2014 evalCtx.Txn = txn
2015 evalCtx.Mon = ex.state.mon
2016 evalCtx.PrepareOnly = false
2017 evalCtx.SkipNormalize = false
2018 }
2019 
2020 // getTransactionState retrieves a text representation of the given state.
2021 func (ex *connExecutor) getTransactionState() string {
2022 state := ex.machine.CurState()
2023 if ex.implicitTxn() {
2024 // If the statement reading the state is in an implicit transaction, then we
2025 // want to report NoTxn to the client.
2026 state = stateNoTxn{}
2027 }
2028 return state.(fmt.Stringer).String()
2029 }
2030 
2031 func (ex *connExecutor) implicitTxn() bool {
2032 state := ex.machine.CurState()
2033 os, ok := state.(stateOpen)
2034 return ok && os.ImplicitTxn.Get()
2035 }
2036 
2037 // initPlanner initializes a planner so it can be used for planning a
2038 // query in the context of this session.
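// resetPlanner below must still be called before each statement to install the
// txn and the statement timestamp.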
2039 func (ex *connExecutor) initPlanner(ctx context.Context, p *planner) { 2040 p.cancelChecker = sqlbase.NewCancelChecker(ctx) 2041 2042 ex.initEvalCtx(ctx, &p.extendedEvalCtx, p) 2043 2044 p.sessionDataMutator = ex.dataMutator 2045 p.noticeSender = nil 2046 p.preparedStatements = ex.getPrepStmtsAccessor() 2047 2048 p.queryCacheSession.Init() 2049 p.optPlanningCtx.init(p) 2050 } 2051 2052 func (ex *connExecutor) resetPlanner( 2053 ctx context.Context, p *planner, txn *kv.Txn, stmtTS time.Time, 2054 ) { 2055 p.txn = txn 2056 p.stmt = nil 2057 2058 p.cancelChecker.Reset(ctx) 2059 2060 p.semaCtx = tree.MakeSemaContext() 2061 p.semaCtx.Location = &ex.sessionData.DataConversion.Location 2062 p.semaCtx.SearchPath = ex.sessionData.SearchPath 2063 p.semaCtx.AsOfTimestamp = nil 2064 p.semaCtx.Annotations = nil 2065 p.semaCtx.TypeResolver = p 2066 2067 ex.resetEvalCtx(&p.extendedEvalCtx, txn, stmtTS) 2068 2069 p.autoCommit = false 2070 p.isPreparing = false 2071 p.avoidCachedDescriptors = false 2072 p.discardRows = false 2073 p.collectBundle = false 2074 } 2075 2076 // txnStateTransitionsApplyWrapper is a wrapper on top of Machine built with the 2077 // TxnStateTransitions above. Its point is to detect when we go in and out of 2078 // transactions and update some state. 2079 // 2080 // Any returned error indicates an unrecoverable error for the session; 2081 // execution on this connection should be interrupted. 2082 func (ex *connExecutor) txnStateTransitionsApplyWrapper( 2083 ev fsm.Event, payload fsm.EventPayload, res ResultBase, pos CmdPos, 2084 ) (advanceInfo, error) { 2085 var implicitTxn bool 2086 if os, ok := ex.machine.CurState().(stateOpen); ok { 2087 implicitTxn = os.ImplicitTxn.Get() 2088 } 2089 2090 err := ex.machine.ApplyWithPayload(withStatement(ex.Ctx(), ex.curStmt), ev, payload) 2091 if err != nil { 2092 if errors.HasType(err, (*fsm.TransitionNotFoundError)(nil)) { 2093 panic(err) 2094 } 2095 return advanceInfo{}, err 2096 } 2097 2098 advInfo := ex.state.consumeAdvanceInfo() 2099 2100 if advInfo.code == rewind { 2101 ex.extraTxnState.autoRetryCounter++ 2102 } 2103 2104 // Handle transaction events which cause updates to txnState. 2105 switch advInfo.txnEvent { 2106 case noEvent: 2107 case txnStart: 2108 ex.extraTxnState.autoRetryCounter = 0 2109 ex.extraTxnState.onTxnFinish = ex.recordTransactionStart() 2110 case txnCommit: 2111 if res.Err() != nil { 2112 err := errorutil.UnexpectedWithIssueErrorf( 2113 26687, 2114 "programming error: non-error event %s generated even though res.Err() has been set to: %s", 2115 errors.Safe(advInfo.txnEvent.String()), 2116 res.Err()) 2117 log.Errorf(ex.Ctx(), "%v", err) 2118 errorutil.SendReport(ex.Ctx(), &ex.server.cfg.Settings.SV, err) 2119 return advanceInfo{}, err 2120 } 2121 2122 handleErr := func(err error) { 2123 if implicitTxn { 2124 // The schema change/job failed but it was also the only 2125 // operation in the transaction. In this case, the transaction's 2126 // error is the schema change error. 2127 // TODO (lucy): I'm not sure the above is true. What about DROP TABLE 2128 // with multiple tables? 2129 res.SetError(err) 2130 } else { 2131 // The schema change/job failed but everything else in the 2132 // transaction was actually committed successfully already. At 2133 // this point, it is too late to cancel the transaction. In 2134 // effect, we have violated the "A" of ACID. 
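// (The non-DDL statements have already committed; only the schema change failed.)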
2135 //
2136 // This situation is sufficiently serious that we cannot let the
2137 // error that caused the schema change to fail flow back to the
2138 // client as-is. We replace it with a custom code dedicated to
2139 // this situation. Replacement occurs because this error code is
2140 // a "serious error" and the code computation logic will give it
2141 // a higher priority.
2142 //
2143 // We also print out the original error code as a prefix of the
2144 // error message, in case it was a serious error.
2145 newErr := pgerror.Wrapf(err,
2146 pgcode.TransactionCommittedWithSchemaChangeFailure,
2147 "transaction committed but schema change aborted with error: (%s)",
2148 pgerror.GetPGCode(err))
2149 newErr = errors.WithHint(newErr,
2150 "Some of the non-DDL statements may have committed successfully, "+
2151 "but some of the DDL statement(s) failed.\nManual inspection may be "+
2152 "required to determine the actual state of the database.")
2153 newErr = errors.WithIssueLink(newErr,
2154 errors.IssueLink{IssueURL: "https://github.com/cockroachdb/cockroach/issues/42061"})
2155 res.SetError(newErr)
2156 }
2157 }
2158 ex.notifyStatsRefresherOfNewTables(ex.Ctx())
2159 
2160 if err := ex.server.cfg.JobRegistry.Run(
2161 ex.ctxHolder.connCtx,
2162 ex.server.cfg.InternalExecutor,
2163 ex.extraTxnState.jobs); err != nil {
2164 handleErr(err)
2165 }
2166 
2167 // Wait for the cache to reflect the dropped databases if any.
2168 ex.extraTxnState.descCollection.WaitForCacheToDropDatabases(ex.Ctx())
2169 
2170 fallthrough
2171 case txnRestart, txnRollback:
2172 if err := ex.resetExtraTxnState(ex.Ctx(), ex.server.dbCache, advInfo.txnEvent); err != nil {
2173 return advanceInfo{}, err
2174 }
2175 default:
2176 return advanceInfo{}, errors.AssertionFailedf(
2177 "unexpected event: %v", errors.Safe(advInfo.txnEvent))
2178 }
2179 
2180 return advInfo, nil
2181 }
2182 
2183 // initStatementResult initializes res according to a query.
2184 //
2185 // cols represents the columns of the result rows. Should be nil if
2186 // stmt.AST.StatementType() != tree.Rows.
2187 //
2188 // If an error is returned, it is to be considered a query execution error.
2189 func (ex *connExecutor) initStatementResult(
2190 ctx context.Context, res RestrictedCommandResult, stmt *Statement, cols sqlbase.ResultColumns,
2191 ) error {
2192 for _, c := range cols {
2193 if err := checkResultType(c.Typ); err != nil {
2194 return err
2195 }
2196 }
2197 if stmt.AST.StatementType() == tree.Rows {
2198 // Note that this call is necessary even if cols is nil.
2199 res.SetColumns(ctx, cols)
2200 }
2201 return nil
2202 }
2203 
2204 // newStatsCollector returns a sqlStatsCollector that will record stats in the
2205 // session's stats containers.
2206 func (ex *connExecutor) newStatsCollector() *sqlStatsCollector {
2207 return newSQLStatsCollector(&ex.server.sqlStats, ex.appStats, &ex.phaseTimes)
2208 }
2209 
2210 // cancelQuery is part of the registrySession interface.
2211 func (ex *connExecutor) cancelQuery(queryID ClusterWideID) bool {
2212 ex.mu.Lock()
2213 defer ex.mu.Unlock()
2214 if queryMeta, exists := ex.mu.ActiveQueries[queryID]; exists {
2215 queryMeta.cancel()
2216 return true
2217 }
2218 return false
2219 }
2220 
2221 // cancelSession is part of the registrySession interface.
2222 func (ex *connExecutor) cancelSession() {
2223 if ex.onCancelSession == nil {
2224 return
2225 }
2226 // TODO(abhimadan): figure out how to send a nice error message to the client.
2227 ex.onCancelSession() 2228 } 2229 2230 // user is part of the registrySession interface. 2231 func (ex *connExecutor) user() string { 2232 return ex.sessionData.User 2233 } 2234 2235 // serialize is part of the registrySession interface. 2236 func (ex *connExecutor) serialize() serverpb.Session { 2237 ex.mu.RLock() 2238 defer ex.mu.RUnlock() 2239 ex.state.mu.RLock() 2240 defer ex.state.mu.RUnlock() 2241 2242 var kvTxnID *uuid.UUID 2243 var activeTxnInfo *serverpb.TxnInfo 2244 txn := ex.state.mu.txn 2245 if txn != nil { 2246 id := txn.ID() 2247 kvTxnID = &id 2248 activeTxnInfo = &serverpb.TxnInfo{ 2249 ID: id, 2250 Start: ex.state.mu.txnStart, 2251 TxnDescription: txn.String(), 2252 } 2253 } 2254 2255 activeQueries := make([]serverpb.ActiveQuery, 0, len(ex.mu.ActiveQueries)) 2256 truncateSQL := func(sql string) string { 2257 if len(sql) > MaxSQLBytes { 2258 sql = sql[:MaxSQLBytes-utf8.RuneLen('…')] 2259 // Ensure the resulting string is valid utf8. 2260 for { 2261 if r, _ := utf8.DecodeLastRuneInString(sql); r != utf8.RuneError { 2262 break 2263 } 2264 sql = sql[:len(sql)-1] 2265 } 2266 sql += "…" 2267 } 2268 return sql 2269 } 2270 2271 for id, query := range ex.mu.ActiveQueries { 2272 if query.hidden { 2273 continue 2274 } 2275 sql := truncateSQL(query.getStatement()) 2276 progress := math.Float64frombits(atomic.LoadUint64(&query.progressAtomic)) 2277 activeQueries = append(activeQueries, serverpb.ActiveQuery{ 2278 TxnID: query.txnID, 2279 ID: id.String(), 2280 Start: query.start.UTC(), 2281 Sql: sql, 2282 IsDistributed: query.isDistributed, 2283 Phase: (serverpb.ActiveQuery_Phase)(query.phase), 2284 Progress: float32(progress), 2285 }) 2286 } 2287 lastActiveQuery := "" 2288 if ex.mu.LastActiveQuery != nil { 2289 lastActiveQuery = truncateSQL(ex.mu.LastActiveQuery.String()) 2290 } 2291 2292 remoteStr := "<admin>" 2293 if ex.sessionData.RemoteAddr != nil { 2294 remoteStr = ex.sessionData.RemoteAddr.String() 2295 } 2296 2297 return serverpb.Session{ 2298 Username: ex.sessionData.User, 2299 ClientAddress: remoteStr, 2300 ApplicationName: ex.applicationName.Load().(string), 2301 Start: ex.phaseTimes[sessionInit].UTC(), 2302 ActiveQueries: activeQueries, 2303 ActiveTxn: activeTxnInfo, 2304 KvTxnID: kvTxnID, 2305 LastActiveQuery: lastActiveQuery, 2306 ID: ex.sessionID.GetBytes(), 2307 AllocBytes: ex.mon.AllocBytes(), 2308 MaxAllocBytes: ex.mon.MaximumBytes(), 2309 } 2310 } 2311 2312 func (ex *connExecutor) getPrepStmtsAccessor() preparedStatementsAccessor { 2313 return connExPrepStmtsAccessor{ 2314 ex: ex, 2315 } 2316 } 2317 2318 // sessionEventf logs a message to the session event log (if any). 2319 func (ex *connExecutor) sessionEventf(ctx context.Context, format string, args ...interface{}) { 2320 if log.ExpensiveLogEnabled(ctx, 2) { 2321 log.VEventfDepth(ctx, 1 /* depth */, 2 /* level */, format, args...) 2322 } 2323 if ex.eventLog != nil { 2324 ex.eventLog.Printf(format, args...) 2325 } 2326 } 2327 2328 // notifyStatsRefresherOfNewTables is called on txn commit to inform 2329 // the stats refresher that new tables exist and should have their stats 2330 // collected now. 2331 func (ex *connExecutor) notifyStatsRefresherOfNewTables(ctx context.Context) { 2332 for _, desc := range ex.extraTxnState.descCollection.GetTableDescsWithNewVersion() { 2333 // The CREATE STATISTICS run for an async CTAS query is initiated by the 2334 // SchemaChanger, so we don't do it here. 2335 if desc.IsTable() && !desc.IsAs() { 2336 // Initiate a run of CREATE STATISTICS. 
We use a large number 2337 // for rowsAffected because we want to make sure that stats always get 2338 // created/refreshed here. 2339 ex.planner.execCfg.StatsRefresher. 2340 NotifyMutation(desc.ID, math.MaxInt32 /* rowsAffected */) 2341 } 2342 } 2343 } 2344 2345 // StatementCounters groups metrics for counting different types of 2346 // statements. 2347 type StatementCounters struct { 2348 // QueryCount includes all statements and it is therefore the sum of 2349 // all the below metrics. 2350 QueryCount telemetry.CounterWithMetric 2351 2352 // Basic CRUD statements. 2353 SelectCount telemetry.CounterWithMetric 2354 UpdateCount telemetry.CounterWithMetric 2355 InsertCount telemetry.CounterWithMetric 2356 DeleteCount telemetry.CounterWithMetric 2357 2358 // Transaction operations. 2359 TxnBeginCount telemetry.CounterWithMetric 2360 TxnCommitCount telemetry.CounterWithMetric 2361 TxnRollbackCount telemetry.CounterWithMetric 2362 2363 // Savepoint operations. SavepointCount is for real SQL savepoints; 2364 // the RestartSavepoint variants are for the 2365 // cockroach-specific client-side retry protocol. 2366 SavepointCount telemetry.CounterWithMetric 2367 ReleaseSavepointCount telemetry.CounterWithMetric 2368 RollbackToSavepointCount telemetry.CounterWithMetric 2369 RestartSavepointCount telemetry.CounterWithMetric 2370 ReleaseRestartSavepointCount telemetry.CounterWithMetric 2371 RollbackToRestartSavepointCount telemetry.CounterWithMetric 2372 2373 // DdlCount counts all statements whose StatementType is DDL. 2374 DdlCount telemetry.CounterWithMetric 2375 2376 // MiscCount counts all statements not covered by a more specific stat above. 2377 MiscCount telemetry.CounterWithMetric 2378 } 2379 2380 func makeStartedStatementCounters(internal bool) StatementCounters { 2381 return StatementCounters{ 2382 TxnBeginCount: telemetry.NewCounterWithMetric( 2383 getMetricMeta(MetaTxnBeginStarted, internal)), 2384 TxnCommitCount: telemetry.NewCounterWithMetric( 2385 getMetricMeta(MetaTxnCommitStarted, internal)), 2386 TxnRollbackCount: telemetry.NewCounterWithMetric( 2387 getMetricMeta(MetaTxnRollbackStarted, internal)), 2388 RestartSavepointCount: telemetry.NewCounterWithMetric( 2389 getMetricMeta(MetaRestartSavepointStarted, internal)), 2390 ReleaseRestartSavepointCount: telemetry.NewCounterWithMetric( 2391 getMetricMeta(MetaReleaseRestartSavepointStarted, internal)), 2392 RollbackToRestartSavepointCount: telemetry.NewCounterWithMetric( 2393 getMetricMeta(MetaRollbackToRestartSavepointStarted, internal)), 2394 SavepointCount: telemetry.NewCounterWithMetric( 2395 getMetricMeta(MetaSavepointStarted, internal)), 2396 ReleaseSavepointCount: telemetry.NewCounterWithMetric( 2397 getMetricMeta(MetaReleaseSavepointStarted, internal)), 2398 RollbackToSavepointCount: telemetry.NewCounterWithMetric( 2399 getMetricMeta(MetaRollbackToSavepointStarted, internal)), 2400 SelectCount: telemetry.NewCounterWithMetric( 2401 getMetricMeta(MetaSelectStarted, internal)), 2402 UpdateCount: telemetry.NewCounterWithMetric( 2403 getMetricMeta(MetaUpdateStarted, internal)), 2404 InsertCount: telemetry.NewCounterWithMetric( 2405 getMetricMeta(MetaInsertStarted, internal)), 2406 DeleteCount: telemetry.NewCounterWithMetric( 2407 getMetricMeta(MetaDeleteStarted, internal)), 2408 DdlCount: telemetry.NewCounterWithMetric( 2409 getMetricMeta(MetaDdlStarted, internal)), 2410 MiscCount: telemetry.NewCounterWithMetric( 2411 getMetricMeta(MetaMiscStarted, internal)), 2412 QueryCount: telemetry.NewCounterWithMetric( 2413 
getMetricMeta(MetaQueryStarted, internal)), 2414 } 2415 } 2416 2417 func makeExecutedStatementCounters(internal bool) StatementCounters { 2418 return StatementCounters{ 2419 TxnBeginCount: telemetry.NewCounterWithMetric( 2420 getMetricMeta(MetaTxnBeginExecuted, internal)), 2421 TxnCommitCount: telemetry.NewCounterWithMetric( 2422 getMetricMeta(MetaTxnCommitExecuted, internal)), 2423 TxnRollbackCount: telemetry.NewCounterWithMetric( 2424 getMetricMeta(MetaTxnRollbackExecuted, internal)), 2425 RestartSavepointCount: telemetry.NewCounterWithMetric( 2426 getMetricMeta(MetaRestartSavepointExecuted, internal)), 2427 ReleaseRestartSavepointCount: telemetry.NewCounterWithMetric( 2428 getMetricMeta(MetaReleaseRestartSavepointExecuted, internal)), 2429 RollbackToRestartSavepointCount: telemetry.NewCounterWithMetric( 2430 getMetricMeta(MetaRollbackToRestartSavepointExecuted, internal)), 2431 SavepointCount: telemetry.NewCounterWithMetric( 2432 getMetricMeta(MetaSavepointExecuted, internal)), 2433 ReleaseSavepointCount: telemetry.NewCounterWithMetric( 2434 getMetricMeta(MetaReleaseSavepointExecuted, internal)), 2435 RollbackToSavepointCount: telemetry.NewCounterWithMetric( 2436 getMetricMeta(MetaRollbackToSavepointExecuted, internal)), 2437 SelectCount: telemetry.NewCounterWithMetric( 2438 getMetricMeta(MetaSelectExecuted, internal)), 2439 UpdateCount: telemetry.NewCounterWithMetric( 2440 getMetricMeta(MetaUpdateExecuted, internal)), 2441 InsertCount: telemetry.NewCounterWithMetric( 2442 getMetricMeta(MetaInsertExecuted, internal)), 2443 DeleteCount: telemetry.NewCounterWithMetric( 2444 getMetricMeta(MetaDeleteExecuted, internal)), 2445 DdlCount: telemetry.NewCounterWithMetric( 2446 getMetricMeta(MetaDdlExecuted, internal)), 2447 MiscCount: telemetry.NewCounterWithMetric( 2448 getMetricMeta(MetaMiscExecuted, internal)), 2449 QueryCount: telemetry.NewCounterWithMetric( 2450 getMetricMeta(MetaQueryExecuted, internal)), 2451 } 2452 } 2453 2454 func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statement) { 2455 sc.QueryCount.Inc() 2456 switch t := stmt.(type) { 2457 case *tree.BeginTransaction: 2458 sc.TxnBeginCount.Inc() 2459 case *tree.Select: 2460 sc.SelectCount.Inc() 2461 case *tree.Update: 2462 sc.UpdateCount.Inc() 2463 case *tree.Insert: 2464 sc.InsertCount.Inc() 2465 case *tree.Delete: 2466 sc.DeleteCount.Inc() 2467 case *tree.CommitTransaction: 2468 sc.TxnCommitCount.Inc() 2469 case *tree.RollbackTransaction: 2470 sc.TxnRollbackCount.Inc() 2471 case *tree.Savepoint: 2472 if ex.isCommitOnReleaseSavepoint(t.Name) { 2473 sc.RestartSavepointCount.Inc() 2474 } else { 2475 sc.SavepointCount.Inc() 2476 } 2477 case *tree.ReleaseSavepoint: 2478 if ex.isCommitOnReleaseSavepoint(t.Savepoint) { 2479 sc.ReleaseRestartSavepointCount.Inc() 2480 } else { 2481 sc.ReleaseSavepointCount.Inc() 2482 } 2483 case *tree.RollbackToSavepoint: 2484 if ex.isCommitOnReleaseSavepoint(t.Savepoint) { 2485 sc.RollbackToRestartSavepointCount.Inc() 2486 } else { 2487 sc.RollbackToSavepointCount.Inc() 2488 } 2489 default: 2490 if tree.CanModifySchema(stmt) { 2491 sc.DdlCount.Inc() 2492 } else { 2493 sc.MiscCount.Inc() 2494 } 2495 } 2496 } 2497 2498 // connExPrepStmtsAccessor is an implementation of preparedStatementsAccessor 2499 // that gives access to a connExecutor's prepared statements. 2500 type connExPrepStmtsAccessor struct { 2501 ex *connExecutor 2502 } 2503 2504 var _ preparedStatementsAccessor = connExPrepStmtsAccessor{} 2505 2506 // List is part of the preparedStatementsAccessor interface. 
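// The returned map is a shallow copy: the map itself can be mutated freely,
// but the *PreparedStatement values are shared with the session.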
2507 func (ps connExPrepStmtsAccessor) List() map[string]*PreparedStatement { 2508 // Return a copy of the data, to prevent modification of the map. 2509 stmts := ps.ex.extraTxnState.prepStmtsNamespace.prepStmts 2510 ret := make(map[string]*PreparedStatement, len(stmts)) 2511 for key, stmt := range stmts { 2512 ret[key] = stmt 2513 } 2514 return ret 2515 } 2516 2517 // Get is part of the preparedStatementsAccessor interface. 2518 func (ps connExPrepStmtsAccessor) Get(name string) (*PreparedStatement, bool) { 2519 s, ok := ps.ex.extraTxnState.prepStmtsNamespace.prepStmts[name] 2520 return s, ok 2521 } 2522 2523 // Delete is part of the preparedStatementsAccessor interface. 2524 func (ps connExPrepStmtsAccessor) Delete(ctx context.Context, name string) bool { 2525 _, ok := ps.Get(name) 2526 if !ok { 2527 return false 2528 } 2529 ps.ex.deletePreparedStmt(ctx, name) 2530 return true 2531 } 2532 2533 // DeleteAll is part of the preparedStatementsAccessor interface. 2534 func (ps connExPrepStmtsAccessor) DeleteAll(ctx context.Context) { 2535 ps.ex.extraTxnState.prepStmtsNamespace.resetTo(ctx, prepStmtNamespace{}) 2536 } 2537 2538 // contextStatementKey is an empty type for the handle associated with the 2539 // statement value (see context.Value). 2540 type contextStatementKey struct{} 2541 2542 // withStatement adds a SQL statement to the provided context. The statement 2543 // will then be included in crash reports which use that context. 2544 func withStatement(ctx context.Context, stmt tree.Statement) context.Context { 2545 return context.WithValue(ctx, contextStatementKey{}, stmt) 2546 } 2547 2548 // statementFromCtx returns the statement value from a context, or nil if unset. 2549 func statementFromCtx(ctx context.Context) tree.Statement { 2550 stmt := ctx.Value(contextStatementKey{}) 2551 if stmt == nil { 2552 return nil 2553 } 2554 return stmt.(tree.Statement) 2555 } 2556 2557 func init() { 2558 // Register a function to include the anonymized statement in crash reports. 2559 log.RegisterTagFn("statement", func(ctx context.Context) string { 2560 stmt := statementFromCtx(ctx) 2561 if stmt == nil { 2562 return "" 2563 } 2564 // Anonymize the statement for reporting. 2565 return anonymizeStmtAndConstants(stmt) 2566 }) 2567 }
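// Example (an illustrative sketch, not part of the upstream file): the
// "statement" tag registered in init() above means that any context threaded
// through withStatement carries the current statement into crash reports:
//
//   ctx := withStatement(context.Background(), stmt)
//   // A report generated under this ctx will include the "statement" tag,
//   // rendered via anonymizeStmtAndConstants(stmt).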