github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/executor_statement_metrics.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    18  	"github.com/cockroachdb/cockroach/pkg/util/log"
    19  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    20  )
    21  
    22  // SQL execution is separated in 3+ phases:
    23  // - parse/prepare
    24  // - plan
    25  // - run
    26  //
    27  // The commonly used term "execution latency" encompasses this entire
    28  // process. However for the purpose of analyzing / optimizing
    29  // individual parts of the SQL execution engine, it is useful to
    30  // separate the durations of these individual phases. The code below
    31  // does this.
    32  
    33  // sessionPhase is used to index the Session.phaseTimes array.
    34  type sessionPhase int
    35  
    36  const (
    37  	// When the session is created (pgwire). Used to compute
    38  	// the session age.
    39  	sessionInit sessionPhase = iota
    40  
    41  	// Executor phases.
    42  	sessionQueryReceived    // Query is received.
    43  	sessionStartParse       // Parse starts.
    44  	sessionEndParse         // Parse ends.
    45  	plannerStartLogicalPlan // Planning starts.
    46  	plannerEndLogicalPlan   // Planning ends.
    47  	plannerStartExecStmt    // Execution starts.
    48  	plannerEndExecStmt      // Execution ends.
    49  
    50  	// sessionNumPhases must be listed last so that it can be used to
    51  	// define arrays sufficiently large to hold all the other values.
    52  	sessionNumPhases
    53  )
    54  
    55  // phaseTimes is the type of the session.phaseTimes array.
    56  //
    57  // It's important that this is an array and not a slice, as we rely on the array
    58  // copy behavior.
    59  type phaseTimes [sessionNumPhases]time.Time
    60  
    61  // EngineMetrics groups a set of SQL metrics.
    62  type EngineMetrics struct {
    63  	// The subset of SELECTs that are processed through DistSQL.
    64  	DistSQLSelectCount *metric.Counter
    65  	// The subset of queries which we attempted and failed to plan with the
    66  	// cost-based optimizer.
    67  	SQLOptFallbackCount   *metric.Counter
    68  	SQLOptPlanCacheHits   *metric.Counter
    69  	SQLOptPlanCacheMisses *metric.Counter
    70  
    71  	DistSQLExecLatency    *metric.Histogram
    72  	SQLExecLatency        *metric.Histogram
    73  	DistSQLServiceLatency *metric.Histogram
    74  	SQLServiceLatency     *metric.Histogram
    75  	SQLTxnLatency         *metric.Histogram
    76  
    77  	// TxnAbortCount counts transactions that were aborted, either due
    78  	// to non-retriable errors, or retriable errors when the client-side
    79  	// retry protocol is not in use.
    80  	TxnAbortCount *metric.Counter
    81  
    82  	// FailureCount counts non-retriable errors in open transactions.
    83  	FailureCount *metric.Counter
    84  }
    85  
    86  // EngineMetrics implements the metric.Struct interface
    87  var _ metric.Struct = EngineMetrics{}
    88  
    89  // MetricStruct is part of the metric.Struct interface.
    90  func (EngineMetrics) MetricStruct() {}
    91  
    92  // recordStatementSummery gathers various details pertaining to the
    93  // last executed statement/query and performs the associated
    94  // accounting in the passed-in EngineMetrics.
    95  // - distSQLUsed reports whether the query was distributed.
    96  // - automaticRetryCount is the count of implicit txn retries
    97  //   so far.
    98  // - result is the result set computed by the query/statement.
    99  // - err is the error encountered, if any.
   100  func (ex *connExecutor) recordStatementSummary(
   101  	ctx context.Context,
   102  	planner *planner,
   103  	automaticRetryCount int,
   104  	rowsAffected int,
   105  	err error,
   106  	bytesRead int64,
   107  	rowsRead int64,
   108  ) {
   109  	phaseTimes := &ex.statsCollector.phaseTimes
   110  
   111  	// Compute the run latency. This is always recorded in the
   112  	// server metrics.
   113  	runLatRaw := phaseTimes[plannerEndExecStmt].Sub(phaseTimes[plannerStartExecStmt])
   114  
   115  	// Collect the statistics.
   116  	runLat := runLatRaw.Seconds()
   117  
   118  	parseLat := phaseTimes[sessionEndParse].
   119  		Sub(phaseTimes[sessionStartParse]).Seconds()
   120  	planLat := phaseTimes[plannerEndLogicalPlan].
   121  		Sub(phaseTimes[plannerStartLogicalPlan]).Seconds()
   122  	// service latency: time query received to end of run
   123  	svcLatRaw := phaseTimes[plannerEndExecStmt].Sub(phaseTimes[sessionQueryReceived])
   124  	svcLat := svcLatRaw.Seconds()
   125  
   126  	// processing latency: contributing towards SQL results.
   127  	processingLat := parseLat + planLat + runLat
   128  
   129  	// overhead latency: txn/retry management, error checking, etc
   130  	execOverhead := svcLat - processingLat
   131  
   132  	stmt := planner.stmt
   133  	flags := planner.curPlan.flags
   134  	if automaticRetryCount == 0 {
   135  		ex.updateOptCounters(flags)
   136  		m := &ex.metrics.EngineMetrics
   137  		if flags.IsSet(planFlagDistributed) {
   138  			if _, ok := stmt.AST.(*tree.Select); ok {
   139  				m.DistSQLSelectCount.Inc(1)
   140  			}
   141  			m.DistSQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
   142  			m.DistSQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
   143  		}
   144  		m.SQLExecLatency.RecordValue(runLatRaw.Nanoseconds())
   145  		m.SQLServiceLatency.RecordValue(svcLatRaw.Nanoseconds())
   146  	}
   147  
   148  	ex.statsCollector.recordStatement(
   149  		stmt, planner.curPlan.instrumentation.savedPlanForStats,
   150  		flags.IsSet(planFlagDistributed), flags.IsSet(planFlagImplicitTxn),
   151  		automaticRetryCount, rowsAffected, err,
   152  		parseLat, planLat, runLat, svcLat, execOverhead, bytesRead, rowsRead,
   153  	)
   154  
   155  	if log.V(2) {
   156  		// ages since significant epochs
   157  		sessionAge := phaseTimes[plannerEndExecStmt].
   158  			Sub(phaseTimes[sessionInit]).Seconds()
   159  
   160  		log.Infof(ctx,
   161  			"query stats: %d rows, %d retries, "+
   162  				"parse %.2fµs (%.1f%%), "+
   163  				"plan %.2fµs (%.1f%%), "+
   164  				"run %.2fµs (%.1f%%), "+
   165  				"overhead %.2fµs (%.1f%%), "+
   166  				"session age %.4fs",
   167  			rowsAffected, automaticRetryCount,
   168  			parseLat*1e6, 100*parseLat/svcLat,
   169  			planLat*1e6, 100*planLat/svcLat,
   170  			runLat*1e6, 100*runLat/svcLat,
   171  			execOverhead*1e6, 100*execOverhead/svcLat,
   172  			sessionAge,
   173  		)
   174  	}
   175  }
   176  
   177  func (ex *connExecutor) updateOptCounters(planFlags planFlags) {
   178  	m := &ex.metrics.EngineMetrics
   179  
   180  	if planFlags.IsSet(planFlagOptCacheHit) {
   181  		m.SQLOptPlanCacheHits.Inc(1)
   182  	} else if planFlags.IsSet(planFlagOptCacheMiss) {
   183  		m.SQLOptPlanCacheMisses.Inc(1)
   184  	}
   185  }