github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/observer/tidb.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package observer
    15  
    16  import (
    17  	"context"
    18  	"database/sql"
    19  	"strconv"
    20  
    21  	"github.com/pingcap/log"
    22  	"github.com/pingcap/tiflow/pkg/errors"
    23  	"go.uber.org/zap"
    24  )
    25  
    26  var (
    27  	// Query latest tidb connection idle duration, sample output:
    28  	// +----------------------------+------------------+--------+----------+-----------------------+
    29  	// | time                       | instance         | in_txn | quantile | value                 |
    30  	// +----------------------------+------------------+--------+----------+-----------------------+
    31  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 0      |      0.9 |    0.4613936714347638 |
    32  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 1      |      0.9 | 0.0007897614642526763 |
    33  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 0      |     0.99 |    0.5070392371044647 |
    34  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 1      |     0.99 | 0.0026397727272727063 |
    35  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 0      |    0.999 |    0.5116037936714348 |
    36  	// | 2023-01-30 17:42:23.918000 | 10.2.6.127:11080 | 1      |    0.999 |  0.013826666666666192 |
    37  	// +----------------------------+------------------+--------+----------+-----------------------+
    38  	queryConnIdleDurationStmt = `SELECT
    39  	a.time, a.instance, a.in_txn, a.quantile, a.value
    40  	FROM METRICS_SCHEMA.tidb_connection_idle_duration a
    41  	INNER JOIN (
    42  		SELECT instance, in_txn, quantile, MAX(time) time
    43  		FROM METRICS_SCHEMA.tidb_connection_idle_duration
    44  		WHERE quantile in (0.9,0.99,0.999)
    45  		GROUP BY instance, in_txn, quantile
    46  	) b ON a.instance = b.instance AND a.in_txn = b.in_txn AND
    47  	a.quantile = b.quantile AND a.time = b.time AND a.quantile in (0.9,0.99,0.999);`
    48  
    49  	// Query latest tidb connection count, sample output:
    50  	// +----------------------------+------------------+-------+
    51  	// | time                       | instance         | value |
    52  	// +----------------------------+------------------+-------+
    53  	// | 2023-01-10 16:44:39.123000 | 10.2.6.127:11080 |    24 |
    54  	// +----------------------------+------------------+-------+
    55  	queryConnCountStmt = `SELECT a.time, a.instance, a.value
    56  	FROM METRICS_SCHEMA.tidb_connection_count a
    57  	INNER JOIN (
    58  		SELECT instance, MAX(time) time FROM METRICS_SCHEMA.tidb_connection_count
    59  		GROUP BY instance
    60  	) b ON a.instance = b.instance AND a.time = b.time;`
    61  
    62  	// Query latest tidb query duration, sample output:
    63  	// +----------------------------+------------------+----------------+-----------------------+
    64  	// | time                       | instance         | sql_type       | value                 |
    65  	// +----------------------------+------------------+----------------+-----------------------+
    66  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Begin          | 0.0018886375591793793 |
    67  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Commit         |  0.014228768066070199 |
    68  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | CreateDatabase |                  NULL |
    69  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | CreateTable    |                  NULL |
    70  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Delete         |                  NULL |
    71  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Execute        |                  NULL |
    72  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Insert         | 0.0004933262664880737 |
    73  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Replace        |                  NULL |
    74  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Rollback       |                  NULL |
    75  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Select         |   0.06080000000000001 |
    76  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Set            | 0.0017023494860499266 |
    77  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Show           |                  NULL |
    78  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Update         |                  NULL |
    79  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | Use            |                  NULL |
    80  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | general        |                  NULL |
    81  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | internal       |  0.007085714285714287 |
    82  	// | 2023-01-10 16:47:08.283000 | 10.2.6.127:11080 | other          |                  NULL |
    83  	// +----------------------------+------------------+----------------+-----------------------+
    84  	queryQueryDurationStmt = `SELECT a.time, a.instance, a.sql_type, a.value
    85  	FROM METRICS_SCHEMA.tidb_query_duration a
    86  	INNER JOIN (
    87  		SELECT instance, sql_type, MAX(time) time FROM METRICS_SCHEMA.tidb_query_duration
    88  		GROUP BY instance, sql_type
    89  	) b ON a.instance = b.instance AND a.sql_type = b.sql_type AND a.time = b.time;`
    90  
    91  	// Query latest tidb transaction duration, sample output:
    92  	// +----------------------------+------------------+----------+---------------------+
    93  	// | time                       | instance         | type     | value               |
    94  	// +----------------------------+------------------+----------+---------------------+
    95  	// | 2023-01-10 16:50:38.153000 | 10.2.6.127:11080 | abort    |                NULL |
    96  	// | 2023-01-10 16:50:38.153000 | 10.2.6.127:11080 | commit   | 0.06155323076923076 |
    97  	// | 2023-01-10 16:50:38.153000 | 10.2.6.127:11080 | rollback |                NULL |
    98  	// +----------------------------+------------------+----------+---------------------+
    99  	queryTxnDurationStmt = `SELECT a.time, a.instance, a.type, a.value
   100  	FROM METRICS_SCHEMA.tidb_transaction_duration a
   101  	INNER JOIN (
   102  		SELECT instance, type, MAX(time) time FROM METRICS_SCHEMA.tidb_transaction_duration
   103  		GROUP BY instance, type
   104  	) b ON a.instance = b.instance AND a.type = b.type AND a.time = b.time;`
   105  )
   106  
   107  // TiDBObserver is a tidb performance observer. It's not thread-safe.
   108  type TiDBObserver struct {
   109  	db *sql.DB
   110  }
   111  
   112  // Tick implements Observer
   113  func (o *TiDBObserver) Tick(ctx context.Context) error {
   114  	m1 := make([]*tidbConnIdleDuration, 0)
   115  	if err := queryMetrics[tidbConnIdleDuration](
   116  		ctx, o.db, queryConnIdleDurationStmt, &m1); err != nil {
   117  		return err
   118  	}
   119  	for _, m := range m1 {
   120  		if !m.duration.Valid {
   121  			m.duration.Float64 = 0
   122  		}
   123  		inTxnLabel := strconv.Itoa(m.inTxn)
   124  		quantileLabel := strconv.FormatFloat(m.quantile, 'f', -1, 64)
   125  		tidbConnIdleDurationGauge.
   126  			WithLabelValues(m.instance, inTxnLabel, quantileLabel).
   127  			Set(m.duration.Float64)
   128  	}
   129  
   130  	m2 := make([]*tidbConnCount, 0)
   131  	if err := queryMetrics[tidbConnCount](
   132  		ctx, o.db, queryConnCountStmt, &m2); err != nil {
   133  		return err
   134  	}
   135  	for _, m := range m2 {
   136  		if !m.count.Valid {
   137  			m.count.Int32 = 0
   138  		}
   139  		tidbConnCountGauge.WithLabelValues(m.instance).Set(float64(m.count.Int32))
   140  	}
   141  
   142  	m3 := make([]*tidbQueryDuration, 0)
   143  	if err := queryMetrics[tidbQueryDuration](
   144  		ctx, o.db, queryQueryDurationStmt, &m3); err != nil {
   145  		return err
   146  	}
   147  	for _, m := range m3 {
   148  		if !m.duration.Valid {
   149  			m.duration.Float64 = 0
   150  		}
   151  		tidbQueryDurationGauge.WithLabelValues(m.instance, m.queryType).Set(m.duration.Float64)
   152  	}
   153  
   154  	m4 := make([]*tidbTxnDuration, 0)
   155  	if err := queryMetrics[tidbTxnDuration](
   156  		ctx, o.db, queryTxnDurationStmt, &m4); err != nil {
   157  		return err
   158  	}
   159  	for _, m := range m4 {
   160  		if !m.duration.Valid {
   161  			m.duration.Float64 = 0
   162  		}
   163  		tidbTxnDurationGauge.WithLabelValues(m.instance, m.opType).Set(m.duration.Float64)
   164  	}
   165  
   166  	return nil
   167  }
   168  
   169  // Close implements Observer
   170  func (o *TiDBObserver) Close() error {
   171  	return o.db.Close()
   172  }
   173  
   174  // NewTiDBObserver creates a new TiDBObserver instance
   175  func NewTiDBObserver(db *sql.DB) *TiDBObserver {
   176  	return &TiDBObserver{
   177  		db: db,
   178  	}
   179  }
   180  
   181  type tidbConnIdleDuration struct {
   182  	ts       string
   183  	instance string
   184  	inTxn    int
   185  	quantile float64
   186  	duration sql.NullFloat64
   187  }
   188  
   189  func (m *tidbConnIdleDuration) columns() []interface{} {
   190  	return []interface{}{&m.ts, &m.instance, &m.inTxn, &m.quantile, &m.duration}
   191  }
   192  
   193  type tidbConnCount struct {
   194  	ts       string
   195  	instance string
   196  	count    sql.NullInt32
   197  }
   198  
   199  func (m *tidbConnCount) columns() []interface{} {
   200  	return []interface{}{&m.ts, &m.instance, &m.count}
   201  }
   202  
   203  type tidbQueryDuration struct {
   204  	ts        string
   205  	instance  string
   206  	queryType string
   207  	duration  sql.NullFloat64
   208  }
   209  
   210  func (m *tidbQueryDuration) columns() []interface{} {
   211  	return []interface{}{&m.ts, &m.instance, &m.queryType, &m.duration}
   212  }
   213  
   214  type tidbTxnDuration struct {
   215  	ts       string
   216  	instance string
   217  	opType   string
   218  	duration sql.NullFloat64
   219  }
   220  
   221  func (m *tidbTxnDuration) columns() []interface{} {
   222  	return []interface{}{&m.ts, &m.instance, &m.opType, &m.duration}
   223  }
   224  
   225  type metricColumnImpl interface {
   226  	tidbConnIdleDuration | tidbConnCount | tidbQueryDuration | tidbTxnDuration
   227  }
   228  
   229  type metricColumnIface[T metricColumnImpl] interface {
   230  	*T
   231  	columns() []interface{}
   232  }
   233  
   234  func queryMetrics[T metricColumnImpl, F metricColumnIface[T]](
   235  	ctx context.Context, db *sql.DB, stmt string, metrics *[]F,
   236  ) error {
   237  	rows, err := db.QueryContext(ctx, stmt)
   238  	if err != nil {
   239  		return errors.WrapError(errors.ErrMySQLQueryError, err)
   240  	}
   241  	defer func() {
   242  		if err := rows.Close(); err != nil {
   243  			log.Warn("query metrics close rows failed", zap.Error(err))
   244  		}
   245  		if rows.Err() != nil {
   246  			log.Warn("query metrics rows has error", zap.Error(rows.Err()))
   247  		}
   248  	}()
   249  	for rows.Next() {
   250  		var m F = new(T)
   251  		if err := rows.Scan(m.columns()...); err != nil {
   252  			return errors.WrapError(errors.ErrMySQLQueryError, err)
   253  		}
   254  		*metrics = append(*metrics, m)
   255  	}
   256  	return nil
   257  }