github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/kv/metrics.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package kv
    15  
    16  import (
    17  	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  )
    20  
    21  var (
    22  	grpcMetrics = grpc_prometheus.NewClientMetrics()
    23  
    24  	eventFeedErrorCounter = prometheus.NewCounterVec(
    25  		prometheus.CounterOpts{
    26  			Namespace: "ticdc",
    27  			Subsystem: "kvclient",
    28  			Name:      "event_feed_error_count",
    29  			Help:      "The number of error return by tikv",
    30  		}, []string{"type"})
    31  	eventFeedGauge = prometheus.NewGauge(
    32  		prometheus.GaugeOpts{
    33  			Namespace: "ticdc",
    34  			Subsystem: "kvclient",
    35  			Name:      "event_feed_count",
    36  			Help:      "The number of event feed running",
    37  		})
    38  	scanRegionsDuration = prometheus.NewHistogram(
    39  		prometheus.HistogramOpts{
    40  			Namespace: "ticdc",
    41  			Subsystem: "kvclient",
    42  			Name:      "scan_regions_duration_seconds",
    43  			Help:      "The time it took to finish a scanRegions call.",
    44  			Buckets:   prometheus.ExponentialBuckets(0.001 /* 1 ms */, 2, 18),
    45  		})
    46  	eventSize = prometheus.NewHistogramVec(
    47  		prometheus.HistogramOpts{
    48  			Namespace: "ticdc",
    49  			Subsystem: "kvclient",
    50  			Name:      "event_size_bytes",
    51  			Help:      "Size of KV events.",
    52  			Buckets:   prometheus.ExponentialBuckets(16, 2, 25),
    53  		}, []string{"type"})
    54  	pullEventCounter = prometheus.NewCounterVec(
    55  		prometheus.CounterOpts{
    56  			Namespace: "ticdc",
    57  			Subsystem: "kvclient",
    58  			Name:      "pull_event_count",
    59  			Help:      "event count received by this puller",
    60  		}, []string{"type", "namespace", "changefeed"})
    61  	sendEventCounter = prometheus.NewCounterVec(
    62  		prometheus.CounterOpts{
    63  			Namespace: "ticdc",
    64  			Subsystem: "kvclient",
    65  			Name:      "send_event_count",
    66  			Help:      "event count sent to event channel by this puller",
    67  		}, []string{"type", "namespace", "changefeed"})
    68  	clientChannelSize = prometheus.NewGaugeVec(
    69  		prometheus.GaugeOpts{
    70  			Namespace: "ticdc",
    71  			Subsystem: "kvclient",
    72  			Name:      "channel_size",
    73  			Help:      "size of each channel in kv client",
    74  		}, []string{"namespace", "changefeed", "table", "type"})
    75  	clientRegionTokenSize = prometheus.NewGaugeVec(
    76  		prometheus.GaugeOpts{
    77  			Namespace: "ticdc",
    78  			Subsystem: "kvclient",
    79  			Name:      "region_token",
    80  			Help:      "size of region token in kv client",
    81  		}, []string{"store", "namespace", "changefeed"})
    82  	cachedRegionSize = prometheus.NewGaugeVec(
    83  		prometheus.GaugeOpts{
    84  			Namespace: "ticdc",
    85  			Subsystem: "kvclient",
    86  			Name:      "cached_region",
    87  			Help:      "cached region that has not requested to TiKV in kv client",
    88  		}, []string{"store", "namespace", "changefeed"})
    89  	batchResolvedEventSize = prometheus.NewHistogramVec(
    90  		prometheus.HistogramOpts{
    91  			Namespace: "ticdc",
    92  			Subsystem: "kvclient",
    93  			Name:      "batch_resolved_event_size",
    94  			Help:      "The number of region in one batch resolved ts event",
    95  			Buckets:   prometheus.ExponentialBuckets(2, 2, 16),
    96  		}, []string{"namespace", "changefeed"})
    97  	grpcPoolStreamGauge = prometheus.NewGaugeVec(
    98  		prometheus.GaugeOpts{
    99  			Namespace: "ticdc",
   100  			Subsystem: "kvclient",
   101  			Name:      "grpc_stream_count",
   102  			Help:      "active stream count of each gRPC connection",
   103  		}, []string{"store"})
   104  
   105  	regionEventsBatchSize = prometheus.NewHistogram(
   106  		prometheus.HistogramOpts{
   107  			Namespace: "ticdc",
   108  			Subsystem: "kvclient",
   109  			Name:      "region_events_batch_size",
   110  			Help:      "region events batch size",
   111  			Buckets:   prometheus.ExponentialBuckets(1, 2, 20),
   112  		})
   113  
   114  	regionConnectDuration = prometheus.NewHistogramVec(
   115  		prometheus.HistogramOpts{
   116  			Namespace: "ticdc",
   117  			Subsystem: "kvclient",
   118  			Name:      "region_connect_duration",
   119  			Help:      "time of locating a region in ms",
   120  			Buckets:   prometheus.ExponentialBuckets(1, 2, 20),
   121  		},
   122  		// actions: lock, locate, connect.
   123  		[]string{"namespace", "changefeed", "action"})
   124  
   125  	lockResolveDuration = prometheus.NewHistogramVec(
   126  		prometheus.HistogramOpts{
   127  			Namespace: "ticdc",
   128  			Subsystem: "kvclient",
   129  			Name:      "lock_resolve_duration",
   130  			Help:      "time of lock resolve in ms",
   131  			Buckets:   prometheus.ExponentialBuckets(1, 2, 20),
   132  		},
   133  		// actions: wait, run.
   134  		[]string{"namespace", "changefeed", "action"})
   135  
   136  	regionWorkerQueueDuration = prometheus.NewHistogramVec(
   137  		prometheus.HistogramOpts{
   138  			Namespace: "ticdc",
   139  			Subsystem: "kvclient",
   140  			Name:      "region_worker_queue_duration",
   141  			Help:      "time of queue in region worker",
   142  			Buckets:   prometheus.ExponentialBuckets(1, 2, 20),
   143  		},
   144  		// actions: wait, run.
   145  		[]string{"namespace", "changefeed"})
   146  
   147  	workerBusyRatio = prometheus.NewGaugeVec(
   148  		prometheus.GaugeOpts{
   149  			Namespace: "ticdc",
   150  			Subsystem: "kvclient",
   151  			Name:      "region_worker_busy_ratio",
   152  			Help:      "Busy ratio (X ms in 1s) for region worker.",
   153  		}, []string{"namespace", "changefeed", "table", "store", "type"})
   154  	workerChannelSize = prometheus.NewGaugeVec(
   155  		prometheus.GaugeOpts{
   156  			Namespace: "ticdc",
   157  			Subsystem: "kvclient",
   158  			Name:      "region_worker_channel_size",
   159  			Help:      "size of each channel in region worker",
   160  		}, []string{"namespace", "changefeed", "table", "store", "type"})
   161  	slowInitializeRegion = prometheus.NewGaugeVec(
   162  		prometheus.GaugeOpts{
   163  			Namespace: "ticdc",
   164  			Subsystem: "kvclient",
   165  			Name:      "slow_initialize_region_count",
   166  			Help:      "the number of slow initialize region",
   167  		}, []string{"namespace", "changefeed"})
   168  )
   169  
// GetGlobalGrpcMetrics gets the global grpc metrics.
//
// It returns the package-level grpcMetrics collector itself (not a copy), so
// every caller shares the same *grpc_prometheus.ClientMetrics instance that
// InitMetrics registers.
func GetGlobalGrpcMetrics() *grpc_prometheus.ClientMetrics {
	return grpcMetrics
}
   174  
   175  // InitMetrics registers all metrics in the kv package
   176  func InitMetrics(registry *prometheus.Registry) {
   177  	registry.MustRegister(eventFeedErrorCounter)
   178  	registry.MustRegister(scanRegionsDuration)
   179  	registry.MustRegister(eventSize)
   180  	registry.MustRegister(eventFeedGauge)
   181  	registry.MustRegister(pullEventCounter)
   182  	registry.MustRegister(sendEventCounter)
   183  	registry.MustRegister(clientChannelSize)
   184  	registry.MustRegister(clientRegionTokenSize)
   185  	registry.MustRegister(cachedRegionSize)
   186  	registry.MustRegister(batchResolvedEventSize)
   187  	registry.MustRegister(grpcPoolStreamGauge)
   188  	registry.MustRegister(regionEventsBatchSize)
   189  	registry.MustRegister(regionConnectDuration)
   190  	registry.MustRegister(lockResolveDuration)
   191  	registry.MustRegister(regionWorkerQueueDuration)
   192  	registry.MustRegister(workerBusyRatio)
   193  	registry.MustRegister(workerChannelSize)
   194  	registry.MustRegister(slowInitializeRegion)
   195  
   196  	// Register client metrics to registry.
   197  	registry.MustRegister(grpcMetrics)
   198  }