github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/kv/metrics.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package kv 15 16 import ( 17 grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" 18 "github.com/prometheus/client_golang/prometheus" 19 ) 20 21 var ( 22 grpcMetrics = grpc_prometheus.NewClientMetrics() 23 24 eventFeedErrorCounter = prometheus.NewCounterVec( 25 prometheus.CounterOpts{ 26 Namespace: "ticdc", 27 Subsystem: "kvclient", 28 Name: "event_feed_error_count", 29 Help: "The number of error return by tikv", 30 }, []string{"type"}) 31 eventFeedGauge = prometheus.NewGauge( 32 prometheus.GaugeOpts{ 33 Namespace: "ticdc", 34 Subsystem: "kvclient", 35 Name: "event_feed_count", 36 Help: "The number of event feed running", 37 }) 38 scanRegionsDuration = prometheus.NewHistogram( 39 prometheus.HistogramOpts{ 40 Namespace: "ticdc", 41 Subsystem: "kvclient", 42 Name: "scan_regions_duration_seconds", 43 Help: "The time it took to finish a scanRegions call.", 44 Buckets: prometheus.ExponentialBuckets(0.001 /* 1 ms */, 2, 18), 45 }) 46 eventSize = prometheus.NewHistogramVec( 47 prometheus.HistogramOpts{ 48 Namespace: "ticdc", 49 Subsystem: "kvclient", 50 Name: "event_size_bytes", 51 Help: "Size of KV events.", 52 Buckets: prometheus.ExponentialBuckets(16, 2, 25), 53 }, []string{"type"}) 54 pullEventCounter = prometheus.NewCounterVec( 55 prometheus.CounterOpts{ 56 Namespace: "ticdc", 57 Subsystem: "kvclient", 58 Name: "pull_event_count", 59 Help: "event count received by this puller", 60 }, []string{"type", "namespace", "changefeed"}) 61 sendEventCounter = prometheus.NewCounterVec( 62 prometheus.CounterOpts{ 63 Namespace: "ticdc", 64 Subsystem: "kvclient", 65 Name: "send_event_count", 66 Help: "event count sent to event channel by this puller", 67 }, []string{"type", "namespace", "changefeed"}) 68 clientChannelSize = prometheus.NewGaugeVec( 69 prometheus.GaugeOpts{ 70 Namespace: "ticdc", 71 Subsystem: "kvclient", 72 Name: "channel_size", 73 Help: "size of each channel in kv client", 74 }, []string{"namespace", "changefeed", "table", "type"}) 75 clientRegionTokenSize = prometheus.NewGaugeVec( 76 prometheus.GaugeOpts{ 77 Namespace: "ticdc", 78 Subsystem: "kvclient", 79 Name: "region_token", 80 Help: "size of region token in kv client", 81 }, []string{"store", "namespace", "changefeed"}) 82 cachedRegionSize = prometheus.NewGaugeVec( 83 prometheus.GaugeOpts{ 84 Namespace: "ticdc", 85 Subsystem: "kvclient", 86 Name: "cached_region", 87 Help: "cached region that has not requested to TiKV in kv client", 88 }, []string{"store", "namespace", "changefeed"}) 89 batchResolvedEventSize = prometheus.NewHistogramVec( 90 prometheus.HistogramOpts{ 91 Namespace: "ticdc", 92 Subsystem: "kvclient", 93 Name: "batch_resolved_event_size", 94 Help: "The number of region in one batch resolved ts event", 95 Buckets: prometheus.ExponentialBuckets(2, 2, 16), 96 }, []string{"namespace", "changefeed"}) 97 grpcPoolStreamGauge = prometheus.NewGaugeVec( 98 prometheus.GaugeOpts{ 99 Namespace: "ticdc", 100 Subsystem: "kvclient", 101 Name: "grpc_stream_count", 102 Help: "active stream count of each gRPC connection", 103 }, []string{"store"}) 104 105 regionEventsBatchSize = prometheus.NewHistogram( 106 prometheus.HistogramOpts{ 107 Namespace: "ticdc", 108 Subsystem: "kvclient", 109 Name: "region_events_batch_size", 110 Help: "region events batch size", 111 Buckets: prometheus.ExponentialBuckets(1, 2, 20), 112 }) 113 114 regionConnectDuration = prometheus.NewHistogramVec( 115 prometheus.HistogramOpts{ 116 Namespace: "ticdc", 117 Subsystem: "kvclient", 118 Name: "region_connect_duration", 119 Help: "time of locating a region in ms", 120 Buckets: prometheus.ExponentialBuckets(1, 2, 20), 121 }, 122 // actions: lock, locate, connect. 123 []string{"namespace", "changefeed", "action"}) 124 125 lockResolveDuration = prometheus.NewHistogramVec( 126 prometheus.HistogramOpts{ 127 Namespace: "ticdc", 128 Subsystem: "kvclient", 129 Name: "lock_resolve_duration", 130 Help: "time of lock resolve in ms", 131 Buckets: prometheus.ExponentialBuckets(1, 2, 20), 132 }, 133 // actions: wait, run. 134 []string{"namespace", "changefeed", "action"}) 135 136 regionWorkerQueueDuration = prometheus.NewHistogramVec( 137 prometheus.HistogramOpts{ 138 Namespace: "ticdc", 139 Subsystem: "kvclient", 140 Name: "region_worker_queue_duration", 141 Help: "time of queue in region worker", 142 Buckets: prometheus.ExponentialBuckets(1, 2, 20), 143 }, 144 // actions: wait, run. 145 []string{"namespace", "changefeed"}) 146 147 workerBusyRatio = prometheus.NewGaugeVec( 148 prometheus.GaugeOpts{ 149 Namespace: "ticdc", 150 Subsystem: "kvclient", 151 Name: "region_worker_busy_ratio", 152 Help: "Busy ratio (X ms in 1s) for region worker.", 153 }, []string{"namespace", "changefeed", "table", "store", "type"}) 154 workerChannelSize = prometheus.NewGaugeVec( 155 prometheus.GaugeOpts{ 156 Namespace: "ticdc", 157 Subsystem: "kvclient", 158 Name: "region_worker_channel_size", 159 Help: "size of each channel in region worker", 160 }, []string{"namespace", "changefeed", "table", "store", "type"}) 161 slowInitializeRegion = prometheus.NewGaugeVec( 162 prometheus.GaugeOpts{ 163 Namespace: "ticdc", 164 Subsystem: "kvclient", 165 Name: "slow_initialize_region_count", 166 Help: "the number of slow initialize region", 167 }, []string{"namespace", "changefeed"}) 168 ) 169 170 // GetGlobalGrpcMetrics gets the global grpc metrics. 171 func GetGlobalGrpcMetrics() *grpc_prometheus.ClientMetrics { 172 return grpcMetrics 173 } 174 175 // InitMetrics registers all metrics in the kv package 176 func InitMetrics(registry *prometheus.Registry) { 177 registry.MustRegister(eventFeedErrorCounter) 178 registry.MustRegister(scanRegionsDuration) 179 registry.MustRegister(eventSize) 180 registry.MustRegister(eventFeedGauge) 181 registry.MustRegister(pullEventCounter) 182 registry.MustRegister(sendEventCounter) 183 registry.MustRegister(clientChannelSize) 184 registry.MustRegister(clientRegionTokenSize) 185 registry.MustRegister(cachedRegionSize) 186 registry.MustRegister(batchResolvedEventSize) 187 registry.MustRegister(grpcPoolStreamGauge) 188 registry.MustRegister(regionEventsBatchSize) 189 registry.MustRegister(regionConnectDuration) 190 registry.MustRegister(lockResolveDuration) 191 registry.MustRegister(regionWorkerQueueDuration) 192 registry.MustRegister(workerBusyRatio) 193 registry.MustRegister(workerChannelSize) 194 registry.MustRegister(slowInitializeRegion) 195 196 // Register client metrics to registry. 197 registry.MustRegister(grpcMetrics) 198 }