github.com/KinWaiYuen/client-go/v2@v2.5.4/metrics/metrics.go (about) 1 // Copyright 2021 TiKV Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // NOTE: The code in this file is based on code from the 16 // TiDB project, licensed under the Apache License v 2.0 17 // 18 // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/metrics/metrics.go 19 // 20 21 // Copyright 2021 PingCAP, Inc. 22 // 23 // Licensed under the Apache License, Version 2.0 (the "License"); 24 // you may not use this file except in compliance with the License. 25 // You may obtain a copy of the License at 26 // 27 // http://www.apache.org/licenses/LICENSE-2.0 28 // 29 // Unless required by applicable law or agreed to in writing, software 30 // distributed under the License is distributed on an "AS IS" BASIS, 31 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 32 // See the License for the specific language governing permissions and 33 // limitations under the License. 34 35 package metrics 36 37 import ( 38 "github.com/prometheus/client_golang/prometheus" 39 dto "github.com/prometheus/client_model/go" 40 ) 41 42 // Client metrics. 43 var ( 44 TiKVTxnCmdHistogram *prometheus.HistogramVec 45 TiKVBackoffHistogram *prometheus.HistogramVec 46 TiKVSendReqHistogram *prometheus.HistogramVec 47 TiKVCoprocessorHistogram *prometheus.HistogramVec 48 TiKVLockResolverCounter *prometheus.CounterVec 49 TiKVRegionErrorCounter *prometheus.CounterVec 50 TiKVTxnWriteKVCountHistogram prometheus.Histogram 51 TiKVTxnWriteSizeHistogram prometheus.Histogram 52 TiKVRawkvCmdHistogram *prometheus.HistogramVec 53 TiKVRawkvSizeHistogram *prometheus.HistogramVec 54 TiKVTxnRegionsNumHistogram *prometheus.HistogramVec 55 TiKVLoadSafepointCounter *prometheus.CounterVec 56 TiKVSecondaryLockCleanupFailureCounter *prometheus.CounterVec 57 TiKVRegionCacheCounter *prometheus.CounterVec 58 TiKVLocalLatchWaitTimeHistogram prometheus.Histogram 59 TiKVStatusDuration *prometheus.HistogramVec 60 TiKVStatusCounter *prometheus.CounterVec 61 TiKVBatchWaitDuration prometheus.Histogram 62 TiKVBatchSendLatency prometheus.Histogram 63 TiKVBatchWaitOverLoad prometheus.Counter 64 TiKVBatchPendingRequests *prometheus.HistogramVec 65 TiKVBatchRequests *prometheus.HistogramVec 66 TiKVBatchClientUnavailable prometheus.Histogram 67 TiKVBatchClientWaitEstablish prometheus.Histogram 68 TiKVBatchClientRecycle prometheus.Histogram 69 TiKVBatchRecvLatency *prometheus.HistogramVec 70 TiKVRangeTaskStats *prometheus.GaugeVec 71 TiKVRangeTaskPushDuration *prometheus.HistogramVec 72 TiKVTokenWaitDuration prometheus.Histogram 73 TiKVTxnHeartBeatHistogram *prometheus.HistogramVec 74 TiKVPessimisticLockKeysDuration prometheus.Histogram 75 TiKVTTLLifeTimeReachCounter prometheus.Counter 76 TiKVNoAvailableConnectionCounter prometheus.Counter 77 TiKVTwoPCTxnCounter *prometheus.CounterVec 78 TiKVAsyncCommitTxnCounter *prometheus.CounterVec 79 TiKVOnePCTxnCounter *prometheus.CounterVec 80 TiKVStoreLimitErrorCounter *prometheus.CounterVec 81 TiKVGRPCConnTransientFailureCounter *prometheus.CounterVec 82 TiKVPanicCounter *prometheus.CounterVec 83 TiKVForwardRequestCounter *prometheus.CounterVec 84 TiKVTSFutureWaitDuration prometheus.Histogram 85 TiKVSafeTSUpdateCounter *prometheus.CounterVec 86 TiKVMinSafeTSGapSeconds *prometheus.GaugeVec 87 TiKVReplicaSelectorFailureCounter *prometheus.CounterVec 88 TiKVRequestRetryTimesHistogram prometheus.Histogram 89 TiKVTxnCommitBackoffSeconds prometheus.Histogram 90 TiKVTxnCommitBackoffCount prometheus.Histogram 91 TiKVSmallReadDuration prometheus.Histogram 92 TiKVUnsafeDestroyRangeFailuresCounterVec *prometheus.CounterVec 93 ) 94 95 // Label constants. 96 const ( 97 LblType = "type" 98 LblResult = "result" 99 LblStore = "store" 100 LblCommit = "commit" 101 LblAbort = "abort" 102 LblRollback = "rollback" 103 LblBatchGet = "batch_get" 104 LblGet = "get" 105 LblLockKeys = "lock_keys" 106 LabelBatchRecvLoop = "batch-recv-loop" 107 LabelBatchSendLoop = "batch-send-loop" 108 LblAddress = "address" 109 LblFromStore = "from_store" 110 LblToStore = "to_store" 111 LblStaleRead = "stale_read" 112 ) 113 114 func initMetrics(namespace, subsystem string) { 115 TiKVTxnCmdHistogram = prometheus.NewHistogramVec( 116 prometheus.HistogramOpts{ 117 Namespace: namespace, 118 Subsystem: subsystem, 119 Name: "txn_cmd_duration_seconds", 120 Help: "Bucketed histogram of processing time of txn cmds.", 121 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 0.5ms ~ 1.5days 122 }, []string{LblType}) 123 124 TiKVBackoffHistogram = prometheus.NewHistogramVec( 125 prometheus.HistogramOpts{ 126 Namespace: namespace, 127 Subsystem: subsystem, 128 Name: "backoff_seconds", 129 Help: "total backoff seconds of a single backoffer.", 130 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 0.5ms ~ 1.5days 131 }, []string{LblType}) 132 133 TiKVSendReqHistogram = prometheus.NewHistogramVec( 134 prometheus.HistogramOpts{ 135 Namespace: namespace, 136 Subsystem: subsystem, 137 Name: "request_seconds", 138 Help: "Bucketed histogram of sending request duration.", 139 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 0.5ms ~ 1.5days 140 }, []string{LblType, LblStore, LblStaleRead}) 141 142 TiKVCoprocessorHistogram = prometheus.NewHistogramVec( 143 prometheus.HistogramOpts{ 144 Namespace: namespace, 145 Subsystem: subsystem, 146 Name: "cop_duration_seconds", 147 Help: "Run duration of a single coprocessor task, includes backoff time.", 148 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 0.5ms ~ 1.5days 149 }, []string{LblStore, LblStaleRead}) 150 151 TiKVLockResolverCounter = prometheus.NewCounterVec( 152 prometheus.CounterOpts{ 153 Namespace: namespace, 154 Subsystem: subsystem, 155 Name: "lock_resolver_actions_total", 156 Help: "Counter of lock resolver actions.", 157 }, []string{LblType}) 158 159 TiKVRegionErrorCounter = prometheus.NewCounterVec( 160 prometheus.CounterOpts{ 161 Namespace: namespace, 162 Subsystem: subsystem, 163 Name: "region_err_total", 164 Help: "Counter of region errors.", 165 }, []string{LblType}) 166 167 TiKVTxnWriteKVCountHistogram = prometheus.NewHistogram( 168 prometheus.HistogramOpts{ 169 Namespace: namespace, 170 Subsystem: subsystem, 171 Name: "txn_write_kv_num", 172 Help: "Count of kv pairs to write in a transaction.", 173 Buckets: prometheus.ExponentialBuckets(1, 4, 17), // 1 ~ 4G 174 }) 175 176 TiKVTxnWriteSizeHistogram = prometheus.NewHistogram( 177 prometheus.HistogramOpts{ 178 Namespace: namespace, 179 Subsystem: subsystem, 180 Name: "txn_write_size_bytes", 181 Help: "Size of kv pairs to write in a transaction.", 182 Buckets: prometheus.ExponentialBuckets(16, 4, 17), // 16Bytes ~ 64GB 183 }) 184 185 TiKVRawkvCmdHistogram = prometheus.NewHistogramVec( 186 prometheus.HistogramOpts{ 187 Namespace: namespace, 188 Subsystem: subsystem, 189 Name: "rawkv_cmd_seconds", 190 Help: "Bucketed histogram of processing time of rawkv cmds.", 191 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 29), // 0.5ms ~ 1.5days 192 }, []string{LblType}) 193 194 TiKVRawkvSizeHistogram = prometheus.NewHistogramVec( 195 prometheus.HistogramOpts{ 196 Namespace: namespace, 197 Subsystem: subsystem, 198 Name: "rawkv_kv_size_bytes", 199 Help: "Size of key/value to put, in bytes.", 200 Buckets: prometheus.ExponentialBuckets(1, 2, 30), // 1Byte ~ 512MB 201 }, []string{LblType}) 202 203 TiKVTxnRegionsNumHistogram = prometheus.NewHistogramVec( 204 prometheus.HistogramOpts{ 205 Namespace: namespace, 206 Subsystem: subsystem, 207 Name: "txn_regions_num", 208 Help: "Number of regions in a transaction.", 209 Buckets: prometheus.ExponentialBuckets(1, 2, 25), // 1 ~ 16M 210 }, []string{LblType}) 211 212 TiKVLoadSafepointCounter = prometheus.NewCounterVec( 213 prometheus.CounterOpts{ 214 Namespace: namespace, 215 Subsystem: subsystem, 216 Name: "load_safepoint_total", 217 Help: "Counter of load safepoint.", 218 }, []string{LblType}) 219 220 TiKVSecondaryLockCleanupFailureCounter = prometheus.NewCounterVec( 221 prometheus.CounterOpts{ 222 Namespace: namespace, 223 Subsystem: subsystem, 224 Name: "lock_cleanup_task_total", 225 Help: "failure statistic of secondary lock cleanup task.", 226 }, []string{LblType}) 227 228 TiKVRegionCacheCounter = prometheus.NewCounterVec( 229 prometheus.CounterOpts{ 230 Namespace: namespace, 231 Subsystem: subsystem, 232 Name: "region_cache_operations_total", 233 Help: "Counter of region cache.", 234 }, []string{LblType, LblResult}) 235 236 TiKVLocalLatchWaitTimeHistogram = prometheus.NewHistogram( 237 prometheus.HistogramOpts{ 238 Namespace: namespace, 239 Subsystem: subsystem, 240 Name: "local_latch_wait_seconds", 241 Help: "Wait time of a get local latch.", 242 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 20), // 0.5ms ~ 262s 243 }) 244 245 TiKVStatusDuration = prometheus.NewHistogramVec( 246 prometheus.HistogramOpts{ 247 Namespace: namespace, 248 Subsystem: subsystem, 249 Name: "kv_status_api_duration", 250 Help: "duration for kv status api.", 251 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 20), // 0.5ms ~ 262s 252 }, []string{"store"}) 253 254 TiKVStatusCounter = prometheus.NewCounterVec( 255 prometheus.CounterOpts{ 256 Namespace: namespace, 257 Subsystem: subsystem, 258 Name: "kv_status_api_count", 259 Help: "Counter of access kv status api.", 260 }, []string{LblResult}) 261 262 TiKVBatchWaitDuration = prometheus.NewHistogram( 263 prometheus.HistogramOpts{ 264 Namespace: namespace, 265 Subsystem: subsystem, 266 Name: "batch_wait_duration", 267 Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s 268 Help: "batch wait duration", 269 }) 270 271 TiKVBatchSendLatency = prometheus.NewHistogram( 272 prometheus.HistogramOpts{ 273 Namespace: namespace, 274 Subsystem: subsystem, 275 Name: "batch_send_latency", 276 Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s 277 Help: "batch send latency", 278 }) 279 280 TiKVBatchRecvLatency = prometheus.NewHistogramVec( 281 prometheus.HistogramOpts{ 282 Namespace: namespace, 283 Subsystem: subsystem, 284 Name: "batch_recv_latency", 285 Buckets: prometheus.ExponentialBuckets(1000, 2, 34), // 1us ~ 8000s 286 Help: "batch recv latency", 287 }, []string{LblResult}) 288 289 TiKVBatchWaitOverLoad = prometheus.NewCounter( 290 prometheus.CounterOpts{ 291 Namespace: namespace, 292 Subsystem: subsystem, 293 Name: "batch_wait_overload", 294 Help: "event of tikv transport layer overload", 295 }) 296 297 TiKVBatchPendingRequests = prometheus.NewHistogramVec( 298 prometheus.HistogramOpts{ 299 Namespace: namespace, 300 Subsystem: subsystem, 301 Name: "batch_pending_requests", 302 Buckets: prometheus.ExponentialBuckets(1, 2, 8), 303 Help: "number of requests pending in the batch channel", 304 }, []string{"store"}) 305 306 TiKVBatchRequests = prometheus.NewHistogramVec( 307 prometheus.HistogramOpts{ 308 Namespace: namespace, 309 Subsystem: subsystem, 310 Name: "batch_requests", 311 Buckets: prometheus.ExponentialBuckets(1, 2, 8), 312 Help: "number of requests in one batch", 313 }, []string{"store"}) 314 315 TiKVBatchClientUnavailable = prometheus.NewHistogram( 316 prometheus.HistogramOpts{ 317 Namespace: namespace, 318 Subsystem: subsystem, 319 Name: "batch_client_unavailable_seconds", 320 Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days 321 Help: "batch client unavailable", 322 }) 323 324 TiKVBatchClientWaitEstablish = prometheus.NewHistogram( 325 prometheus.HistogramOpts{ 326 Namespace: namespace, 327 Subsystem: subsystem, 328 Name: "batch_client_wait_connection_establish", 329 Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days 330 Help: "batch client wait new connection establish", 331 }) 332 333 TiKVBatchClientRecycle = prometheus.NewHistogram( 334 prometheus.HistogramOpts{ 335 Namespace: namespace, 336 Subsystem: subsystem, 337 Name: "batch_client_reset", 338 Buckets: prometheus.ExponentialBuckets(0.001, 2, 28), // 1ms ~ 1.5days 339 Help: "batch client recycle connection and reconnect duration", 340 }) 341 342 TiKVRangeTaskStats = prometheus.NewGaugeVec( 343 prometheus.GaugeOpts{ 344 Namespace: namespace, 345 Subsystem: subsystem, 346 Name: "range_task_stats", 347 Help: "stat of range tasks", 348 }, []string{LblType, LblResult}) 349 350 TiKVRangeTaskPushDuration = prometheus.NewHistogramVec( 351 prometheus.HistogramOpts{ 352 Namespace: namespace, 353 Subsystem: subsystem, 354 Name: "range_task_push_duration", 355 Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), // 1ms ~ 524s 356 Help: "duration to push sub tasks to range task workers", 357 }, []string{LblType}) 358 359 TiKVTokenWaitDuration = prometheus.NewHistogram( 360 prometheus.HistogramOpts{ 361 Namespace: namespace, 362 Subsystem: subsystem, 363 Name: "batch_executor_token_wait_duration", 364 Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s 365 Help: "tidb txn token wait duration to process batches", 366 }) 367 368 TiKVTxnHeartBeatHistogram = prometheus.NewHistogramVec( 369 prometheus.HistogramOpts{ 370 Namespace: namespace, 371 Subsystem: subsystem, 372 Name: "txn_heart_beat", 373 Help: "Bucketed histogram of the txn_heartbeat request duration.", 374 Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), // 1ms ~ 524s 375 }, []string{LblType}) 376 377 TiKVPessimisticLockKeysDuration = prometheus.NewHistogram( 378 prometheus.HistogramOpts{ 379 Namespace: namespace, 380 Subsystem: subsystem, 381 Name: "pessimistic_lock_keys_duration", 382 Buckets: prometheus.ExponentialBuckets(0.001, 2, 24), // 1ms ~ 8389s 383 Help: "tidb txn pessimistic lock keys duration", 384 }) 385 386 TiKVTTLLifeTimeReachCounter = prometheus.NewCounter( 387 prometheus.CounterOpts{ 388 Namespace: namespace, 389 Subsystem: subsystem, 390 Name: "ttl_lifetime_reach_total", 391 Help: "Counter of ttlManager live too long.", 392 }) 393 394 TiKVNoAvailableConnectionCounter = prometheus.NewCounter( 395 prometheus.CounterOpts{ 396 Namespace: namespace, 397 Subsystem: subsystem, 398 Name: "batch_client_no_available_connection_total", 399 Help: "Counter of no available batch client.", 400 }) 401 402 TiKVTwoPCTxnCounter = prometheus.NewCounterVec( 403 prometheus.CounterOpts{ 404 Namespace: namespace, 405 Subsystem: subsystem, 406 Name: "commit_txn_counter", 407 Help: "Counter of 2PC transactions.", 408 }, []string{LblType}) 409 410 TiKVAsyncCommitTxnCounter = prometheus.NewCounterVec( 411 prometheus.CounterOpts{ 412 Namespace: namespace, 413 Subsystem: subsystem, 414 Name: "async_commit_txn_counter", 415 Help: "Counter of async commit transactions.", 416 }, []string{LblType}) 417 418 TiKVOnePCTxnCounter = prometheus.NewCounterVec( 419 prometheus.CounterOpts{ 420 Namespace: namespace, 421 Subsystem: subsystem, 422 Name: "one_pc_txn_counter", 423 Help: "Counter of 1PC transactions.", 424 }, []string{LblType}) 425 426 TiKVStoreLimitErrorCounter = prometheus.NewCounterVec( 427 prometheus.CounterOpts{ 428 Namespace: namespace, 429 Subsystem: subsystem, 430 Name: "get_store_limit_token_error", 431 Help: "store token is up to the limit, probably because one of the stores is the hotspot or unavailable", 432 }, []string{LblAddress, LblStore}) 433 434 TiKVGRPCConnTransientFailureCounter = prometheus.NewCounterVec( 435 prometheus.CounterOpts{ 436 Namespace: namespace, 437 Subsystem: subsystem, 438 Name: "connection_transient_failure_count", 439 Help: "Counter of gRPC connection transient failure", 440 }, []string{LblAddress, LblStore}) 441 442 TiKVPanicCounter = prometheus.NewCounterVec( 443 prometheus.CounterOpts{ 444 Namespace: namespace, 445 Subsystem: subsystem, 446 Name: "panic_total", 447 Help: "Counter of panic.", 448 }, []string{LblType}) 449 450 TiKVForwardRequestCounter = prometheus.NewCounterVec( 451 prometheus.CounterOpts{ 452 Namespace: namespace, 453 Subsystem: subsystem, 454 Name: "forward_request_counter", 455 Help: "Counter of tikv request being forwarded through another node", 456 }, []string{LblFromStore, LblToStore, LblType, LblResult}) 457 458 TiKVTSFutureWaitDuration = prometheus.NewHistogram( 459 prometheus.HistogramOpts{ 460 Namespace: namespace, 461 Subsystem: subsystem, 462 Name: "ts_future_wait_seconds", 463 Help: "Bucketed histogram of seconds cost for waiting timestamp future.", 464 Buckets: prometheus.ExponentialBuckets(0.000005, 2, 30), // 5us ~ 2560s 465 }) 466 467 TiKVSafeTSUpdateCounter = prometheus.NewCounterVec( 468 prometheus.CounterOpts{ 469 Namespace: namespace, 470 Subsystem: subsystem, 471 Name: "safets_update_counter", 472 Help: "Counter of tikv safe_ts being updated.", 473 }, []string{LblResult, LblStore}) 474 475 TiKVMinSafeTSGapSeconds = prometheus.NewGaugeVec( 476 prometheus.GaugeOpts{ 477 Namespace: namespace, 478 Subsystem: subsystem, 479 Name: "min_safets_gap_seconds", 480 Help: "The minimal (non-zero) SafeTS gap for each store.", 481 }, []string{LblStore}) 482 483 TiKVReplicaSelectorFailureCounter = prometheus.NewCounterVec( 484 prometheus.CounterOpts{ 485 Namespace: namespace, 486 Subsystem: subsystem, 487 Name: "replica_selector_failure_counter", 488 Help: "Counter of the reason why the replica selector cannot yield a potential leader.", 489 }, []string{LblType}) 490 491 TiKVRequestRetryTimesHistogram = prometheus.NewHistogram( 492 prometheus.HistogramOpts{ 493 Namespace: namespace, 494 Subsystem: subsystem, 495 Name: "request_retry_times", 496 Help: "Bucketed histogram of how many times a region request retries.", 497 Buckets: []float64{1, 2, 3, 4, 8, 16, 32, 64, 128, 256}, 498 }) 499 TiKVTxnCommitBackoffSeconds = prometheus.NewHistogram( 500 prometheus.HistogramOpts{ 501 Namespace: namespace, 502 Subsystem: subsystem, 503 Name: "txn_commit_backoff_seconds", 504 Help: "Bucketed histogram of the total backoff duration in committing a transaction.", 505 Buckets: prometheus.ExponentialBuckets(0.001, 2, 22), // 1ms ~ 2097s 506 }) 507 TiKVTxnCommitBackoffCount = prometheus.NewHistogram( 508 prometheus.HistogramOpts{ 509 Namespace: namespace, 510 Subsystem: subsystem, 511 Name: "txn_commit_backoff_count", 512 Help: "Bucketed histogram of the backoff count in committing a transaction.", 513 Buckets: prometheus.ExponentialBuckets(1, 2, 12), // 1 ~ 2048 514 }) 515 516 // TiKVSmallReadDuration uses to collect small request read duration. 517 TiKVSmallReadDuration = prometheus.NewHistogram( 518 prometheus.HistogramOpts{ 519 Namespace: namespace, 520 Subsystem: "sli", // Always use "sli" to make it compatible with TiDB. 521 Name: "tikv_small_read_duration", 522 Help: "Read time of TiKV small read.", 523 Buckets: prometheus.ExponentialBuckets(0.0005, 2, 28), // 0.5ms ~ 74h 524 }) 525 526 TiKVUnsafeDestroyRangeFailuresCounterVec = prometheus.NewCounterVec( 527 prometheus.CounterOpts{ 528 Namespace: namespace, 529 Subsystem: subsystem, 530 Name: "gc_unsafe_destroy_range_failures", 531 Help: "Counter of unsafe destroyrange failures", 532 }, []string{LblType}) 533 534 initShortcuts() 535 } 536 537 func init() { 538 initMetrics("tikv", "client_go") 539 } 540 541 // InitMetrics initializes metrics variables with given namespace and subsystem name. 542 func InitMetrics(namespace, subsystem string) { 543 initMetrics(namespace, subsystem) 544 } 545 546 // RegisterMetrics registers all metrics variables. 547 // Note: to change default namespace and subsystem name, call `InitMetrics` before registering. 548 func RegisterMetrics() { 549 prometheus.MustRegister(TiKVTxnCmdHistogram) 550 prometheus.MustRegister(TiKVBackoffHistogram) 551 prometheus.MustRegister(TiKVSendReqHistogram) 552 prometheus.MustRegister(TiKVCoprocessorHistogram) 553 prometheus.MustRegister(TiKVLockResolverCounter) 554 prometheus.MustRegister(TiKVRegionErrorCounter) 555 prometheus.MustRegister(TiKVTxnWriteKVCountHistogram) 556 prometheus.MustRegister(TiKVTxnWriteSizeHistogram) 557 prometheus.MustRegister(TiKVRawkvCmdHistogram) 558 prometheus.MustRegister(TiKVRawkvSizeHistogram) 559 prometheus.MustRegister(TiKVTxnRegionsNumHistogram) 560 prometheus.MustRegister(TiKVLoadSafepointCounter) 561 prometheus.MustRegister(TiKVSecondaryLockCleanupFailureCounter) 562 prometheus.MustRegister(TiKVRegionCacheCounter) 563 prometheus.MustRegister(TiKVLocalLatchWaitTimeHistogram) 564 prometheus.MustRegister(TiKVStatusDuration) 565 prometheus.MustRegister(TiKVStatusCounter) 566 prometheus.MustRegister(TiKVBatchWaitDuration) 567 prometheus.MustRegister(TiKVBatchSendLatency) 568 prometheus.MustRegister(TiKVBatchRecvLatency) 569 prometheus.MustRegister(TiKVBatchWaitOverLoad) 570 prometheus.MustRegister(TiKVBatchPendingRequests) 571 prometheus.MustRegister(TiKVBatchRequests) 572 prometheus.MustRegister(TiKVBatchClientUnavailable) 573 prometheus.MustRegister(TiKVBatchClientWaitEstablish) 574 prometheus.MustRegister(TiKVBatchClientRecycle) 575 prometheus.MustRegister(TiKVRangeTaskStats) 576 prometheus.MustRegister(TiKVRangeTaskPushDuration) 577 prometheus.MustRegister(TiKVTokenWaitDuration) 578 prometheus.MustRegister(TiKVTxnHeartBeatHistogram) 579 prometheus.MustRegister(TiKVPessimisticLockKeysDuration) 580 prometheus.MustRegister(TiKVTTLLifeTimeReachCounter) 581 prometheus.MustRegister(TiKVNoAvailableConnectionCounter) 582 prometheus.MustRegister(TiKVTwoPCTxnCounter) 583 prometheus.MustRegister(TiKVAsyncCommitTxnCounter) 584 prometheus.MustRegister(TiKVOnePCTxnCounter) 585 prometheus.MustRegister(TiKVStoreLimitErrorCounter) 586 prometheus.MustRegister(TiKVGRPCConnTransientFailureCounter) 587 prometheus.MustRegister(TiKVPanicCounter) 588 prometheus.MustRegister(TiKVForwardRequestCounter) 589 prometheus.MustRegister(TiKVTSFutureWaitDuration) 590 prometheus.MustRegister(TiKVSafeTSUpdateCounter) 591 prometheus.MustRegister(TiKVMinSafeTSGapSeconds) 592 prometheus.MustRegister(TiKVReplicaSelectorFailureCounter) 593 prometheus.MustRegister(TiKVRequestRetryTimesHistogram) 594 prometheus.MustRegister(TiKVTxnCommitBackoffSeconds) 595 prometheus.MustRegister(TiKVTxnCommitBackoffCount) 596 prometheus.MustRegister(TiKVSmallReadDuration) 597 } 598 599 // readCounter reads the value of a prometheus.Counter. 600 // Returns -1 when failing to read the value. 601 func readCounter(m prometheus.Counter) int64 { 602 // Actually, it's not recommended to read the value of prometheus metric types directly: 603 // https://github.com/prometheus/client_golang/issues/486#issuecomment-433345239 604 pb := &dto.Metric{} 605 // It's impossible to return an error though. 606 if err := m.Write(pb); err != nil { 607 return -1 608 } 609 return int64(pb.GetCounter().GetValue()) 610 } 611 612 // TxnCommitCounter is the counter of transactions committed with 613 // different protocols, i.e. 2PC, async-commit, 1PC. 614 type TxnCommitCounter struct { 615 TwoPC int64 `json:"twoPC"` 616 AsyncCommit int64 `json:"asyncCommit"` 617 OnePC int64 `json:"onePC"` 618 } 619 620 // Sub returns the difference of two counters. 621 func (c TxnCommitCounter) Sub(rhs TxnCommitCounter) TxnCommitCounter { 622 new := TxnCommitCounter{} 623 new.TwoPC = c.TwoPC - rhs.TwoPC 624 new.AsyncCommit = c.AsyncCommit - rhs.AsyncCommit 625 new.OnePC = c.OnePC - rhs.OnePC 626 return new 627 } 628 629 // GetTxnCommitCounter gets the TxnCommitCounter. 630 func GetTxnCommitCounter() TxnCommitCounter { 631 return TxnCommitCounter{ 632 TwoPC: readCounter(TwoPCTxnCounterOk), 633 AsyncCommit: readCounter(AsyncCommitTxnCounterOk), 634 OnePC: readCounter(OnePCTxnCounterOk), 635 } 636 } 637 638 const smallTxnAffectRow = 20 639 640 // ObserveReadSLI observes the read SLI metric. 641 func ObserveReadSLI(readKeys uint64, readTime float64) { 642 if readKeys <= smallTxnAffectRow && readKeys != 0 && readTime != 0 { 643 TiKVSmallReadDuration.Observe(readTime) 644 } 645 }