github.com/polarismesh/polaris@v1.17.8/common/metrics/sys_metrics.go (about) 1 /** 2 * Tencent is pleased to support the open source community by making Polaris available. 3 * 4 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved. 5 * 6 * Licensed under the BSD 3-Clause License (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * https://opensource.org/licenses/BSD-3-Clause 11 * 12 * Unless required by applicable law or agreed to in writing, software distributed 13 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 14 * CONDITIONS OF ANY KIND, either express or implied. See the License for the 15 * specific language governing permissions and limitations under the License. 16 */ 17 18 package metrics 19 20 import ( 21 "strconv" 22 "sync/atomic" 23 "time" 24 25 "github.com/prometheus/client_golang/prometheus" 26 27 "github.com/polarismesh/polaris/common/utils" 28 ) 29 30 var ( 31 lastRedisReadFailureReport atomic.Value 32 lastRedisWriteFailureReport atomic.Value 33 ) 34 35 func registerSysMetrics() { 36 // instanceAsyncRegisCost 实例异步注册任务耗费时间 37 instanceAsyncRegisCost = prometheus.NewHistogram(prometheus.HistogramOpts{ 38 Name: "instance_regis_cost_time", 39 Help: "instance regis cost time", 40 ConstLabels: map[string]string{ 41 LabelServerNode: utils.LocalHost, 42 }, 43 }) 44 45 // instanceRegisTaskExpire 实例异步注册任务超时无效事件 46 instanceRegisTaskExpire = prometheus.NewCounter(prometheus.CounterOpts{ 47 Name: "instance_regis_task_expire", 48 Help: "instance regis task expire that server drop it", 49 ConstLabels: map[string]string{ 50 LabelServerNode: utils.LocalHost, 51 }, 52 }) 53 54 redisReadFailure = prometheus.NewGauge(prometheus.GaugeOpts{ 55 Name: "redis_read_failure", 56 Help: "polaris exec redis read operation failure", 57 ConstLabels: map[string]string{ 58 LabelServerNode: utils.LocalHost, 59 }, 60 }) 61 62 redisWriteFailure = prometheus.NewGauge(prometheus.GaugeOpts{ 63 Name: "redis_write_failure", 64 Help: "polaris exec redis write operation failure", 65 ConstLabels: map[string]string{ 66 LabelServerNode: utils.LocalHost, 67 }, 68 }) 69 70 redisAliveStatus = prometheus.NewGauge(prometheus.GaugeOpts{ 71 Name: "redis_alive_status", 72 Help: "polaris redis alive status", 73 ConstLabels: map[string]string{ 74 "polaris_server_instance": utils.LocalHost, 75 }, 76 }) 77 78 cacheUpdateCost = prometheus.NewHistogramVec(prometheus.HistogramOpts{ 79 Name: "cache_update_cost", 80 Help: "cache update cost per resource cache", 81 ConstLabels: map[string]string{ 82 "polaris_server_instance": utils.LocalHost, 83 }, 84 }, []string{labelCacheType, labelCacheUpdateCount}) 85 86 batchJobUnFinishJobs = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 87 Name: "batch_job_unfinish", 88 Help: "count unfinish batch job", 89 ConstLabels: map[string]string{ 90 "polaris_server_instance": utils.LocalHost, 91 }, 92 }, []string{ 93 labelBatchJobLabel, 94 }) 95 96 _ = registry.Register(instanceAsyncRegisCost) 97 _ = registry.Register(instanceRegisTaskExpire) 98 _ = registry.Register(redisReadFailure) 99 _ = registry.Register(redisWriteFailure) 100 _ = registry.Register(redisAliveStatus) 101 _ = registry.Register(cacheUpdateCost) 102 _ = registry.Register(batchJobUnFinishJobs) 103 104 go func() { 105 lastRedisReadFailureReport.Store(time.Now()) 106 lastRedisWriteFailureReport.Store(time.Now()) 107 ticker := time.NewTicker(time.Minute) 108 for range ticker.C { 109 tn := time.Now() 110 if tn.Sub(lastRedisReadFailureReport.Load().(time.Time)) > time.Minute { 111 redisReadFailure.Set(0) 112 } 113 if tn.Sub(lastRedisWriteFailureReport.Load().(time.Time)) > time.Minute { 114 redisWriteFailure.Set(0) 115 } 116 } 117 }() 118 } 119 120 // ReportInstanceRegisCost Total time to report the short-term registered task of the reporting instance 121 func ReportInstanceRegisCost(cost time.Duration) { 122 instanceAsyncRegisCost.Observe(float64(cost.Milliseconds())) 123 } 124 125 // ReportDropInstanceRegisTask Record the number of registered tasks discarded 126 func ReportDropInstanceRegisTask() { 127 instanceRegisTaskExpire.Inc() 128 } 129 130 // ReportRedisReadFailure report redis exec read operatio failure 131 func ReportRedisReadFailure() { 132 lastRedisReadFailureReport.Store(time.Now()) 133 redisReadFailure.Inc() 134 } 135 136 // ReportRedisWriteFailure report redis exec write operatio failure 137 func ReportRedisWriteFailure() { 138 lastRedisWriteFailureReport.Store(time.Now()) 139 redisWriteFailure.Inc() 140 } 141 142 // ReportRedisIsDead report redis alive status is dead 143 func ReportRedisIsDead() { 144 redisAliveStatus.Set(0) 145 } 146 147 // ReportRedisIsAlive report redis alive status is health 148 func ReportRedisIsAlive() { 149 redisAliveStatus.Set(1) 150 } 151 152 // RecordCacheUpdateCost record per cache update cost time 153 func RecordCacheUpdateCost(cost time.Duration, cacheTye string, total int64) { 154 if cacheUpdateCost == nil { 155 return 156 } 157 cacheUpdateCost.With(map[string]string{ 158 labelCacheType: cacheTye, 159 labelCacheUpdateCount: strconv.FormatInt(total, 10), 160 }) 161 } 162 163 // ReportAddBatchJob . 164 func ReportAddBatchJob(label string, count int64) { 165 if batchJobUnFinishJobs == nil { 166 return 167 } 168 batchJobUnFinishJobs.With(map[string]string{ 169 labelBatchJobLabel: label, 170 }).Add(float64(count)) 171 } 172 173 // ReportFinishBatchJob . 174 func ReportFinishBatchJob(label string, count int64) { 175 if batchJobUnFinishJobs == nil { 176 return 177 } 178 batchJobUnFinishJobs.With(map[string]string{ 179 labelBatchJobLabel: label, 180 }).Sub(float64(count)) 181 }