github.com/polarismesh/polaris@v1.17.8/common/metrics/sys_metrics.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package metrics
    19  
    20  import (
    21  	"strconv"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/prometheus/client_golang/prometheus"
    26  
    27  	"github.com/polarismesh/polaris/common/utils"
    28  )
    29  
    30  var (
    31  	lastRedisReadFailureReport  atomic.Value
    32  	lastRedisWriteFailureReport atomic.Value
    33  )
    34  
    35  func registerSysMetrics() {
    36  	// instanceAsyncRegisCost 实例异步注册任务耗费时间
    37  	instanceAsyncRegisCost = prometheus.NewHistogram(prometheus.HistogramOpts{
    38  		Name: "instance_regis_cost_time",
    39  		Help: "instance regis cost time",
    40  		ConstLabels: map[string]string{
    41  			LabelServerNode: utils.LocalHost,
    42  		},
    43  	})
    44  
    45  	// instanceRegisTaskExpire 实例异步注册任务超时无效事件
    46  	instanceRegisTaskExpire = prometheus.NewCounter(prometheus.CounterOpts{
    47  		Name: "instance_regis_task_expire",
    48  		Help: "instance regis task expire that server drop it",
    49  		ConstLabels: map[string]string{
    50  			LabelServerNode: utils.LocalHost,
    51  		},
    52  	})
    53  
    54  	redisReadFailure = prometheus.NewGauge(prometheus.GaugeOpts{
    55  		Name: "redis_read_failure",
    56  		Help: "polaris exec redis read operation failure",
    57  		ConstLabels: map[string]string{
    58  			LabelServerNode: utils.LocalHost,
    59  		},
    60  	})
    61  
    62  	redisWriteFailure = prometheus.NewGauge(prometheus.GaugeOpts{
    63  		Name: "redis_write_failure",
    64  		Help: "polaris exec redis write operation failure",
    65  		ConstLabels: map[string]string{
    66  			LabelServerNode: utils.LocalHost,
    67  		},
    68  	})
    69  
    70  	redisAliveStatus = prometheus.NewGauge(prometheus.GaugeOpts{
    71  		Name: "redis_alive_status",
    72  		Help: "polaris redis alive status",
    73  		ConstLabels: map[string]string{
    74  			"polaris_server_instance": utils.LocalHost,
    75  		},
    76  	})
    77  
    78  	cacheUpdateCost = prometheus.NewHistogramVec(prometheus.HistogramOpts{
    79  		Name: "cache_update_cost",
    80  		Help: "cache update cost per resource cache",
    81  		ConstLabels: map[string]string{
    82  			"polaris_server_instance": utils.LocalHost,
    83  		},
    84  	}, []string{labelCacheType, labelCacheUpdateCount})
    85  
    86  	batchJobUnFinishJobs = prometheus.NewGaugeVec(prometheus.GaugeOpts{
    87  		Name: "batch_job_unfinish",
    88  		Help: "count unfinish batch job",
    89  		ConstLabels: map[string]string{
    90  			"polaris_server_instance": utils.LocalHost,
    91  		},
    92  	}, []string{
    93  		labelBatchJobLabel,
    94  	})
    95  
    96  	_ = registry.Register(instanceAsyncRegisCost)
    97  	_ = registry.Register(instanceRegisTaskExpire)
    98  	_ = registry.Register(redisReadFailure)
    99  	_ = registry.Register(redisWriteFailure)
   100  	_ = registry.Register(redisAliveStatus)
   101  	_ = registry.Register(cacheUpdateCost)
   102  	_ = registry.Register(batchJobUnFinishJobs)
   103  
   104  	go func() {
   105  		lastRedisReadFailureReport.Store(time.Now())
   106  		lastRedisWriteFailureReport.Store(time.Now())
   107  		ticker := time.NewTicker(time.Minute)
   108  		for range ticker.C {
   109  			tn := time.Now()
   110  			if tn.Sub(lastRedisReadFailureReport.Load().(time.Time)) > time.Minute {
   111  				redisReadFailure.Set(0)
   112  			}
   113  			if tn.Sub(lastRedisWriteFailureReport.Load().(time.Time)) > time.Minute {
   114  				redisWriteFailure.Set(0)
   115  			}
   116  		}
   117  	}()
   118  }
   119  
   120  // ReportInstanceRegisCost Total time to report the short-term registered task of the reporting instance
   121  func ReportInstanceRegisCost(cost time.Duration) {
   122  	instanceAsyncRegisCost.Observe(float64(cost.Milliseconds()))
   123  }
   124  
   125  // ReportDropInstanceRegisTask Record the number of registered tasks discarded
   126  func ReportDropInstanceRegisTask() {
   127  	instanceRegisTaskExpire.Inc()
   128  }
   129  
   130  // ReportRedisReadFailure report redis exec read operatio failure
   131  func ReportRedisReadFailure() {
   132  	lastRedisReadFailureReport.Store(time.Now())
   133  	redisReadFailure.Inc()
   134  }
   135  
   136  // ReportRedisWriteFailure report redis exec write operatio failure
   137  func ReportRedisWriteFailure() {
   138  	lastRedisWriteFailureReport.Store(time.Now())
   139  	redisWriteFailure.Inc()
   140  }
   141  
   142  // ReportRedisIsDead report redis alive status is dead
   143  func ReportRedisIsDead() {
   144  	redisAliveStatus.Set(0)
   145  }
   146  
   147  // ReportRedisIsAlive report redis alive status is health
   148  func ReportRedisIsAlive() {
   149  	redisAliveStatus.Set(1)
   150  }
   151  
   152  // RecordCacheUpdateCost record per cache update cost time
   153  func RecordCacheUpdateCost(cost time.Duration, cacheTye string, total int64) {
   154  	if cacheUpdateCost == nil {
   155  		return
   156  	}
   157  	cacheUpdateCost.With(map[string]string{
   158  		labelCacheType:        cacheTye,
   159  		labelCacheUpdateCount: strconv.FormatInt(total, 10),
   160  	})
   161  }
   162  
   163  // ReportAddBatchJob .
   164  func ReportAddBatchJob(label string, count int64) {
   165  	if batchJobUnFinishJobs == nil {
   166  		return
   167  	}
   168  	batchJobUnFinishJobs.With(map[string]string{
   169  		labelBatchJobLabel: label,
   170  	}).Add(float64(count))
   171  }
   172  
   173  // ReportFinishBatchJob .
   174  func ReportFinishBatchJob(label string, count int64) {
   175  	if batchJobUnFinishJobs == nil {
   176  		return
   177  	}
   178  	batchJobUnFinishJobs.With(map[string]string{
   179  		labelBatchJobLabel: label,
   180  	}).Sub(float64(count))
   181  }