github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/metrics/metrics.go (about)

     1  package metrics
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"strconv"
     8  	"sync"
     9  
    10  	"github.com/projecteru2/core/log"
    11  	"github.com/projecteru2/core/resource"
    12  	"github.com/projecteru2/core/resource/cobalt"
    13  	plugintypes "github.com/projecteru2/core/resource/plugins/types"
    14  	"github.com/projecteru2/core/types"
    15  	"github.com/projecteru2/core/utils"
    16  	promClient "github.com/prometheus/client_model/go"
    17  	"golang.org/x/exp/slices"
    18  
    19  	statsdlib "github.com/CMGS/statsd"
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	"golang.org/x/exp/maps"
    22  )
    23  
const (
	// deployCountKey is the statsd key used for the deploy counter; it
	// contains a %s placeholder.
	deployCountKey = "core.%s.deploy.count"
	// deployCountName is the Prometheus metric name, and the Collectors map
	// key, of the deploy counter.
	deployCountName = "core_deploy"
	// podNodeStatusKey is the statsd key template for the node-up gauge; the
	// %s placeholder is filled with the node name.
	podNodeStatusKey = "pod.node.%s.up"
	// podNodeStatusName is the Prometheus metric name, and the Collectors map
	// key, of the node-up gauge.
	podNodeStatusName = "pod_node_up"

	// gaugeType and counterType are the metric description types that
	// InitMetrics knows how to turn into collectors.
	gaugeType   = "gauge"
	counterType = "counter"
)
    33  
// Metrics bundles the Prometheus collectors and the optional statsd client
// used to publish metrics.
type Metrics struct {
	// Config is the configuration InitMetrics was called with.
	Config types.Config

	// StatsdAddr is the statsd address; when empty, nothing is sent to statsd.
	StatsdAddr string
	// Hostname is used as the "hostname" label value of the built-in metrics.
	Hostname string
	// statsdClient is created lazily by checkConn on first use.
	statsdClient *statsdlib.Client

	// Collectors maps a metric name to its Prometheus collector.
	Collectors map[string]prometheus.Collector

	// rmgr is the resource manager built by InitMetrics.
	rmgr resource.Manager
}
    46  
    47  // SendDeployCount update deploy counter
    48  func (m *Metrics) SendDeployCount(ctx context.Context, n int) {
    49  	metrics := &plugintypes.Metrics{
    50  		Name:   deployCountName,
    51  		Labels: []string{m.Hostname},
    52  		Key:    deployCountKey,
    53  		Value:  strconv.Itoa(n),
    54  	}
    55  
    56  	m.SendMetrics(ctx, metrics)
    57  }
    58  
    59  func (m *Metrics) SendPodNodeStatus(ctx context.Context, node *types.Node) {
    60  	up := !node.IsDown()
    61  	metrics := &plugintypes.Metrics{
    62  		Name:   podNodeStatusName,
    63  		Labels: []string{m.Hostname, node.Podname, node.Name},
    64  		Key:    fmt.Sprintf(podNodeStatusKey, node.Name),
    65  		Value:  strconv.Itoa(utils.Bool2Int(up)),
    66  	}
    67  
    68  	m.SendMetrics(ctx, metrics)
    69  }
    70  
    71  // SendMetrics update metrics
    72  func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metrics) {
    73  	logger := log.WithFunc("metrics.SendMetrics")
    74  	for _, metric := range metrics {
    75  		collector, ok := m.Collectors[metric.Name]
    76  		if !ok {
    77  			logger.Warnf(ctx, "Collector not found: %s", metric.Name)
    78  			continue
    79  		}
    80  		switch collector.(type) { //nolint
    81  		case *prometheus.GaugeVec:
    82  			value, err := strconv.ParseFloat(metric.Value, 64)
    83  			if err != nil {
    84  				logger.Errorf(ctx, err, "Error occurred while parsing %+v value %+v", metric.Name, metric.Value)
    85  			}
    86  			collector.(*prometheus.GaugeVec).WithLabelValues(metric.Labels...).Set(value) //nolint
    87  			if err := m.gauge(ctx, metric.Key, value); err != nil {
    88  				logger.Errorf(ctx, err, "Error occurred while sending %+v data to statsd", metric.Name)
    89  			}
    90  		case *prometheus.CounterVec:
    91  			value, err := strconv.ParseInt(metric.Value, 10, 32) //nolint
    92  			if err != nil {
    93  				logger.Errorf(ctx, err, "Error occurred while parsing %+v value %+v", metric.Name, metric.Value)
    94  			}
    95  			collector.(*prometheus.CounterVec).WithLabelValues(metric.Labels...).Add(float64(value)) //nolint
    96  			if err := m.count(ctx, metric.Key, int(value), 1.0); err != nil {
    97  				logger.Errorf(ctx, err, "Error occurred while sending %+v data to statsd", metric.Name)
    98  			}
    99  		default:
   100  			logger.Errorf(ctx, types.ErrMetricsTypeNotSupport, "Unknown collector type: %T", collector)
   101  		}
   102  	}
   103  }
   104  
   105  // RemoveInvalidNodes 清除多余的metric标签值
   106  func (m *Metrics) RemoveInvalidNodes(invalidNodes ...string) {
   107  	if len(invalidNodes) == 0 {
   108  		return
   109  	}
   110  	for _, collector := range m.Collectors {
   111  		metrics, _ := prometheus.DefaultGatherer.Gather()
   112  		for _, metric := range metrics {
   113  			for _, mf := range metric.GetMetric() {
   114  				if !slices.ContainsFunc(mf.Label, func(label *promClient.LabelPair) bool {
   115  					return label.GetName() == "nodename" && slices.ContainsFunc(invalidNodes, func(nodename string) bool {
   116  						return label.GetValue() == nodename
   117  					})
   118  				}) {
   119  					continue
   120  				}
   121  				labels := prometheus.Labels{}
   122  				for _, label := range mf.Label {
   123  					labels[label.GetName()] = label.GetValue()
   124  				}
   125  				// 删除符合条件的度量标签
   126  				switch c := collector.(type) {
   127  				case *prometheus.GaugeVec:
   128  					c.Delete(labels)
   129  				case *prometheus.CounterVec:
   130  					c.Delete(labels)
   131  				}
   132  			}
   133  		}
   134  		// 添加更多的条件来处理其他类型的Collector
   135  	}
   136  }
   137  
   138  // Lazy connect
   139  func (m *Metrics) checkConn(ctx context.Context) error {
   140  	if m.statsdClient != nil {
   141  		return nil
   142  	}
   143  	logger := log.WithFunc("metrics.checkConn")
   144  	var err error
   145  	// We needn't try to renew/reconnect because of only supporting UDP protocol now
   146  	// We should add an `errorCount` to reconnect when implementing TCP protocol
   147  	if m.statsdClient, err = statsdlib.New(m.StatsdAddr, statsdlib.WithErrorHandler(func(err error) {
   148  		logger.Error(ctx, err, "Sending statsd failed")
   149  	})); err != nil {
   150  		logger.Error(ctx, err, "Connect statsd failed")
   151  		return err
   152  	}
   153  	return nil
   154  }
   155  
   156  func (m *Metrics) gauge(ctx context.Context, key string, value float64) error {
   157  	if m.StatsdAddr == "" {
   158  		return nil
   159  	}
   160  	if err := m.checkConn(ctx); err != nil {
   161  		return err
   162  	}
   163  	m.statsdClient.Gauge(key, value)
   164  	return nil
   165  }
   166  
   167  func (m *Metrics) count(ctx context.Context, key string, n int, rate float32) error {
   168  	if m.StatsdAddr == "" {
   169  		return nil
   170  	}
   171  	if err := m.checkConn(ctx); err != nil {
   172  		return err
   173  	}
   174  	m.statsdClient.Count(key, n, rate)
   175  	return nil
   176  }
   177  
   178  // Client is a metrics obj
   179  var Client = Metrics{}
   180  var once sync.Once
   181  
   182  // InitMetrics new a metrics obj
   183  func InitMetrics(ctx context.Context, config types.Config, metricsDescriptions []*plugintypes.MetricsDescription) error {
   184  	hostname, err := os.Hostname()
   185  	if err != nil {
   186  		return err
   187  	}
   188  	rmgr, err := cobalt.New(config)
   189  	if err != nil {
   190  		return err
   191  	}
   192  	if err := rmgr.LoadPlugins(ctx, nil); err != nil {
   193  		return err
   194  	}
   195  
   196  	Client = Metrics{
   197  		Config:     config,
   198  		StatsdAddr: config.Statsd,
   199  		Hostname:   utils.CleanStatsdMetrics(hostname),
   200  		Collectors: map[string]prometheus.Collector{},
   201  		rmgr:       rmgr,
   202  	}
   203  
   204  	for _, desc := range metricsDescriptions {
   205  		switch desc.Type {
   206  		case gaugeType:
   207  			collector := prometheus.NewGaugeVec(prometheus.GaugeOpts{
   208  				Name: desc.Name,
   209  				Help: desc.Help,
   210  			}, desc.Labels)
   211  			Client.Collectors[desc.Name] = collector
   212  		case counterType:
   213  			collector := prometheus.NewCounterVec(prometheus.CounterOpts{
   214  				Name: desc.Name,
   215  				Help: desc.Help,
   216  			}, desc.Labels)
   217  			Client.Collectors[desc.Name] = collector
   218  		}
   219  	}
   220  
   221  	Client.Collectors[deployCountName] = prometheus.NewCounterVec(prometheus.CounterOpts{
   222  		Name: deployCountName,
   223  		Help: "core deploy counter",
   224  	}, []string{"hostname"})
   225  
   226  	Client.Collectors[podNodeStatusName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{
   227  		Name: podNodeStatusName,
   228  		Help: "number of up nodes",
   229  	}, []string{"hostname", "podname", "nodename"})
   230  
   231  	once.Do(func() {
   232  		prometheus.MustRegister(maps.Values(Client.Collectors)...)
   233  	})
   234  	return nil
   235  }