github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/metrics/metrics.go (about) 1 package metrics 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "strconv" 8 "sync" 9 10 "github.com/projecteru2/core/log" 11 "github.com/projecteru2/core/resource" 12 "github.com/projecteru2/core/resource/cobalt" 13 plugintypes "github.com/projecteru2/core/resource/plugins/types" 14 "github.com/projecteru2/core/types" 15 "github.com/projecteru2/core/utils" 16 promClient "github.com/prometheus/client_model/go" 17 "golang.org/x/exp/slices" 18 19 statsdlib "github.com/CMGS/statsd" 20 "github.com/prometheus/client_golang/prometheus" 21 "golang.org/x/exp/maps" 22 ) 23 24 const ( 25 deployCountKey = "core.%s.deploy.count" 26 deployCountName = "core_deploy" 27 podNodeStatusKey = "pod.node.%s.up" 28 podNodeStatusName = "pod_node_up" 29 30 gaugeType = "gauge" 31 counterType = "counter" 32 ) 33 34 // Metrics define metrics 35 type Metrics struct { 36 Config types.Config 37 38 StatsdAddr string 39 Hostname string 40 statsdClient *statsdlib.Client 41 42 Collectors map[string]prometheus.Collector 43 44 rmgr resource.Manager 45 } 46 47 // SendDeployCount update deploy counter 48 func (m *Metrics) SendDeployCount(ctx context.Context, n int) { 49 metrics := &plugintypes.Metrics{ 50 Name: deployCountName, 51 Labels: []string{m.Hostname}, 52 Key: deployCountKey, 53 Value: strconv.Itoa(n), 54 } 55 56 m.SendMetrics(ctx, metrics) 57 } 58 59 func (m *Metrics) SendPodNodeStatus(ctx context.Context, node *types.Node) { 60 up := !node.IsDown() 61 metrics := &plugintypes.Metrics{ 62 Name: podNodeStatusName, 63 Labels: []string{m.Hostname, node.Podname, node.Name}, 64 Key: fmt.Sprintf(podNodeStatusKey, node.Name), 65 Value: strconv.Itoa(utils.Bool2Int(up)), 66 } 67 68 m.SendMetrics(ctx, metrics) 69 } 70 71 // SendMetrics update metrics 72 func (m *Metrics) SendMetrics(ctx context.Context, metrics ...*plugintypes.Metrics) { 73 logger := log.WithFunc("metrics.SendMetrics") 74 for _, metric := range metrics { 75 collector, ok := m.Collectors[metric.Name] 76 if !ok { 77 logger.Warnf(ctx, "Collector not found: %s", metric.Name) 78 continue 79 } 80 switch collector.(type) { //nolint 81 case *prometheus.GaugeVec: 82 value, err := strconv.ParseFloat(metric.Value, 64) 83 if err != nil { 84 logger.Errorf(ctx, err, "Error occurred while parsing %+v value %+v", metric.Name, metric.Value) 85 } 86 collector.(*prometheus.GaugeVec).WithLabelValues(metric.Labels...).Set(value) //nolint 87 if err := m.gauge(ctx, metric.Key, value); err != nil { 88 logger.Errorf(ctx, err, "Error occurred while sending %+v data to statsd", metric.Name) 89 } 90 case *prometheus.CounterVec: 91 value, err := strconv.ParseInt(metric.Value, 10, 32) //nolint 92 if err != nil { 93 logger.Errorf(ctx, err, "Error occurred while parsing %+v value %+v", metric.Name, metric.Value) 94 } 95 collector.(*prometheus.CounterVec).WithLabelValues(metric.Labels...).Add(float64(value)) //nolint 96 if err := m.count(ctx, metric.Key, int(value), 1.0); err != nil { 97 logger.Errorf(ctx, err, "Error occurred while sending %+v data to statsd", metric.Name) 98 } 99 default: 100 logger.Errorf(ctx, types.ErrMetricsTypeNotSupport, "Unknown collector type: %T", collector) 101 } 102 } 103 } 104 105 // RemoveInvalidNodes 清除多余的metric标签值 106 func (m *Metrics) RemoveInvalidNodes(invalidNodes ...string) { 107 if len(invalidNodes) == 0 { 108 return 109 } 110 for _, collector := range m.Collectors { 111 metrics, _ := prometheus.DefaultGatherer.Gather() 112 for _, metric := range metrics { 113 for _, mf := range metric.GetMetric() { 114 if !slices.ContainsFunc(mf.Label, func(label *promClient.LabelPair) bool { 115 return label.GetName() == "nodename" && slices.ContainsFunc(invalidNodes, func(nodename string) bool { 116 return label.GetValue() == nodename 117 }) 118 }) { 119 continue 120 } 121 labels := prometheus.Labels{} 122 for _, label := range mf.Label { 123 labels[label.GetName()] = label.GetValue() 124 } 125 // 删除符合条件的度量标签 126 switch c := collector.(type) { 127 case *prometheus.GaugeVec: 128 c.Delete(labels) 129 case *prometheus.CounterVec: 130 c.Delete(labels) 131 } 132 } 133 } 134 // 添加更多的条件来处理其他类型的Collector 135 } 136 } 137 138 // Lazy connect 139 func (m *Metrics) checkConn(ctx context.Context) error { 140 if m.statsdClient != nil { 141 return nil 142 } 143 logger := log.WithFunc("metrics.checkConn") 144 var err error 145 // We needn't try to renew/reconnect because of only supporting UDP protocol now 146 // We should add an `errorCount` to reconnect when implementing TCP protocol 147 if m.statsdClient, err = statsdlib.New(m.StatsdAddr, statsdlib.WithErrorHandler(func(err error) { 148 logger.Error(ctx, err, "Sending statsd failed") 149 })); err != nil { 150 logger.Error(ctx, err, "Connect statsd failed") 151 return err 152 } 153 return nil 154 } 155 156 func (m *Metrics) gauge(ctx context.Context, key string, value float64) error { 157 if m.StatsdAddr == "" { 158 return nil 159 } 160 if err := m.checkConn(ctx); err != nil { 161 return err 162 } 163 m.statsdClient.Gauge(key, value) 164 return nil 165 } 166 167 func (m *Metrics) count(ctx context.Context, key string, n int, rate float32) error { 168 if m.StatsdAddr == "" { 169 return nil 170 } 171 if err := m.checkConn(ctx); err != nil { 172 return err 173 } 174 m.statsdClient.Count(key, n, rate) 175 return nil 176 } 177 178 // Client is a metrics obj 179 var Client = Metrics{} 180 var once sync.Once 181 182 // InitMetrics new a metrics obj 183 func InitMetrics(ctx context.Context, config types.Config, metricsDescriptions []*plugintypes.MetricsDescription) error { 184 hostname, err := os.Hostname() 185 if err != nil { 186 return err 187 } 188 rmgr, err := cobalt.New(config) 189 if err != nil { 190 return err 191 } 192 if err := rmgr.LoadPlugins(ctx, nil); err != nil { 193 return err 194 } 195 196 Client = Metrics{ 197 Config: config, 198 StatsdAddr: config.Statsd, 199 Hostname: utils.CleanStatsdMetrics(hostname), 200 Collectors: map[string]prometheus.Collector{}, 201 rmgr: rmgr, 202 } 203 204 for _, desc := range metricsDescriptions { 205 switch desc.Type { 206 case gaugeType: 207 collector := prometheus.NewGaugeVec(prometheus.GaugeOpts{ 208 Name: desc.Name, 209 Help: desc.Help, 210 }, desc.Labels) 211 Client.Collectors[desc.Name] = collector 212 case counterType: 213 collector := prometheus.NewCounterVec(prometheus.CounterOpts{ 214 Name: desc.Name, 215 Help: desc.Help, 216 }, desc.Labels) 217 Client.Collectors[desc.Name] = collector 218 } 219 } 220 221 Client.Collectors[deployCountName] = prometheus.NewCounterVec(prometheus.CounterOpts{ 222 Name: deployCountName, 223 Help: "core deploy counter", 224 }, []string{"hostname"}) 225 226 Client.Collectors[podNodeStatusName] = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 227 Name: podNodeStatusName, 228 Help: "number of up nodes", 229 }, []string{"hostname", "podname", "nodename"}) 230 231 once.Do(func() { 232 prometheus.MustRegister(maps.Values(Client.Collectors)...) 233 }) 234 return nil 235 }