github.com/verrazzano/verrazzano-monitoring-operator@v0.0.30/pkg/metricsexporter/metricsexporter.go (about) 1 // Copyright (C) 2022, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package metricsexporter 5 6 import ( 7 "strconv" 8 9 "github.com/pkg/errors" 10 "github.com/prometheus/client_golang/prometheus" 11 "go.uber.org/zap" 12 ) 13 14 type metricName string 15 16 const ( 17 NamesReconcile metricName = "reconcile" 18 NamesDeployment metricName = "deployment" 19 NamesDeploymentUpdateError metricName = "deploymentUpdateErrorCounter" 20 NamesDeploymentDeleteCounter metricName = "deploymentDeleteCounter" 21 NamesDeploymentDeleteError metricName = "deploymentDeleteErrorCounter" 22 NamesDeploymentUpdateCounter metricName = "deploymentUpdateCounter" 23 NamesConfigMap metricName = "configMap" 24 NamesServicesCreated metricName = "servicesCreated" 25 NamesServices metricName = "services" 26 NamesRoleBindings metricName = "roleBindings" 27 NamesIngress metricName = "ingress" 28 NamesIngressDeleted metricName = "ingressDeleted" 29 NamesVMOUpdate metricName = "vmoupdate" 30 NamesQueue metricName = "queue" 31 ) 32 33 type metricsExporter struct { 34 internalMetricsDelegate metricsDelegate 35 internalConfig configuration 36 internalData data 37 } 38 39 type configuration struct { 40 // this Metric array will be automatically populated with all the metrics from each map. Metrics not included in a map can be added to thisMetric array for registration. 41 allMetrics []prometheus.Collector 42 // this Metric map will be automatically populated with all metrics which were not registered correctly. Metrics in thisMetric map will be retried periodically. 43 failedMetrics map[prometheus.Collector]int 44 registry prometheus.Registerer 45 } 46 47 type data struct { 48 functionMetricsMap map[metricName]*FunctionMetrics 49 simpleCounterMetricMap map[metricName]*CounterMetric 50 simpleGaugeMetricMap map[metricName]*GaugeMetric 51 durationMetricMap map[metricName]*DurationMetric 52 timestampMetricMap map[metricName]*TimestampMetric 53 errorMetricMap map[metricName]*ErrorMetric 54 } 55 56 type metricsDelegate struct { 57 } 58 59 // Class of metrics to automatically capture 4 types of metrics for a given function 60 type FunctionMetrics struct { 61 durationMetric DurationMetric 62 callsTotal CounterMetric 63 lastCallTimestamp TimestampMetric 64 errorTotal ErrorMetric 65 // The function to create the label values for the error and timestamp metrics. A default is provided as &DefaultLabelFunction 66 labelFunction *func(int64) string 67 index int64 68 } 69 70 // Method to call at the start of the tracked function. Starts the duration timer and increments the total count 71 func (f *FunctionMetrics) LogStart() { 72 f.callsTotal.metric.Inc() 73 f.index = f.index + 1 74 f.durationMetric.TimerStart() 75 } 76 77 // Method to defer to the end of the tracked function. Stops the duration timer, sets the lastCallTimestamp. Pass in an argument of true to set an error for the current function call. 78 func (f *FunctionMetrics) LogEnd(errorObserved bool) { 79 label := (*f.labelFunction)(f.index) 80 f.durationMetric.TimerStop() 81 f.lastCallTimestamp.SetLastTimeWithLabel(label) 82 if errorObserved { 83 f.errorTotal.IncWithLabel(label) 84 } 85 } 86 87 func (f *FunctionMetrics) IncError() { 88 f.errorTotal.IncWithLabel(f.GetLabel()) 89 } 90 91 // Invokes the supplied labelFunction to return the string which would be used as a label. The label can be dynamic and may change depending on the labelFunctions behavior (i.e. a timestamp string) 92 func (f *FunctionMetrics) GetLabel() string { 93 return (*f.labelFunction)(f.index) 94 } 95 96 // Type to count events such as the number fo function calls. 97 type CounterMetric struct { 98 metric prometheus.Counter 99 index int64 100 } 101 102 // Inc increases the counterMetric by one 103 func (c *CounterMetric) Inc() { 104 c.index = c.index + 1 105 c.metric.Inc() 106 } 107 108 // Add increases the counter metric by the argument value 109 func (c *CounterMetric) Add(num float64) { 110 c.index = c.index + int64(num) 111 c.metric.Add(num) 112 } 113 114 // GetLabel returns the current value of the counter as a string 115 func (c *CounterMetric) GetLabel() string { 116 return strconv.FormatInt(c.index, 10) 117 } 118 119 type GaugeMetric struct { 120 metric prometheus.Gauge 121 } 122 123 // Set sets the value of the gauge metric to the given value 124 func (g *GaugeMetric) Set(num float64) { 125 g.metric.Set(num) 126 } 127 128 // SetToCurrentTime sets the value of the gauge metric to the system timestamp 129 func (g *GaugeMetric) SetToCurrentTime() { 130 g.metric.SetToCurrentTime() 131 } 132 133 // Add sets the value of the gauge metric to the current value plus the given value 134 func (g *GaugeMetric) Add(num float64) { 135 g.metric.Add(num) 136 } 137 138 // Type to track length of a function call. Method to start and stop the duration timer are available. 139 type DurationMetric struct { 140 metric prometheus.Summary 141 timer *prometheus.Timer 142 } 143 144 // Creates a new timer, and starts the timer 145 func (d *DurationMetric) TimerStart() { 146 d.timer = prometheus.NewTimer(d.metric) 147 } 148 149 // stops the timer and record the duration since the last call to TimerStart 150 func (d *DurationMetric) TimerStop() { 151 d.timer.ObserveDuration() 152 } 153 154 // Type to track the last timestamp of a function call. Includes a method to set the last timestamp 155 type TimestampMetric struct { 156 metric *prometheus.GaugeVec 157 labelFunction *func() string 158 } 159 160 // Adds a timestamp as the current time. The label must be supplied as an argument 161 func (t *TimestampMetric) SetLastTime() { 162 t.SetLastTimeWithLabel((*t.labelFunction)()) 163 } 164 165 // Adds a timestamp as the current time. The label must be supplied as an argument 166 func (t *TimestampMetric) SetLastTimeWithLabel(indexString string) { 167 lastTimeMetric, err := t.metric.GetMetricWithLabelValues(indexString) 168 if err != nil { 169 zap.S().Errorf("Failed to log the last reconcile time metric label %s: %v", indexString, err) 170 } else { 171 lastTimeMetric.SetToCurrentTime() 172 } 173 } 174 175 // Type to track the occurrence of an error. Includes a metod to add an error count 176 type ErrorMetric struct { 177 metric *prometheus.CounterVec 178 labelFunction *func() string 179 } 180 181 func (e *ErrorMetric) Inc() { 182 e.IncWithLabel((*e.labelFunction)()) 183 } 184 185 // Adds an error count. The label must be supplied as an argument 186 func (e *ErrorMetric) IncWithLabel(label string) { 187 errorMetric, err := e.metric.GetMetricWithLabelValues(label) 188 if err != nil { 189 zap.S().Errorf("Failed to get metric label %s: %v", label, err) 190 } else { 191 errorMetric.Inc() 192 } 193 } 194 195 // initConfiguration returns an empty configuration struct 196 func initConfiguration() configuration { 197 return configuration{ 198 allMetrics: []prometheus.Collector{}, 199 failedMetrics: map[prometheus.Collector]int{}, 200 registry: prometheus.DefaultRegisterer, 201 } 202 } 203 204 // initFunctionMetricsMap returns a populated map of functionMetrics to be used in the data struct, add additional metrics here 205 func initFunctionMetricsMap() map[metricName]*FunctionMetrics { 206 return map[metricName]*FunctionMetrics{ 207 NamesReconcile: { 208 durationMetric: DurationMetric{ 209 metric: prometheus.NewSummary(prometheus.SummaryOpts{Name: "vmo_reconcile_duration_seconds", Help: "Tracks the duration of the reconcile function in seconds"}), 210 }, 211 callsTotal: CounterMetric{ 212 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_reconcile_total", Help: "Tracks how many times the syncHandlerStandardMode function is called. thisMetric corresponds to the number of reconciles performed by the VMO"}), 213 }, 214 lastCallTimestamp: TimestampMetric{ 215 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_reconcile_last_timestamp_seconds", Help: "The timestamp of the last time the syncHandlerStandardMode function completed"}, []string{"reconcile_index"}), 216 }, 217 errorTotal: ErrorMetric{ 218 metric: prometheus.NewCounterVec(prometheus.CounterOpts{Name: "vmo_reconcile_error_total", Help: "Tracks how many times the syncHandlerStandardMode function encounters an error"}, []string{"reconcile_index"}), 219 }, 220 index: int64(0), 221 labelFunction: &DefaultLabelFunction, 222 }, 223 224 NamesDeployment: { 225 durationMetric: DurationMetric{ 226 metric: prometheus.NewSummary(prometheus.SummaryOpts{Name: "vmo_deployment_duration_seconds", Help: "The duration of the last call to the deployment function"}), 227 }, 228 callsTotal: CounterMetric{ 229 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_deployment_total", Help: "Tracks how many times the deployment function is called"}), 230 }, 231 lastCallTimestamp: TimestampMetric{ 232 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_deployment_last_timestamp_seconds", Help: "The timestamp of the last time the deployment function completed"}, []string{"deployment_index"}), 233 }, 234 errorTotal: ErrorMetric{ 235 metric: prometheus.NewCounterVec(prometheus.CounterOpts{Name: "vmo_deployment_error_total", Help: "Tracks how many times the deployment failed"}, []string{"deployment_index"}), 236 }, 237 index: int64(0), 238 labelFunction: &DefaultLabelFunction, 239 }, 240 241 NamesIngress: { 242 durationMetric: DurationMetric{ 243 metric: prometheus.NewSummary(prometheus.SummaryOpts{Name: "vmo_ingress_duration_seconds", Help: "Tracks the duration of the ingress function in seconds"}), 244 }, 245 callsTotal: CounterMetric{ 246 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_ingress_total", Help: "Tracks how many times the ingress function is called. This metric corresponds to the number of ingress requests performed by the VMO"}), 247 }, 248 lastCallTimestamp: TimestampMetric{ 249 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_ingress_last_timestamp_seconds", Help: "The timestamp of the last time the ingress function completed"}, []string{"ingress_index"}), 250 }, 251 errorTotal: ErrorMetric{ 252 metric: prometheus.NewCounterVec(prometheus.CounterOpts{Name: "vmo_ingress_error_total", Help: "Tracks how many times the syncHandlerStandardMode function encounters an error"}, []string{"ingress_index"}), 253 }, 254 index: int64(0), 255 labelFunction: &DefaultLabelFunction, 256 }, 257 } 258 } 259 260 // initCounterMetricMap returns a populated map of counter metrics to be used in the data struct, add additional metrics here 261 func initCounterMetricMap() map[metricName]*CounterMetric { 262 return map[metricName]*CounterMetric{ 263 NamesDeploymentUpdateCounter: { 264 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_deployment_update_total", Help: "Tracks how many times a deployment update is attempted"}), 265 }, 266 NamesDeploymentDeleteCounter: { 267 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_deployment_delete_total", Help: "Tracks how many times the delete functionality is invoked"}), 268 }, 269 NamesIngressDeleted: { 270 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_ingress_delete_total", Help: "Tracks how many ingresses are deleted"}), 271 }, 272 NamesConfigMap: { 273 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_configmap_total", Help: "Tracks how many times the configMap functionality is invoked"}), 274 }, 275 NamesServices: { 276 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_services_total", Help: "Tracks how many times the services functionality is invoked"}), 277 }, 278 NamesServicesCreated: { 279 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_services_created_total", Help: "Tracks how many services are created"}), 280 }, 281 NamesRoleBindings: { 282 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_rolebindings_total", Help: "Tracks how many times the rolebindings functionality is invoked"}), 283 }, 284 NamesVMOUpdate: { 285 metric: prometheus.NewCounter(prometheus.CounterOpts{Name: "vmo_updates_total", Help: "Tracks how many times the update functionality is invoked"}), 286 }, 287 } 288 } 289 290 // initGaugeMetricMap returns a map of gauge metrics to be used in the data struct, add additional metrics here 291 func initGaugeMetricMap() map[metricName]*GaugeMetric { 292 return map[metricName]*GaugeMetric{ 293 NamesQueue: { 294 metric: prometheus.NewGauge(prometheus.GaugeOpts{Name: "vmo_work_queue_size", Help: "Tracks the size of the VMO work queue"}), 295 }, 296 } 297 } 298 299 // initDurationMetricMap returns a map of duration metrics to be used in the data struct, add additional metrics here 300 func initDurationMetricMap() map[metricName]*DurationMetric { 301 return map[metricName]*DurationMetric{} 302 } 303 304 // initTimestampMetricMap returns a map of timestamp metrics to be used in the data struct, add additional metrics here 305 func initTimestampMetricMap() map[metricName]*TimestampMetric { 306 return map[metricName]*TimestampMetric{ 307 NamesConfigMap: { 308 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_configmap_last_successful_timestamp", Help: "The timestamp of the last time the configMap function completed successfully"}, []string{"configMap_index"}), 309 labelFunction: &configMapLabelFunction, 310 }, 311 NamesServices: { 312 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_services_last_successful_timestamp", Help: "The timestamp of the last time the createService function completed successfully"}, []string{"service_index"}), 313 labelFunction: &servicesLabelFunction, 314 }, 315 NamesRoleBindings: { 316 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_rolebindings_last_successful_timestamp", Help: "The timestamp of the last time the roleBindings function completed successfully"}, []string{"rolebindings_index"}), 317 labelFunction: &roleBindingLabelFunction, 318 }, 319 NamesVMOUpdate: { 320 metric: prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: "vmo_update_last_successful_timestamp", Help: "The timestamp of the last time the vmo update completed successfully"}, []string{"update_index"}), 321 labelFunction: &VMOUpdateLabelFunction, 322 }, 323 } 324 } 325 326 // initErrorMetricMap returns a map of error metrics to be used in the data struct, add additional metrics here 327 func initErrorMetricMap() map[metricName]*ErrorMetric { 328 return map[metricName]*ErrorMetric{ 329 NamesDeploymentUpdateError: { 330 metric: prometheus.NewCounterVec(prometheus.CounterOpts{Name: "vmo_deployment_update_error_total", Help: "Tracks how many times a deployment update fails"}, []string{"deployment_index"}), 331 labelFunction: &deploymentLabelFunction, 332 }, 333 NamesDeploymentDeleteError: { 334 metric: prometheus.NewCounterVec(prometheus.CounterOpts{Name: "vmo_deployment_delete_error_counter", Help: "Tracks how many times the delete functionality failed"}, []string{"deployment_index"}), 335 labelFunction: &deploymentLabelFunction, 336 }, 337 } 338 } 339 340 var ( 341 MetricsExp = metricsExporter{} 342 DefaultLabelFunction func(index int64) string 343 deploymentLabelFunction func() string 344 configMapLabelFunction func() string 345 servicesLabelFunction func() string 346 roleBindingLabelFunction func() string 347 VMOUpdateLabelFunction func() string 348 TestDelegate = metricsDelegate{} 349 ) 350 351 // initializeFailedMetricsArray simply adds metrics in the allMetrics array to the failed metrics map, call this before registering metrics 352 func (md *metricsDelegate) initializeFailedMetricsArray() { 353 //the failed metrics array will initially contain all metrics so they may be registered 354 for i, metric := range MetricsExp.internalConfig.allMetrics { 355 MetricsExp.internalConfig.failedMetrics[metric] = i 356 } 357 } 358 359 // registerMetricsHandlersHelper loops through the failed metrics map and deletes metrics which have been registered successfully 360 func (md *metricsDelegate) registerMetricsHandlersHelper() error { 361 var errorObserved error 362 for metric := range MetricsExp.internalConfig.failedMetrics { 363 err := MetricsExp.internalConfig.registry.Register(metric) 364 if err != nil { 365 if errorObserved != nil { 366 errorObserved = errors.Wrap(errorObserved, err.Error()) 367 } else { 368 errorObserved = err 369 } 370 } else { 371 //if a metric is registered, delete it from the failed metrics map so that it is not retried 372 delete(MetricsExp.internalConfig.failedMetrics, metric) 373 } 374 } 375 return errorObserved 376 }