github.com/verrazzano/verrazzano@v1.7.0/application-operator/metricsexporter/metricsexporter_utils.go (about) 1 // Copyright (c) 2022, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package metricsexporter 5 6 import ( 7 "fmt" 8 "net/http" 9 "time" 10 11 "github.com/pkg/errors" 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/prometheus/client_golang/prometheus/promhttp" 14 vzlogInit "github.com/verrazzano/verrazzano/pkg/log" 15 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 16 "go.uber.org/zap" 17 "k8s.io/apimachinery/pkg/util/wait" 18 ) 19 20 type metricName string 21 22 const ( 23 AppconfigReconcileCounter metricName = "appconfig reconcile counter" 24 AppconfigReconcileError metricName = "appconfig reconcile error" 25 AppconfigReconcileDuration metricName = "appconfig reconcile duration" 26 CohworkloadReconcileCounter metricName = "coherence reconcile counter" 27 CohworkloadReconcileError metricName = "coherence reconcile error" 28 CohworkloadReconcileDuration metricName = "coherence reconcile duration" 29 HelidonReconcileCounter metricName = "helidon reconcile counter" 30 HelidonReconcileError metricName = "helidon reconcile error" 31 HelidonReconcileDuration metricName = "helidon reconcile duration" 32 IngresstraitReconcileCounter metricName = "ingress reconcile counter" 33 IngresstraitReconcileError metricName = "ingress reconcile error" 34 IngresstraitReconcileDuration metricName = "ingress reconcile duration" 35 AppconfigHandleCounter metricName = "appconfig handle counter" 36 AppconfigHandleError metricName = "appconfig handle error" 37 AppconfigHandleDuration metricName = "appconfig handle duration" 38 IstioHandleCounter metricName = "istio handle counter" 39 IstioHandleError metricName = "istio handle error" 40 IstioHandleDuration metricName = "istio handle duration" 41 LabelerPodHandleCounter metricName = "LabelerPod handle counter" 42 LabelerPodHandleError metricName = "LabelerPod handle error" 43 LabelerPodHandleDuration metricName = "LabelerPod handle duration" 44 BindingUpdaterHandleCounter metricName = "BindingUpdater handle counter" 45 BindingUpdaterHandleError metricName = "BindingUpdater handle error" 46 BindingUpdaterHandleDuration metricName = "BindingUpdater handle duration" 47 MultiClusterAppconfigPodHandleCounter metricName = "MultiClusterAppconfig handle counter" 48 MultiClusterAppconfigPodHandleError metricName = "MultiClusterAppconfig handle error" 49 MultiClusterAppconfigPodHandleDuration metricName = "MultiClusterAppconfig handle duration" 50 MultiClusterCompHandleCounter metricName = "MultiClusterComp handle counter" 51 MultiClusterCompHandleError metricName = "MultiClusterComp handle error" 52 MultiClusterCompHandleDuration metricName = "MultiClusterComp handle duration" 53 MultiClusterConfigmapHandleCounter metricName = "MultiClusterConfigmap handle counter" 54 MultiClusterConfigmapHandleError metricName = "MultiClusterConfigmap handle error" 55 MultiClusterConfigmapHandleDuration metricName = "MultiClusterConfigmap handle duration" 56 MultiClusterSecretHandleCounter metricName = "MultiClusterSecret handle counter" 57 MultiClusterSecretHandleError metricName = "MultiClusterSecret handle error" 58 MultiClusterSecretHandleDuration metricName = "MultiClusterSecret handle duration" 59 VzProjHandleCounter metricName = "VzProj handle counter" 60 VzProjHandleError metricName = "VzProj handle error" 61 VzProjHandleDuration metricName = "VzProj handle duration" 62 ) 63 64 func init() { 65 RequiredInitialization() 66 RegisterMetrics() 67 } 68 69 // RequiredInitialization initializes the metrics object, but does not register the metrics 70 func RequiredInitialization() { 71 MetricsExp = metricsExporter{ 72 internalConfig: initConfiguration(), 73 internalData: data{ 74 simpleCounterMetricMap: initCounterMetricMap(), 75 durationMetricMap: initDurationMetricMap(), 76 }, 77 } 78 } 79 80 // RegisterMetrics begins the process of registering metrics 81 func RegisterMetrics() { 82 InitializeAllMetricsArray() 83 go registerMetricsHandlers(zap.S()) 84 } 85 86 // InitializeAllMetricsArray initializes the allMetrics array 87 func InitializeAllMetricsArray() { 88 // Loop through all metrics declarations in metric maps 89 for _, value := range MetricsExp.internalData.simpleCounterMetricMap { 90 MetricsExp.internalConfig.allMetrics = append(MetricsExp.internalConfig.allMetrics, value.metric) 91 } 92 for _, value := range MetricsExp.internalData.durationMetricMap { 93 MetricsExp.internalConfig.allMetrics = append(MetricsExp.internalConfig.allMetrics, value.metric) 94 } 95 96 } 97 98 // initCounterMetricMap initializes the simpleCounterMetricMap for the metricsExporter object 99 func initCounterMetricMap() map[metricName]*SimpleCounterMetric { 100 return map[metricName]*SimpleCounterMetric{ 101 AppconfigReconcileCounter: { 102 metric: prometheus.NewCounter(prometheus.CounterOpts{ 103 Name: "vz_application_operator_appconfig_successful_reconcile_total", 104 Help: "Tracks how many times the appconfig reconcile process has been successful"}), 105 }, 106 AppconfigReconcileError: { 107 metric: prometheus.NewCounter(prometheus.CounterOpts{ 108 Name: "vz_application_operator_appconfig_error_reconcile_total", 109 Help: "Tracks how many times the appconfig reconcile process has failed"}), 110 }, 111 CohworkloadReconcileCounter: { 112 metric: prometheus.NewCounter(prometheus.CounterOpts{ 113 Name: "vz_application_operator_cohworkload_successful_reconcile_total", 114 Help: "Tracks how many times the cohworkload reconcile process has been successful"}), 115 }, 116 CohworkloadReconcileError: { 117 metric: prometheus.NewCounter(prometheus.CounterOpts{ 118 Name: "vz_application_operator_cohworkload_error_reconcile_total", 119 Help: "Tracks how many times the cohworkload reconcile process has failed"}), 120 }, 121 HelidonReconcileCounter: { 122 metric: prometheus.NewCounter(prometheus.CounterOpts{ 123 Name: "vz_application_operator_helidonworkload_successful_reconcile_total", 124 Help: "Tracks how many times the helidonworkload reconcile process has been successful"}), 125 }, 126 HelidonReconcileError: { 127 metric: prometheus.NewCounter(prometheus.CounterOpts{ 128 Name: "vz_application_operator_helidonworkload_error_reconcile_total", 129 Help: "Tracks how many times the helidonworkload reconcile process has failed"}), 130 }, 131 IngresstraitReconcileCounter: { 132 metric: prometheus.NewCounter(prometheus.CounterOpts{ 133 Name: "vz_application_operator_ingresstrait_successful_reconcile_total", 134 Help: "Tracks how many times the ingresstrait reconcile process has been successful"}), 135 }, 136 IngresstraitReconcileError: { 137 metric: prometheus.NewCounter(prometheus.CounterOpts{ 138 Name: "vz_application_operator_ingresstrait_error_reconcile_total", 139 Help: "Tracks how many times the ingresstrait reconcile process has failed"}), 140 }, 141 AppconfigHandleCounter: { 142 metric: prometheus.NewCounter(prometheus.CounterOpts{ 143 Name: "vz_application_operator_appconfig_handle_total", 144 Help: "Tracks how many times appconfig handle process has been successful"}), 145 }, 146 AppconfigHandleError: { 147 metric: prometheus.NewCounter(prometheus.CounterOpts{ 148 Name: "vz_application_operator_appconfig_error_handle_total", 149 Help: "Tracks how many times appconfig handle process has failed"}), 150 }, 151 IstioHandleCounter: { 152 metric: prometheus.NewCounter(prometheus.CounterOpts{ 153 Name: "vz_application_operator_istio_handle_total", 154 Help: "Tracks how many times istio handle process has been successful"}), 155 }, 156 IstioHandleError: { 157 metric: prometheus.NewCounter(prometheus.CounterOpts{ 158 Name: "vz_application_operator_istio_error_handle_total", 159 Help: "Tracks how many times istio handle process has failed"}), 160 }, 161 LabelerPodHandleCounter: { 162 metric: prometheus.NewCounter(prometheus.CounterOpts{ 163 Name: "vz_application_operator_labelerPod_handle_total", 164 Help: "Tracks how many times the labeler pod handle process has been successful"}), 165 }, 166 LabelerPodHandleError: { 167 metric: prometheus.NewCounter(prometheus.CounterOpts{ 168 Name: "vz_application_operator_labelerpod_error_handle_total", 169 Help: "Tracks how many times the labeler pod handle process has failed"}), 170 }, 171 BindingUpdaterHandleCounter: { 172 metric: prometheus.NewCounter(prometheus.CounterOpts{ 173 Name: "vz_application_operator_bindingupdater_handle_total", 174 Help: "Tracks how many times the binding updater handle process has been successful"}), 175 }, 176 BindingUpdaterHandleError: { 177 metric: prometheus.NewCounter(prometheus.CounterOpts{ 178 Name: "vz_application_operator_bindingupdater_error_handle_total", 179 Help: "Tracks how many times the binding updater handle process has failed"}), 180 }, 181 MultiClusterAppconfigPodHandleCounter: { 182 metric: prometheus.NewCounter(prometheus.CounterOpts{ 183 Name: "vz_application_operator_multiclusterappconfig_handle_total", 184 Help: "Tracks how many times the multicluster appconfig pod handle process has been successful"}), 185 }, 186 MultiClusterAppconfigPodHandleError: { 187 metric: prometheus.NewCounter(prometheus.CounterOpts{ 188 Name: "vz_application_operator_multiclusterappconfig_error_handle_total", 189 Help: "Tracks how many times the multicluster appconfig pod handle process has failed"}), 190 }, 191 MultiClusterCompHandleCounter: { 192 metric: prometheus.NewCounter(prometheus.CounterOpts{ 193 Name: "vz_application_operator_multiclustercomp_handle_total", 194 Help: "Tracks how many times the multicluster component handle process has been successful"}), 195 }, 196 MultiClusterCompHandleError: { 197 metric: prometheus.NewCounter(prometheus.CounterOpts{ 198 Name: "vz_application_operator_multiclustercomp_error_handle_total", 199 Help: "Tracks how many times the multicluster component handle process has failed"}), 200 }, 201 MultiClusterConfigmapHandleCounter: { 202 metric: prometheus.NewCounter(prometheus.CounterOpts{ 203 Name: "vz_application_operator_multiclustercomp_handle_total", 204 Help: "Tracks how many times the multicluster configmap handle process has been successful"}), 205 }, 206 MultiClusterConfigmapHandleError: { 207 metric: prometheus.NewCounter(prometheus.CounterOpts{ 208 Name: "vz_application_operator_multiclustercomp_error_handle_total", 209 Help: "Tracks how many times the multicluster configmap handle process has failed"}), 210 }, 211 MultiClusterSecretHandleCounter: { 212 metric: prometheus.NewCounter(prometheus.CounterOpts{ 213 Name: "vz_application_operator_multiclustersecret_handle_total", 214 Help: "Tracks how many times the multicluster secret handle process has been successful"}), 215 }, 216 MultiClusterSecretHandleError: { 217 metric: prometheus.NewCounter(prometheus.CounterOpts{ 218 Name: "vz_application_operator_multiclustersecret_error_handle_total", 219 Help: "Tracks how many times the multicluster secret handle process has failed"}), 220 }, 221 VzProjHandleCounter: { 222 metric: prometheus.NewCounter(prometheus.CounterOpts{ 223 Name: "vz_application_operator_vzproj_handle_total", 224 Help: "Tracks how many times the vz project handle process has been successful"}), 225 }, 226 VzProjHandleError: { 227 metric: prometheus.NewCounter(prometheus.CounterOpts{ 228 Name: "vz_application_operator_vzproj_error_handle_total", 229 Help: "Tracks how many times the vz project handle process has failed"}), 230 }, 231 } 232 } 233 234 // initDurationMetricMap initializes the DurationMetricMap for the metricsExporter object 235 func initDurationMetricMap() map[metricName]*DurationMetrics { 236 return map[metricName]*DurationMetrics{ 237 AppconfigReconcileDuration: { 238 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 239 Name: "vz_application_operator_appconfig_reconcile_duration", 240 Help: "The duration in seconds of vao appconfig reconcile process", 241 }), 242 }, 243 CohworkloadReconcileDuration: { 244 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 245 Name: "vz_application_operator_cohworkload_reconcile_duration", 246 Help: "The duration in seconds of vao coherence workload reconcile process", 247 }), 248 }, 249 HelidonReconcileDuration: { 250 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 251 Name: "vz_application_operator_helidon_reconcile_duration", 252 Help: "The duration in seconds of vao helidon reconcile process", 253 }), 254 }, 255 IngresstraitReconcileDuration: { 256 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 257 Name: "vz_application_operator_ingresstrait_reconcile_duration", 258 Help: "The duration in seconds of vao ingresstrait reconcile process", 259 }), 260 }, 261 AppconfigHandleDuration: { 262 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 263 Name: "vz_application_operator_appconfig_handle_duration", 264 Help: "The duration in seconds of vao appconfig handle process", 265 }), 266 }, 267 IstioHandleDuration: { 268 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 269 Name: "vz_application_operator_istio_handle_duration", 270 Help: "The duration in seconds of vao istio handle process", 271 }), 272 }, 273 LabelerPodHandleDuration: { 274 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 275 Name: "vz_application_operator_labelerpod_handle_duration", 276 Help: "The duration in seconds of vao labeler pod handle process", 277 }), 278 }, 279 MultiClusterConfigmapHandleDuration: { 280 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 281 Name: "vz_application_operator_multiclusterconfigmap_handle_duration", 282 Help: "The duration in seconds of vao multicluster configmap handle process", 283 }), 284 }, 285 MultiClusterAppconfigPodHandleDuration: { 286 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 287 Name: "vz_application_operator_multiclusterappconfig_handle_duration", 288 Help: "The duration in seconds of vao multicluster appconfig process", 289 }), 290 }, 291 MultiClusterCompHandleDuration: { 292 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 293 Name: "vz_application_operator_multiclustercomp_handle_duration", 294 Help: "The duration in seconds of vao multicluster component handle process", 295 }), 296 }, 297 MultiClusterSecretHandleDuration: { 298 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 299 Name: "vz_application_operator_multiclustersecret_handle_duration", 300 Help: "The duration in seconds of vao multicluster secret handle process", 301 }), 302 }, 303 VzProjHandleDuration: { 304 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 305 Name: "vz_application_operator_vzproj_handle_duration", 306 Help: "The duration in seconds of vao vz project handle process", 307 }), 308 }, 309 BindingUpdaterHandleDuration: { 310 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 311 Name: "vz_application_operator_bindingupdater_handle_duration", 312 Help: "The duration in seconds of vao binding updater handle process", 313 }), 314 }, 315 } 316 } 317 318 // registerMetricsHandlersHelper is a helper function that assists in registering metrics 319 func registerMetricsHandlersHelper() error { 320 var errorObserved error 321 for metric := range MetricsExp.internalConfig.failedMetrics { 322 err := MetricsExp.internalConfig.registry.Register(metric) 323 if err != nil { 324 if errorObserved != nil { 325 errorObserved = errors.Wrap(errorObserved, err.Error()) 326 } else { 327 errorObserved = err 328 } 329 } else { 330 // If a metric is registered, delete it from the failed metrics map so that it is not retried 331 delete(MetricsExp.internalConfig.failedMetrics, metric) 332 } 333 } 334 return errorObserved 335 } 336 337 // registerMetricsHandlers registers the metrics and provides error handling 338 func registerMetricsHandlers(log *zap.SugaredLogger) { 339 // Get list of metrics to register initially 340 initializeFailedMetricsArray() 341 // Loop until there is no error in registering 342 for err := registerMetricsHandlersHelper(); err != nil; err = registerMetricsHandlersHelper() { 343 log.Infof("Failed to register metrics for VMI %v", err) 344 time.Sleep(time.Second) 345 } 346 } 347 348 // initializeFailedMetricsArray initializes the failedMetrics array 349 func initializeFailedMetricsArray() { 350 for i, metric := range MetricsExp.internalConfig.allMetrics { 351 MetricsExp.internalConfig.failedMetrics[metric] = i 352 } 353 } 354 355 // StartMetricsServer starts the metric server to begin emitting metrics to Prometheus 356 func StartMetricsServer() error { 357 vlog, err := vzlog.EnsureResourceLogger(&vzlog.ResourceConfig{ 358 Name: "", 359 Namespace: "", 360 ID: "", 361 Generation: 0, 362 ControllerName: "metricsexporter", 363 }) 364 if err != nil { 365 return err 366 } 367 go wait.Until(func() { 368 http.Handle("/metrics", promhttp.Handler()) 369 server := &http.Server{ 370 Addr: ":9100", 371 ReadHeaderTimeout: 3 * time.Second, 372 } 373 err := server.ListenAndServe() 374 if err != nil { 375 vlog.Oncef("Failed to start metrics server for VMI: %v", err) 376 } 377 }, time.Second*3, wait.NeverStop) 378 return nil 379 } 380 381 // initConfiguration returns an empty struct of type configuration 382 func initConfiguration() configuration { 383 return configuration{ 384 allMetrics: []prometheus.Collector{}, 385 failedMetrics: map[prometheus.Collector]int{}, 386 registry: prometheus.DefaultRegisterer, 387 } 388 } 389 390 // GetSimpleCounterMetric returns a simpleCounterMetric from the simpleCounterMetricMap given a metricName 391 func GetSimpleCounterMetric(name metricName) (*SimpleCounterMetric, error) { 392 counterMetric, ok := MetricsExp.internalData.simpleCounterMetricMap[name] 393 if !ok { 394 return nil, fmt.Errorf("%v not found in SimpleCounterMetricMap due to metricName being defined, but not being a key in the map", name) 395 } 396 return counterMetric, nil 397 } 398 399 // GetDurationMetric returns a durationMetric from the durationMetricMap given a metricName 400 func GetDurationMetric(name metricName) (*DurationMetrics, error) { 401 durationMetric, ok := MetricsExp.internalData.durationMetricMap[name] 402 if !ok { 403 return nil, fmt.Errorf("%v not found in durationMetricMap due to metricName being defined, but not being a key in the map", name) 404 } 405 return durationMetric, nil 406 } 407 func ExposeControllerMetrics(controllerName string, successname metricName, errorname metricName, durationname metricName) (*SimpleCounterMetric, *SimpleCounterMetric, *DurationMetrics, *zap.SugaredLogger, error) { 408 zapLogForMetrics := zap.S().With(vzlogInit.FieldController, controllerName) 409 counterMetricObject, err := GetSimpleCounterMetric(successname) 410 if err != nil { 411 zapLogForMetrics.Error(err) 412 return nil, nil, nil, nil, err 413 } 414 errorCounterMetricObject, err := GetSimpleCounterMetric(errorname) 415 if err != nil { 416 zapLogForMetrics.Error(err) 417 return nil, nil, nil, nil, err 418 } 419 420 durationMetricObject, err := GetDurationMetric(durationname) 421 if err != nil { 422 zapLogForMetrics.Error(err) 423 return nil, nil, nil, nil, err 424 } 425 return counterMetricObject, errorCounterMetricObject, durationMetricObject, zapLogForMetrics, nil 426 }