github.com/verrazzano/verrazzano@v1.7.0/platform-operator/metricsexporter/metricsexporter_utils.go (about) 1 // Copyright (c) 2022, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package metricsexporter 5 6 import ( 7 "fmt" 8 "net/http" 9 "time" 10 11 "github.com/pkg/errors" 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/prometheus/client_golang/prometheus/promhttp" 14 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 15 vzapi "github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1alpha1" 16 "github.com/verrazzano/verrazzano/platform-operator/constants" 17 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/grafanadashboards" 18 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/networkpolicies" 19 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/registry" 20 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/vmo" 21 "go.uber.org/zap" 22 "k8s.io/apimachinery/pkg/util/wait" 23 ) 24 25 var MetricsExp MetricsExporter 26 27 type metricName string 28 29 const ( 30 component = "component" 31 ReconcileCounter metricName = "reconcile counter" 32 ReconcileError metricName = "reconcile error" 33 ReconcileDuration metricName = "reconcile duration" 34 AvailableComponents metricName = "available components" 35 EnabledComponents metricName = "enabled components" 36 ) 37 38 // Init cannot be called until the NGINX namespace is determined at startup 39 func Init() { 40 RequiredInitialization() 41 RegisterMetrics(zap.S()) 42 } 43 44 // This function initializes the metrics object, but does not register the metrics 45 func RequiredInitialization() { 46 MetricsExp = MetricsExporter{ 47 internalConfig: initConfiguration(), 48 internalData: data{ 49 simpleCounterMetricMap: initSimpleCounterMetricMap(), 50 simpleGaugeMetricMap: initSimpleGaugeMetricMap(), 51 durationMetricMap: initDurationMetricMap(), 52 componentHealth: initComponentHealthMetrics(), 53 componentInstallDuration: initComponentInstallDurationMetrics(), 54 componentUpgradeDuration: initComponentUpgradeDurationMetrics(), 55 }, 56 } 57 // initialize component availability metric to false 58 for _, component := range registry.GetComponents() { 59 if IsNonMetricComponent(component.Name()) { 60 continue 61 } 62 MetricsExp.internalData.componentHealth.SetComponentHealth(component.GetJSONName(), false, false) 63 SetComponentInstallDurationMetric(component.GetJSONName(), 0) 64 SetComponentUpgradeDurationMetric(component.GetJSONName(), 0) 65 66 } 67 68 } 69 70 // This function begins the process of registering metrics 71 func RegisterMetrics(log *zap.SugaredLogger) { 72 InitializeAllMetricsArray() 73 go registerMetricsHandlers(log) 74 } 75 76 // This function initializes the simpleCounterMetricMap for the metricsExporter object 77 func initSimpleCounterMetricMap() map[metricName]*SimpleCounterMetric { 78 return map[metricName]*SimpleCounterMetric{ 79 ReconcileCounter: { 80 prometheus.NewCounter(prometheus.CounterOpts{ 81 Name: "vz_platform_operator_reconcile_total", 82 Help: "The number of times the reconcile function has been called in the verrazzano-platform-operator", 83 }), 84 }, 85 ReconcileError: { 86 prometheus.NewCounter(prometheus.CounterOpts{ 87 Name: "vz_platform_operator_error_reconcile_total", 88 Help: "The number of times the reconcile function has returned an error in the verrazzano-platform-operator", 89 }), 90 }, 91 } 92 } 93 94 func initComponentHealthMetrics() *ComponentHealth { 95 return &ComponentHealth{ 96 available: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 97 Name: "vz_platform_operator_component_health", 98 Help: "Is component enabled and available", 99 }, []string{component}), 100 } 101 } 102 103 func initComponentInstallDurationMetrics() *ComponentInstallDuration { 104 return &ComponentInstallDuration{ 105 installDuration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 106 Name: "vz_platform_operator_component_install_duration_seconds", 107 Help: "The duration of the latest installation of each component in seconds", 108 }, []string{component}), 109 } 110 } 111 112 func initComponentUpgradeDurationMetrics() *ComponentUpgradeDuration { 113 return &ComponentUpgradeDuration{ 114 upgradeDuration: prometheus.NewGaugeVec(prometheus.GaugeOpts{ 115 Name: "vz_platform_operator_component_upgrade_duration_seconds", 116 Help: "The duration of the latest upgrade of each component in seconds", 117 }, []string{component}), 118 } 119 } 120 121 // This function initializes the simpleGaugeMetricMap for the metricsExporter object 122 func initSimpleGaugeMetricMap() map[metricName]*SimpleGaugeMetric { 123 return map[metricName]*SimpleGaugeMetric{ 124 AvailableComponents: { 125 metric: prometheus.NewGauge(prometheus.GaugeOpts{ 126 Name: "vz_platform_operator_component_health_total", 127 Help: "The number of currently available Verrazzano components", 128 }), 129 }, 130 EnabledComponents: { 131 metric: prometheus.NewGauge(prometheus.GaugeOpts{ 132 Name: "vz_platform_operator_component_enabled_total", 133 Help: "The number of currently enabled Verrazzano components", 134 }), 135 }, 136 } 137 } 138 139 // This function initializes the durationMetricMap for the metricsExporter object 140 func initDurationMetricMap() map[metricName]*DurationMetric { 141 return map[metricName]*DurationMetric{ 142 ReconcileDuration: { 143 metric: prometheus.NewSummary(prometheus.SummaryOpts{ 144 Name: "vz_platform_operator_reconcile_duration", 145 Help: "The duration in seconds of vpo reconcile process", 146 }), 147 }, 148 } 149 } 150 151 // This function is used to determine whether a durationTime for a component metric should be set and what the duration time is 152 // If the start time is greater than the completion time, the metric will not be set 153 // After this check, the function calculates the duration time and tries to set the metric of the component 154 // If the component's name is not in the metric map, an error will be raised to prevent a seg fault 155 func metricParserHelperFunction(log vzlog.VerrazzanoLogger, componentName string, startTime string, completionTime string, typeofOperation string) { 156 startInSeconds, err := time.Parse(time.RFC3339, startTime) 157 if err != nil { 158 log.Errorf("Error in parsing start time %s for operation %s for component %s", startTime, typeofOperation, componentName) 159 return 160 } 161 startInSecondsUnix := startInSeconds.Unix() 162 completionInSeconds, err := time.Parse(time.RFC3339, completionTime) 163 if err != nil { 164 log.Errorf("Error in parsing completion time %s for operation %s for component %s", completionTime, typeofOperation, componentName) 165 return 166 } 167 completionInSecondsUnix := completionInSeconds.Unix() 168 if startInSecondsUnix >= completionInSecondsUnix { 169 log.Debug("Component %s is not updated, as there is an ongoing operation in progress") 170 return 171 } 172 totalDuration := (completionInSecondsUnix - startInSecondsUnix) 173 if typeofOperation == constants.InstallOperation { 174 err := SetComponentInstallDurationMetric(componentName, totalDuration) 175 if err != nil { 176 log.Errorf(err.Error()) 177 return 178 } 179 } 180 if typeofOperation == constants.UpgradeOperation { 181 err := SetComponentUpgradeDurationMetric(componentName, totalDuration) 182 if err != nil { 183 log.Errorf(err.Error()) 184 return 185 } 186 } 187 } 188 189 func SetComponentInstallDurationMetric(JSONName string, totalDuration int64) error { 190 metric, err := MetricsExp.internalData.componentInstallDuration.installDuration.GetMetricWithLabelValues(JSONName) 191 if err != nil { 192 return err 193 } 194 metric.Set(float64(totalDuration)) 195 return nil 196 } 197 198 func SetComponentUpgradeDurationMetric(JSONName string, totalDuration int64) error { 199 metric, err := MetricsExp.internalData.componentUpgradeDuration.upgradeDuration.GetMetricWithLabelValues(JSONName) 200 if err != nil { 201 return err 202 } 203 metric.Set(float64(totalDuration)) 204 return nil 205 } 206 207 // This function is a helper function that assists in registering metrics 208 func registerMetricsHandlersHelper() error { 209 var errorObserved error 210 for metric := range MetricsExp.internalConfig.failedMetrics { 211 err := MetricsExp.internalConfig.registry.Register(metric) 212 if err != nil { 213 if errorObserved != nil { 214 errorObserved = errors.Wrap(errorObserved, err.Error()) 215 } else { 216 errorObserved = err 217 } 218 } else { 219 // if a metric is registered, delete it from the failed metrics map so that it is not retried 220 delete(MetricsExp.internalConfig.failedMetrics, metric) 221 } 222 } 223 return errorObserved 224 } 225 226 // This function registers the metrics and provides error handling 227 func registerMetricsHandlers(log *zap.SugaredLogger) { 228 initializeFailedMetricsArray() // Get list of metrics to register initially 229 // loop until there is no error in registering 230 for err := registerMetricsHandlersHelper(); err != nil; err = registerMetricsHandlersHelper() { 231 log.Errorf("Failed to register metrics for VPO %v \n", err) 232 time.Sleep(time.Second) 233 } 234 // register component health metrics vector 235 MetricsExp.internalConfig.registry.MustRegister(MetricsExp.internalData.componentHealth.available) 236 MetricsExp.internalConfig.registry.MustRegister(MetricsExp.internalData.componentInstallDuration.installDuration) 237 MetricsExp.internalConfig.registry.MustRegister(MetricsExp.internalData.componentUpgradeDuration.upgradeDuration) 238 } 239 240 // This function initializes the failedMetrics array 241 func initializeFailedMetricsArray() { 242 for i, metric := range MetricsExp.internalConfig.allMetrics { 243 MetricsExp.internalConfig.failedMetrics[metric] = i 244 } 245 } 246 247 // This function starts the metric server to begin emitting metrics to Prometheus 248 func StartMetricsServer(log *zap.SugaredLogger) { 249 go wait.Until(func() { 250 http.Handle("/metrics", promhttp.Handler()) 251 server := &http.Server{ 252 Addr: ":9100", 253 ReadHeaderTimeout: 3 * time.Second, 254 } 255 if err := server.ListenAndServe(); err != nil { 256 log.Errorf("Failed to start metrics server for verrazzano-platform-operator: %v", err) 257 } 258 }, time.Second*3, wait.NeverStop) 259 } 260 261 // This functionn parses the VZ CR and extracts the install and update data for each component 262 func AnalyzeVerrazzanoResourceMetrics(log vzlog.VerrazzanoLogger, cr vzapi.Verrazzano) { 263 mapOfComponents := cr.Status.Components 264 for componentName, componentStatusDetails := range mapOfComponents { 265 // If component is not in the metricsMap, move on to the next component 266 if IsNonMetricComponent(componentName) { 267 continue 268 } 269 var installCompletionTime string 270 var upgradeCompletionTime string 271 var upgradeStartTime string 272 var installStartTime string 273 for _, status := range componentStatusDetails.Conditions { 274 if status.Type == vzapi.CondInstallStarted { 275 installStartTime = status.LastTransitionTime 276 } 277 if status.Type == vzapi.CondInstallComplete { 278 installCompletionTime = status.LastTransitionTime 279 } 280 if status.Type == vzapi.CondUpgradeStarted { 281 upgradeStartTime = status.LastTransitionTime 282 } 283 if status.Type == vzapi.CondUpgradeComplete { 284 upgradeCompletionTime = status.LastTransitionTime 285 } 286 } 287 found, component := registry.FindComponent(componentName) 288 if !found { 289 continue 290 } 291 componentJSONName := component.GetJSONName() 292 if installStartTime != "" && installCompletionTime != "" { 293 metricParserHelperFunction(log, componentJSONName, installStartTime, installCompletionTime, constants.InstallOperation) 294 } 295 if upgradeStartTime != "" && upgradeCompletionTime != "" { 296 metricParserHelperFunction(log, componentJSONName, upgradeStartTime, upgradeCompletionTime, constants.UpgradeOperation) 297 } 298 } 299 } 300 301 // This function initializes the allMetrics array 302 func InitializeAllMetricsArray() { 303 // loop through all metrics declarations in metric maps 304 for _, value := range MetricsExp.internalData.simpleCounterMetricMap { 305 MetricsExp.internalConfig.allMetrics = append(MetricsExp.internalConfig.allMetrics, value.metric) 306 } 307 for _, value := range MetricsExp.internalData.durationMetricMap { 308 MetricsExp.internalConfig.allMetrics = append(MetricsExp.internalConfig.allMetrics, value.metric) 309 } 310 for _, value := range MetricsExp.internalData.simpleGaugeMetricMap { 311 MetricsExp.internalConfig.allMetrics = append(MetricsExp.internalConfig.allMetrics, value.metric) 312 } 313 } 314 315 // This function returns an empty struct of type configuration 316 func initConfiguration() configuration { 317 return configuration{ 318 allMetrics: []prometheus.Collector{}, 319 failedMetrics: map[prometheus.Collector]int{}, 320 registry: prometheus.DefaultRegisterer, 321 } 322 } 323 324 // This function returns a simpleCounterMetric from the simpleCounterMetricMap given a metricName 325 func GetSimpleCounterMetric(name metricName) (*SimpleCounterMetric, error) { 326 counterMetric, ok := MetricsExp.internalData.simpleCounterMetricMap[name] 327 if !ok { 328 return nil, fmt.Errorf("%v not found in SimpleCounterMetricMap due to metricName being defined, but not being a key in the map", name) 329 } 330 return counterMetric, nil 331 } 332 333 // This function returns a durationMetric from the durationMetricMap given a metricName 334 func GetDurationMetric(name metricName) (*DurationMetric, error) { 335 durationMetric, ok := MetricsExp.internalData.durationMetricMap[name] 336 if !ok { 337 return nil, fmt.Errorf("%v not found in durationMetricMap due to metricName being defined, but not being a key in the map", name) 338 } 339 return durationMetric, nil 340 } 341 342 // This function returns a simpleGaugeMetric from the simpleGaugeMetricMap given a metricName 343 func GetSimpleGaugeMetric(name metricName) (*SimpleGaugeMetric, error) { 344 gaugeMetric, ok := MetricsExp.internalData.simpleGaugeMetricMap[name] 345 if !ok { 346 return nil, fmt.Errorf("%v not found in SimpleGaugeMetricMap due to metricName being defined, but not being a key in the map", name) 347 } 348 return gaugeMetric, nil 349 } 350 351 // SetComponentAvailabilityMetric updates the components availability status metric 352 func SetComponentAvailabilityMetric(JSONname string, availability vzapi.ComponentAvailability, isEnabled bool) error { 353 _, err := MetricsExp.internalData.componentHealth.SetComponentHealth(JSONname, availability == vzapi.ComponentAvailable, isEnabled) 354 if err != nil { 355 return err 356 } 357 return nil 358 } 359 360 func IsNonMetricComponent(componentName string) bool { 361 var nonMetricComponents = map[string]bool{ 362 vmo.ComponentName: true, 363 networkpolicies.ComponentName: true, 364 grafanadashboards.ComponentName: true, 365 } 366 return nonMetricComponents[componentName] 367 }