github.com/verrazzano/verrazzano@v1.7.1/application-operator/controllers/metricsbinding/metricsbinding_update.go (about) 1 // Copyright (c) 2022, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package metricsbinding 5 6 import ( 7 "context" 8 "fmt" 9 "time" 10 11 "github.com/Jeffail/gabs/v2" 12 promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 13 vzapi "github.com/verrazzano/verrazzano/application-operator/apis/app/v1alpha1" 14 "github.com/verrazzano/verrazzano/application-operator/constants" 15 "github.com/verrazzano/verrazzano/application-operator/controllers/clusters" 16 vztemplate "github.com/verrazzano/verrazzano/application-operator/controllers/template" 17 "github.com/verrazzano/verrazzano/application-operator/internal/metrics" 18 vzconst "github.com/verrazzano/verrazzano/pkg/constants" 19 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 20 "github.com/verrazzano/verrazzano/pkg/metricsutils" 21 k8scorev1 "k8s.io/api/core/v1" 22 k8smetav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 24 k8sruntime "k8s.io/apimachinery/pkg/runtime" 25 "k8s.io/apimachinery/pkg/types" 26 "k8s.io/apimachinery/pkg/util/rand" 27 k8scontroller "sigs.k8s.io/controller-runtime" 28 k8sclient "sigs.k8s.io/controller-runtime/pkg/client" 29 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 30 "sigs.k8s.io/controller-runtime/pkg/reconcile" 31 "sigs.k8s.io/yaml" 32 ) 33 34 // reconcileBindingCreateOrUpdate completes the reconcile process for an object that is being created or updated 35 func (r *Reconciler) reconcileBindingCreateOrUpdate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (k8scontroller.Result, error) { 36 log.Debugw("Reconcile for created or updated object", "resource", metricsBinding.GetName()) 37 38 // Requeue with a delay to account for situations where the scrape config 39 // has changed but without the MetricsBinding changing. 40 var seconds = rand.IntnRange(45, 90) 41 var requeueDuration = time.Duration(seconds) * time.Second 42 43 // Handle the case where the workload uses the default metrics template - in this case, we will 44 // delete the metrics binding if processing succeeds, since this is a one-time conversion of 45 // legacy apps using default metrics template, to ServiceMonitor. If it's not using VMI config map, 46 // we treat it like custom metrics setup 47 if isLegacyDefaultMetricsBinding(metricsBinding) { 48 log.Debug("Legacy default MetricsBinding found, creating a Service Monitor and deleting the MetricsBinding") 49 if err := r.handleDefaultMetricsTemplate(ctx, metricsBinding, log); err != nil { 50 return k8scontroller.Result{Requeue: true}, err 51 } 52 log.Infof("Deleting legacy default MetricsBinding %s/%s", metricsBinding.Namespace, metricsBinding.Name) 53 if err := r.deleteMetricsBinding(metricsBinding, log); err != nil { 54 return k8scontroller.Result{Requeue: true}, err 55 } 56 // Requeue with a delay to account for situations where the scrape config 57 // has changed but without the MetricsBinding changing. 58 return reconcile.Result{Requeue: true, RequeueAfter: requeueDuration}, nil 59 } 60 61 // Update the MetricsBinding to add workload as owner ref 62 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, metricsBinding, func() error { 63 return r.updateMetricsBinding(metricsBinding, log) 64 }) 65 if err != nil { 66 return k8scontroller.Result{Requeue: true}, err 67 } 68 69 // Handle the case where the workload uses a custom metrics template 70 if err = r.handleCustomMetricsTemplate(ctx, metricsBinding, log); err != nil { 71 return k8scontroller.Result{Requeue: true}, err 72 } 73 74 // Requeue with a delay to account for situations where the scrape config 75 // has changed but without the MetricsBinding changing. 76 return reconcile.Result{Requeue: true, RequeueAfter: requeueDuration}, nil 77 } 78 79 // handleDefaultMetricsTemplate handles pre-Verrazzano 1.4 metrics bindings that use the default 80 // metrics template, by creating/updating a service monitor that does the same work as the default template 81 func (r *Reconciler) handleDefaultMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error { 82 log.Infof("Default metrics template used by metrics binding %s/%s, creating service monitor", metricsBinding.Namespace, metricsBinding.Name) 83 84 // Create the Service monitor from information gathered from the Metrics Binding 85 scrapeInfo, err := r.createScrapeInfo(ctx, metricsBinding, log) 86 if err != nil { 87 return err 88 } 89 serviceMonitor := promoperapi.ServiceMonitor{} 90 serviceMonitor.SetName(metricsBinding.Name) 91 serviceMonitor.SetNamespace(metricsBinding.Namespace) 92 _, err = controllerutil.CreateOrUpdate(ctx, r.Client, &serviceMonitor, func() error { 93 return metrics.PopulateServiceMonitor(scrapeInfo, &serviceMonitor, log) 94 }) 95 if err != nil { 96 return log.ErrorfNewErr("Failed to create or update the service monitor for the Metrics Binding %s/%s: %v", metricsBinding.Namespace, metricsBinding.Name, err) 97 } 98 return nil 99 } 100 101 // handleCustomMetricsTemplate handles pre-Verrazzano 1.4 metrics bindings that use a custom 102 // metrics template, by updating the additionalScrapeConfigs secret for the Prometheus CR to collect 103 // metrics as specified by the custom template. 104 func (r *Reconciler) handleCustomMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error { 105 log.Debugf("Custom metrics template used by metrics binding %s/%s, edit additionalScrapeConfigs", metricsBinding.Namespace, metricsBinding.Name) 106 107 var workloadNamespaceUnstructured *unstructured.Unstructured 108 var err error 109 // Get the Namespace of the Metrics Binding as an unstructured resource so that it can be applied 110 // to the template 111 if workloadNamespaceUnstructured, err = r.createWorkloadNamespaceUnstructured(metricsBinding, log); err != nil { 112 return err 113 } 114 115 // Get the workload object, so that it can be applied to the template 116 var workloadObject *unstructured.Unstructured 117 if workloadObject, err = r.getWorkloadObject(metricsBinding); err != nil { 118 return log.ErrorfNewErr("Failed to get the workload object for metrics binding %s: %v", metricsBinding.GetName(), err) 119 } 120 121 createdJobName := createJobName(metricsBinding) 122 scrapeConfigString, err := r.createScrapeConfigForMetricsBinding(metricsBinding, workloadObject, workloadNamespaceUnstructured, createdJobName, log) 123 if err != nil { 124 return err 125 } 126 127 // Format scrape config into readable container 128 var configYaml []byte 129 if configYaml, err = yaml.YAMLToJSON([]byte(scrapeConfigString)); err != nil { 130 return log.ErrorfNewErr("Failed to convert scrape config YAML to JSON: %v", err) 131 } 132 133 var newScrapeConfig *gabs.Container 134 if newScrapeConfig, err = gabs.ParseJSON(configYaml); err != nil { 135 return log.ErrorfNewErr("Failed to convert scrape config JSON to container: %v", err) 136 } 137 138 // Collect the data from the ConfigMap or the Secret 139 configMapExists, err := r.updateScrapeConfigInConfigMap(ctx, metricsBinding, createdJobName, newScrapeConfig, log) 140 if !configMapExists { 141 _, err = r.updateScrapeConfigInConfigSecret(ctx, metricsBinding, createdJobName, newScrapeConfig, log) 142 } 143 144 return err 145 } 146 147 // updateScrapeConfigInConfigSecret updates the scrape config in the PrometheusConfigSecret if one 148 // is specified in the metrics binding. Returns true if there is a config secret, and any error that occurred 149 func (r *Reconciler) updateScrapeConfigInConfigSecret(ctx context.Context, metricsBinding *vzapi.MetricsBinding, 150 createdJobName string, newScrapeConfig *gabs.Container, log vzlog.VerrazzanoLogger) (bool, error) { 151 secret, key := getPromConfigSecret(metricsBinding) 152 if secret == nil { 153 return false, nil 154 } 155 log.Debugf("Secret %s/%s found in the MetricsBinding, attempting scrape config update", secret.GetNamespace(), secret.GetName()) 156 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, secret, func() error { 157 var err error 158 var data *gabs.Container 159 if data, err = getConfigDataFromSecret(secret, key); err != nil { 160 return log.ErrorfNewErr("Failed to get the Secret data: %v", err) 161 } 162 var promConfig *gabs.Container 163 if promConfig, err = metricsutils.EditScrapeJob(data, createdJobName, newScrapeConfig); err != nil { 164 return log.ErrorfNewErr("Failed to edit the scrape job: %v", err) 165 } 166 var newPromConfigData []byte 167 if newPromConfigData, err = yaml.JSONToYAML(promConfig.Bytes()); err != nil { 168 return log.ErrorfNewErr("Failed to convert scrape config JSON to YAML: %v", err) 169 } 170 secret.Data[key] = newPromConfigData 171 return nil 172 }) 173 return true, err 174 } 175 176 // updateScrapeConfigInConfigMap updates the scrape config in the Prometheus ConfigMap if one 177 // is specified in the metrics binding. Returns true if there is a config map, and any error that occurred 178 func (r *Reconciler) updateScrapeConfigInConfigMap(ctx context.Context, 179 metricsBinding *vzapi.MetricsBinding, jobName string, newScrapeConfig *gabs.Container, log vzlog.VerrazzanoLogger) (bool, error) { 180 var data *gabs.Container 181 configMap := getPromConfigMap(metricsBinding) 182 if configMap == nil { 183 return false, nil 184 } 185 log.Debugf("ConfigMap %s/%s found in the MetricsBinding, attempting scrape config update", configMap.GetNamespace(), configMap.GetName()) 186 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, configMap, func() error { 187 var err error 188 if data, err = getConfigData(configMap); err != nil { 189 return log.ErrorfNewErr("Failed to get the ConfigMap data: %v", err) 190 } 191 if err = metricsutils.EditScrapeJobInPrometheusConfig(data, prometheusScrapeConfigsLabel, jobName, newScrapeConfig); err != nil { 192 return log.ErrorfNewErr("Failed to edit the scrape job: %v", err) 193 } 194 var newPromConfigData []byte 195 if newPromConfigData, err = yaml.JSONToYAML(data.Bytes()); err != nil { 196 return log.ErrorfNewErr("Failed to convert scrape config JSON to YAML: %v", err) 197 } 198 configMap.Data[prometheusConfigKey] = string(newPromConfigData) 199 return nil 200 }) 201 return true, err 202 } 203 204 func (r *Reconciler) createWorkloadNamespaceUnstructured(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (*unstructured.Unstructured, error) { 205 workloadNamespace := k8scorev1.Namespace{} 206 log.Debugf("Getting the workload namespace %s from the MetricsBinding", metricsBinding.GetNamespace()) 207 err := r.Client.Get(context.TODO(), k8sclient.ObjectKey{Name: metricsBinding.GetNamespace()}, &workloadNamespace) 208 if err != nil { 209 return nil, log.ErrorfNewErr("Failed to get metrics binding namespace %s: %v", metricsBinding.GetName(), err) 210 } 211 212 // Create an unstructured resource from the Namespace, so it can be applied to the template 213 workloadNamespaceUnstructuredMap, err := k8sruntime.DefaultUnstructuredConverter.ToUnstructured(&workloadNamespace) 214 if err != nil { 215 return nil, log.ErrorfNewErr("Failed to get the unstructured for namespace %s: %v", workloadNamespace.GetName(), err) 216 } 217 return &unstructured.Unstructured{Object: workloadNamespaceUnstructuredMap}, nil 218 } 219 220 func (r *Reconciler) createScrapeInfo(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (metrics.ScrapeInfo, error) { 221 log.Debugf("Attempting to create the ServiceMonitor information from the MetricsBinding %s/%s", metricsBinding.Namespace, metricsBinding.Name) 222 var scrapeInfo metrics.ScrapeInfo 223 224 // Get the workload object from the Metrics Binding to populate the Service Monitor 225 workload := metricsBinding.Spec.Workload 226 workloadObject := unstructured.Unstructured{} 227 workloadObject.SetKind(workload.TypeMeta.Kind) 228 workloadObject.SetAPIVersion(workload.TypeMeta.APIVersion) 229 workloadName := types.NamespacedName{Namespace: metricsBinding.Namespace, Name: workload.Name} 230 log.Debugf("Getting the workload resource %s/%s from the MetricsBinding", workloadName.Namespace, workloadName.Name) 231 err := r.Client.Get(ctx, workloadName, &workloadObject) 232 if err != nil { 233 return scrapeInfo, log.ErrorfNewErr("Failed to get the workload %s from the MetricsBinding %s/%s: %v", workload.Name, metricsBinding.Namespace, metricsBinding.Name, err) 234 } 235 236 // Get the namespace for the Metrics Binding to check if Istio is enabled 237 workloadNamespace := k8scorev1.Namespace{} 238 log.Debugf("Getting the workload namespace %s from the MetricsBinding", metricsBinding.GetNamespace()) 239 err = r.Client.Get(context.TODO(), k8sclient.ObjectKey{Name: metricsBinding.GetNamespace()}, &workloadNamespace) 240 if err != nil { 241 return scrapeInfo, log.ErrorfNewErr("Failed to get MetricsBinding namespace %s: %v", metricsBinding.GetName(), err) 242 } 243 244 // Verify if Istio is enabled from the Namespace annotations 245 value, ok := workloadNamespace.Labels[constants.LabelIstioInjection] 246 istioEnabled := ok && value == "enabled" 247 scrapeInfo.IstioEnabled = &istioEnabled 248 249 // Match the Verrazzano workload application labels that get applied by the Metrics Binding labeler 250 value, ok = workloadObject.GetLabels()[constants.MetricsWorkloadLabel] 251 if !ok { 252 return scrapeInfo, log.ErrorfNewErr("Failed to find the annotation %s on the target workload", constants.MetricsWorkloadLabel) 253 } 254 scrapeInfo.KeepLabels = map[string]string{workloadSourceLabel: value} 255 256 // Add a port to the Service Monitor endpoints 257 scrapeInfo.Ports = 1 258 259 // Add the cluster name to the scrape info 260 scrapeInfo.ClusterName = clusters.GetClusterName(ctx, r.Client) 261 262 return scrapeInfo, nil 263 } 264 265 // updateMetricsBinding updates the Metrics Binding Owner Reference from the target workload, 266 // adds a finalizer, and updates the PrometheusConfigSecret field if the metrics binding was using 267 // the legacy default prometheus config map 268 func (r *Reconciler) updateMetricsBinding(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error { 269 // Add the finalizer 270 controllerutil.AddFinalizer(metricsBinding, finalizerName) 271 272 // Retrieve the workload object from the MetricsBinding 273 workloadObject, err := r.getWorkloadObject(metricsBinding) 274 if err != nil { 275 return log.ErrorfNewErr("Failed to get the Workload from the MetricsBinding %s: %v", metricsBinding.Spec.Workload.Name, err) 276 } 277 278 // Return error if UID is not found 279 if len(workloadObject.GetUID()) == 0 { 280 err = fmt.Errorf("could not get UID from workload resource: %s, %s", workloadObject.GetKind(), workloadObject.GetName()) 281 return log.ErrorfNewErr("Failed to find UID for workload %s: %v", workloadObject.GetName(), err) 282 } 283 284 // Set the owner reference for the MetricsBinding so that it gets deleted with the workload 285 log.Debugf("Updating the MetricsBinding OwnerReference to the target workload %s/%s", workloadObject.GetNamespace(), workloadObject.GetName()) 286 trueValue := true 287 metricsBinding.SetOwnerReferences([]k8smetav1.OwnerReference{ 288 { 289 Name: workloadObject.GetName(), 290 APIVersion: workloadObject.GetAPIVersion(), 291 Kind: workloadObject.GetKind(), 292 UID: workloadObject.GetUID(), 293 Controller: &trueValue, 294 BlockOwnerDeletion: &trueValue, 295 }, 296 }) 297 298 // If the config map specified is the legacy VMI prometheus config map, modify it to use 299 // the additionalScrapeConfigs config map for the Prometheus Operator 300 if isLegacyVmiPrometheusConfigMapName(metricsBinding.Spec.PrometheusConfigMap) { 301 log.Infof("Metrics Binding %s/%s uses legacy VMI prometheus config map - updating to use the Prometheus operator secret %s/%s", 302 metricsBinding.Namespace, metricsBinding.Name, vzconst.PrometheusOperatorNamespace, vzconst.PromAdditionalScrapeConfigsSecretName) 303 metricsBinding.Spec.PrometheusConfigMap = vzapi.NamespaceName{} 304 metricsBinding.Spec.PrometheusConfigSecret = vzapi.SecretKey{ 305 Namespace: vzconst.PrometheusOperatorNamespace, 306 Name: vzconst.PromAdditionalScrapeConfigsSecretName, 307 Key: vzconst.PromAdditionalScrapeConfigsSecretKey, 308 } 309 } 310 311 return nil 312 } 313 314 // getMetricsTemplate returns the MetricsTemplate given in the MetricsBinding 315 func (r *Reconciler) getMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (*vzapi.MetricsTemplate, error) { 316 template := vzapi.MetricsTemplate{ 317 TypeMeta: k8smetav1.TypeMeta{ 318 Kind: vzconst.MetricsTemplateKind, 319 APIVersion: vzconst.MetricsTemplateAPIVersion, 320 }, 321 } 322 323 templateSpec := metricsBinding.Spec.MetricsTemplate 324 namespacedName := types.NamespacedName{Name: templateSpec.Name, Namespace: templateSpec.Namespace} 325 err := r.Client.Get(ctx, namespacedName, &template) 326 if err != nil { 327 newErr := fmt.Errorf("Failed to get the MetricsTemplate %s: %v", templateSpec.Name, err) 328 return nil, log.ErrorfNewErr(newErr.Error()) 329 } 330 return &template, nil 331 } 332 333 // getWorkloadObject returns the workload object based on the definition in the MetricsBinding 334 func (r *Reconciler) getWorkloadObject(metricsBinding *vzapi.MetricsBinding) (*unstructured.Unstructured, error) { 335 // Retrieve the owner from the workload field of the MetricsBinding 336 owner := metricsBinding.Spec.Workload 337 workloadObject := unstructured.Unstructured{} 338 workloadObject.SetKind(owner.TypeMeta.Kind) 339 workloadObject.SetAPIVersion(owner.TypeMeta.APIVersion) 340 workloadName := types.NamespacedName{Namespace: metricsBinding.GetNamespace(), Name: owner.Name} 341 err := r.Client.Get(context.Background(), workloadName, &workloadObject) 342 if err != nil { 343 return nil, err 344 } 345 return &workloadObject, nil 346 } 347 348 // deleteMetricsBinding deletes the Metrics Binding object from the cluster 349 func (r *Reconciler) deleteMetricsBinding(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error { 350 // Remove the finalizer from the metrics binding 351 _, err := controllerutil.CreateOrUpdate(context.TODO(), r.Client, metricsBinding, func() error { 352 controllerutil.RemoveFinalizer(metricsBinding, finalizerName) 353 return nil 354 }) 355 if err != nil { 356 return log.ErrorfNewErr("Failed to remove the finalizer from the Metrics Binding %s/%s: %s", metricsBinding.Namespace, metricsBinding.Name, err) 357 } 358 359 // Delete the binding once the finalizer has been removed 360 err = r.Delete(context.Background(), metricsBinding) 361 if err != nil { 362 return log.ErrorfNewErr("Failed to delete the Metrics Binding %s/%s from the cluster: %v", metricsBinding.Namespace, metricsBinding.Name, err) 363 } 364 return err 365 } 366 367 func (r *Reconciler) createScrapeConfigForMetricsBinding( 368 metricsBinding *vzapi.MetricsBinding, workloadObject *unstructured.Unstructured, 369 workloadNamespaceUnstructured *unstructured.Unstructured, jobName string, log vzlog.VerrazzanoLogger) (string, error) { 370 // Get the Metrics Template from the Metrics Binding 371 template, err := r.getMetricsTemplate(context.Background(), metricsBinding, log) 372 if err != nil { 373 return "", err 374 } 375 376 // Organize inputs for template processor 377 log.Debugf("Creating the template inputs from the workload %s and namespace %s", workloadObject.GetName(), metricsBinding.GetNamespace()) 378 templateInputs := map[string]interface{}{ 379 "workload": workloadObject.Object, 380 "namespace": workloadNamespaceUnstructured.Object, 381 } 382 383 // Get scrape config from the template processor and process the template inputs 384 templateProcessor := vztemplate.NewProcessor(r.Client, template.Spec.PrometheusConfig.ScrapeConfigTemplate) 385 scrapeConfigString, err := templateProcessor.Process(templateInputs) 386 if err != nil { 387 return "", log.ErrorfNewErr("Failed to process metrics template %s: %v", template.GetName(), err) 388 } 389 390 // Prepend job name to the scrape config 391 scrapeConfigString = formatJobName(jobName) + scrapeConfigString 392 return scrapeConfigString, nil 393 }