github.com/verrazzano/verrazzano@v1.7.1/application-operator/controllers/metricsbinding/metricsbinding_update.go (about)

     1  // Copyright (c) 2022, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package metricsbinding
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"time"
    10  
    11  	"github.com/Jeffail/gabs/v2"
    12  	promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
    13  	vzapi "github.com/verrazzano/verrazzano/application-operator/apis/app/v1alpha1"
    14  	"github.com/verrazzano/verrazzano/application-operator/constants"
    15  	"github.com/verrazzano/verrazzano/application-operator/controllers/clusters"
    16  	vztemplate "github.com/verrazzano/verrazzano/application-operator/controllers/template"
    17  	"github.com/verrazzano/verrazzano/application-operator/internal/metrics"
    18  	vzconst "github.com/verrazzano/verrazzano/pkg/constants"
    19  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    20  	"github.com/verrazzano/verrazzano/pkg/metricsutils"
    21  	k8scorev1 "k8s.io/api/core/v1"
    22  	k8smetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    23  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    24  	k8sruntime "k8s.io/apimachinery/pkg/runtime"
    25  	"k8s.io/apimachinery/pkg/types"
    26  	"k8s.io/apimachinery/pkg/util/rand"
    27  	k8scontroller "sigs.k8s.io/controller-runtime"
    28  	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
    29  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    30  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    31  	"sigs.k8s.io/yaml"
    32  )
    33  
    34  // reconcileBindingCreateOrUpdate completes the reconcile process for an object that is being created or updated
    35  func (r *Reconciler) reconcileBindingCreateOrUpdate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (k8scontroller.Result, error) {
    36  	log.Debugw("Reconcile for created or updated object", "resource", metricsBinding.GetName())
    37  
    38  	// Requeue with a delay to account for situations where the scrape config
    39  	// has changed but without the MetricsBinding changing.
    40  	var seconds = rand.IntnRange(45, 90)
    41  	var requeueDuration = time.Duration(seconds) * time.Second
    42  
    43  	// Handle the case where the workload uses the default metrics template - in this case, we will
    44  	// delete the metrics binding if processing succeeds, since this is a one-time conversion of
    45  	// legacy apps using default metrics template, to ServiceMonitor. If it's not using VMI config map,
    46  	// we treat it like custom metrics setup
    47  	if isLegacyDefaultMetricsBinding(metricsBinding) {
    48  		log.Debug("Legacy default MetricsBinding found, creating a Service Monitor and deleting the MetricsBinding")
    49  		if err := r.handleDefaultMetricsTemplate(ctx, metricsBinding, log); err != nil {
    50  			return k8scontroller.Result{Requeue: true}, err
    51  		}
    52  		log.Infof("Deleting legacy default MetricsBinding %s/%s", metricsBinding.Namespace, metricsBinding.Name)
    53  		if err := r.deleteMetricsBinding(metricsBinding, log); err != nil {
    54  			return k8scontroller.Result{Requeue: true}, err
    55  		}
    56  		// Requeue with a delay to account for situations where the scrape config
    57  		// has changed but without the MetricsBinding changing.
    58  		return reconcile.Result{Requeue: true, RequeueAfter: requeueDuration}, nil
    59  	}
    60  
    61  	// Update the MetricsBinding to add workload as owner ref
    62  	_, err := controllerutil.CreateOrUpdate(ctx, r.Client, metricsBinding, func() error {
    63  		return r.updateMetricsBinding(metricsBinding, log)
    64  	})
    65  	if err != nil {
    66  		return k8scontroller.Result{Requeue: true}, err
    67  	}
    68  
    69  	// Handle the case where the workload uses a custom metrics template
    70  	if err = r.handleCustomMetricsTemplate(ctx, metricsBinding, log); err != nil {
    71  		return k8scontroller.Result{Requeue: true}, err
    72  	}
    73  
    74  	// Requeue with a delay to account for situations where the scrape config
    75  	// has changed but without the MetricsBinding changing.
    76  	return reconcile.Result{Requeue: true, RequeueAfter: requeueDuration}, nil
    77  }
    78  
    79  // handleDefaultMetricsTemplate handles pre-Verrazzano 1.4 metrics bindings that use the default
    80  // metrics template, by creating/updating a service monitor that does the same work as the default template
    81  func (r *Reconciler) handleDefaultMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error {
    82  	log.Infof("Default metrics template used by metrics binding %s/%s, creating service monitor", metricsBinding.Namespace, metricsBinding.Name)
    83  
    84  	// Create the Service monitor from information gathered from the Metrics Binding
    85  	scrapeInfo, err := r.createScrapeInfo(ctx, metricsBinding, log)
    86  	if err != nil {
    87  		return err
    88  	}
    89  	serviceMonitor := promoperapi.ServiceMonitor{}
    90  	serviceMonitor.SetName(metricsBinding.Name)
    91  	serviceMonitor.SetNamespace(metricsBinding.Namespace)
    92  	_, err = controllerutil.CreateOrUpdate(ctx, r.Client, &serviceMonitor, func() error {
    93  		return metrics.PopulateServiceMonitor(scrapeInfo, &serviceMonitor, log)
    94  	})
    95  	if err != nil {
    96  		return log.ErrorfNewErr("Failed to create or update the service monitor for the Metrics Binding %s/%s: %v", metricsBinding.Namespace, metricsBinding.Name, err)
    97  	}
    98  	return nil
    99  }
   100  
   101  // handleCustomMetricsTemplate handles pre-Verrazzano 1.4 metrics bindings that use a custom
   102  // metrics template, by updating the additionalScrapeConfigs secret for the Prometheus CR to collect
   103  // metrics as specified by the custom template.
   104  func (r *Reconciler) handleCustomMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error {
   105  	log.Debugf("Custom metrics template used by metrics binding %s/%s, edit additionalScrapeConfigs", metricsBinding.Namespace, metricsBinding.Name)
   106  
   107  	var workloadNamespaceUnstructured *unstructured.Unstructured
   108  	var err error
   109  	// Get the Namespace of the Metrics Binding as an unstructured resource so that it can be applied
   110  	// to the template
   111  	if workloadNamespaceUnstructured, err = r.createWorkloadNamespaceUnstructured(metricsBinding, log); err != nil {
   112  		return err
   113  	}
   114  
   115  	// Get the workload object, so that it can be applied to the template
   116  	var workloadObject *unstructured.Unstructured
   117  	if workloadObject, err = r.getWorkloadObject(metricsBinding); err != nil {
   118  		return log.ErrorfNewErr("Failed to get the workload object for metrics binding %s: %v", metricsBinding.GetName(), err)
   119  	}
   120  
   121  	createdJobName := createJobName(metricsBinding)
   122  	scrapeConfigString, err := r.createScrapeConfigForMetricsBinding(metricsBinding, workloadObject, workloadNamespaceUnstructured, createdJobName, log)
   123  	if err != nil {
   124  		return err
   125  	}
   126  
   127  	// Format scrape config into readable container
   128  	var configYaml []byte
   129  	if configYaml, err = yaml.YAMLToJSON([]byte(scrapeConfigString)); err != nil {
   130  		return log.ErrorfNewErr("Failed to convert scrape config YAML to JSON: %v", err)
   131  	}
   132  
   133  	var newScrapeConfig *gabs.Container
   134  	if newScrapeConfig, err = gabs.ParseJSON(configYaml); err != nil {
   135  		return log.ErrorfNewErr("Failed to convert scrape config JSON to container: %v", err)
   136  	}
   137  
   138  	// Collect the data from the ConfigMap or the Secret
   139  	configMapExists, err := r.updateScrapeConfigInConfigMap(ctx, metricsBinding, createdJobName, newScrapeConfig, log)
   140  	if !configMapExists {
   141  		_, err = r.updateScrapeConfigInConfigSecret(ctx, metricsBinding, createdJobName, newScrapeConfig, log)
   142  	}
   143  
   144  	return err
   145  }
   146  
   147  // updateScrapeConfigInConfigSecret updates the scrape config in the PrometheusConfigSecret if one
   148  // is specified in the metrics binding. Returns true if there is a config secret, and any error that occurred
   149  func (r *Reconciler) updateScrapeConfigInConfigSecret(ctx context.Context, metricsBinding *vzapi.MetricsBinding,
   150  	createdJobName string, newScrapeConfig *gabs.Container, log vzlog.VerrazzanoLogger) (bool, error) {
   151  	secret, key := getPromConfigSecret(metricsBinding)
   152  	if secret == nil {
   153  		return false, nil
   154  	}
   155  	log.Debugf("Secret %s/%s found in the MetricsBinding, attempting scrape config update", secret.GetNamespace(), secret.GetName())
   156  	_, err := controllerutil.CreateOrUpdate(ctx, r.Client, secret, func() error {
   157  		var err error
   158  		var data *gabs.Container
   159  		if data, err = getConfigDataFromSecret(secret, key); err != nil {
   160  			return log.ErrorfNewErr("Failed to get the Secret data: %v", err)
   161  		}
   162  		var promConfig *gabs.Container
   163  		if promConfig, err = metricsutils.EditScrapeJob(data, createdJobName, newScrapeConfig); err != nil {
   164  			return log.ErrorfNewErr("Failed to edit the scrape job: %v", err)
   165  		}
   166  		var newPromConfigData []byte
   167  		if newPromConfigData, err = yaml.JSONToYAML(promConfig.Bytes()); err != nil {
   168  			return log.ErrorfNewErr("Failed to convert scrape config JSON to YAML: %v", err)
   169  		}
   170  		secret.Data[key] = newPromConfigData
   171  		return nil
   172  	})
   173  	return true, err
   174  }
   175  
   176  // updateScrapeConfigInConfigMap updates the scrape config in the Prometheus ConfigMap if one
   177  // is specified in the metrics binding. Returns true if there is a config map, and any error that occurred
   178  func (r *Reconciler) updateScrapeConfigInConfigMap(ctx context.Context,
   179  	metricsBinding *vzapi.MetricsBinding, jobName string, newScrapeConfig *gabs.Container, log vzlog.VerrazzanoLogger) (bool, error) {
   180  	var data *gabs.Container
   181  	configMap := getPromConfigMap(metricsBinding)
   182  	if configMap == nil {
   183  		return false, nil
   184  	}
   185  	log.Debugf("ConfigMap %s/%s found in the MetricsBinding, attempting scrape config update", configMap.GetNamespace(), configMap.GetName())
   186  	_, err := controllerutil.CreateOrUpdate(ctx, r.Client, configMap, func() error {
   187  		var err error
   188  		if data, err = getConfigData(configMap); err != nil {
   189  			return log.ErrorfNewErr("Failed to get the ConfigMap data: %v", err)
   190  		}
   191  		if err = metricsutils.EditScrapeJobInPrometheusConfig(data, prometheusScrapeConfigsLabel, jobName, newScrapeConfig); err != nil {
   192  			return log.ErrorfNewErr("Failed to edit the scrape job: %v", err)
   193  		}
   194  		var newPromConfigData []byte
   195  		if newPromConfigData, err = yaml.JSONToYAML(data.Bytes()); err != nil {
   196  			return log.ErrorfNewErr("Failed to convert scrape config JSON to YAML: %v", err)
   197  		}
   198  		configMap.Data[prometheusConfigKey] = string(newPromConfigData)
   199  		return nil
   200  	})
   201  	return true, err
   202  }
   203  
   204  func (r *Reconciler) createWorkloadNamespaceUnstructured(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (*unstructured.Unstructured, error) {
   205  	workloadNamespace := k8scorev1.Namespace{}
   206  	log.Debugf("Getting the workload namespace %s from the MetricsBinding", metricsBinding.GetNamespace())
   207  	err := r.Client.Get(context.TODO(), k8sclient.ObjectKey{Name: metricsBinding.GetNamespace()}, &workloadNamespace)
   208  	if err != nil {
   209  		return nil, log.ErrorfNewErr("Failed to get metrics binding namespace %s: %v", metricsBinding.GetName(), err)
   210  	}
   211  
   212  	// Create an unstructured resource from the Namespace, so it can be applied to the template
   213  	workloadNamespaceUnstructuredMap, err := k8sruntime.DefaultUnstructuredConverter.ToUnstructured(&workloadNamespace)
   214  	if err != nil {
   215  		return nil, log.ErrorfNewErr("Failed to get the unstructured for namespace %s: %v", workloadNamespace.GetName(), err)
   216  	}
   217  	return &unstructured.Unstructured{Object: workloadNamespaceUnstructuredMap}, nil
   218  }
   219  
   220  func (r *Reconciler) createScrapeInfo(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (metrics.ScrapeInfo, error) {
   221  	log.Debugf("Attempting to create the ServiceMonitor information from the MetricsBinding %s/%s", metricsBinding.Namespace, metricsBinding.Name)
   222  	var scrapeInfo metrics.ScrapeInfo
   223  
   224  	// Get the workload object from the Metrics Binding to populate the Service Monitor
   225  	workload := metricsBinding.Spec.Workload
   226  	workloadObject := unstructured.Unstructured{}
   227  	workloadObject.SetKind(workload.TypeMeta.Kind)
   228  	workloadObject.SetAPIVersion(workload.TypeMeta.APIVersion)
   229  	workloadName := types.NamespacedName{Namespace: metricsBinding.Namespace, Name: workload.Name}
   230  	log.Debugf("Getting the workload resource %s/%s from the MetricsBinding", workloadName.Namespace, workloadName.Name)
   231  	err := r.Client.Get(ctx, workloadName, &workloadObject)
   232  	if err != nil {
   233  		return scrapeInfo, log.ErrorfNewErr("Failed to get the workload %s from the MetricsBinding %s/%s: %v", workload.Name, metricsBinding.Namespace, metricsBinding.Name, err)
   234  	}
   235  
   236  	// Get the namespace for the Metrics Binding to check if Istio is enabled
   237  	workloadNamespace := k8scorev1.Namespace{}
   238  	log.Debugf("Getting the workload namespace %s from the MetricsBinding", metricsBinding.GetNamespace())
   239  	err = r.Client.Get(context.TODO(), k8sclient.ObjectKey{Name: metricsBinding.GetNamespace()}, &workloadNamespace)
   240  	if err != nil {
   241  		return scrapeInfo, log.ErrorfNewErr("Failed to get MetricsBinding namespace %s: %v", metricsBinding.GetName(), err)
   242  	}
   243  
   244  	// Verify if Istio is enabled from the Namespace annotations
   245  	value, ok := workloadNamespace.Labels[constants.LabelIstioInjection]
   246  	istioEnabled := ok && value == "enabled"
   247  	scrapeInfo.IstioEnabled = &istioEnabled
   248  
   249  	// Match the Verrazzano workload application labels that get applied by the Metrics Binding labeler
   250  	value, ok = workloadObject.GetLabels()[constants.MetricsWorkloadLabel]
   251  	if !ok {
   252  		return scrapeInfo, log.ErrorfNewErr("Failed to find the annotation %s on the target workload", constants.MetricsWorkloadLabel)
   253  	}
   254  	scrapeInfo.KeepLabels = map[string]string{workloadSourceLabel: value}
   255  
   256  	// Add a port to the Service Monitor endpoints
   257  	scrapeInfo.Ports = 1
   258  
   259  	// Add the cluster name to the scrape info
   260  	scrapeInfo.ClusterName = clusters.GetClusterName(ctx, r.Client)
   261  
   262  	return scrapeInfo, nil
   263  }
   264  
   265  // updateMetricsBinding updates the Metrics Binding Owner Reference from the target workload,
   266  // adds a finalizer, and updates the PrometheusConfigSecret field if the metrics binding was using
   267  // the legacy default prometheus config map
   268  func (r *Reconciler) updateMetricsBinding(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error {
   269  	// Add the finalizer
   270  	controllerutil.AddFinalizer(metricsBinding, finalizerName)
   271  
   272  	// Retrieve the workload object from the MetricsBinding
   273  	workloadObject, err := r.getWorkloadObject(metricsBinding)
   274  	if err != nil {
   275  		return log.ErrorfNewErr("Failed to get the Workload from the MetricsBinding %s: %v", metricsBinding.Spec.Workload.Name, err)
   276  	}
   277  
   278  	// Return error if UID is not found
   279  	if len(workloadObject.GetUID()) == 0 {
   280  		err = fmt.Errorf("could not get UID from workload resource: %s, %s", workloadObject.GetKind(), workloadObject.GetName())
   281  		return log.ErrorfNewErr("Failed to find UID for workload %s: %v", workloadObject.GetName(), err)
   282  	}
   283  
   284  	// Set the owner reference for the MetricsBinding so that it gets deleted with the workload
   285  	log.Debugf("Updating the MetricsBinding OwnerReference to the target workload %s/%s", workloadObject.GetNamespace(), workloadObject.GetName())
   286  	trueValue := true
   287  	metricsBinding.SetOwnerReferences([]k8smetav1.OwnerReference{
   288  		{
   289  			Name:               workloadObject.GetName(),
   290  			APIVersion:         workloadObject.GetAPIVersion(),
   291  			Kind:               workloadObject.GetKind(),
   292  			UID:                workloadObject.GetUID(),
   293  			Controller:         &trueValue,
   294  			BlockOwnerDeletion: &trueValue,
   295  		},
   296  	})
   297  
   298  	// If the config map specified is the legacy VMI prometheus config map, modify it to use
   299  	// the additionalScrapeConfigs config map for the Prometheus Operator
   300  	if isLegacyVmiPrometheusConfigMapName(metricsBinding.Spec.PrometheusConfigMap) {
   301  		log.Infof("Metrics Binding %s/%s uses legacy VMI prometheus config map - updating to use the Prometheus operator secret %s/%s",
   302  			metricsBinding.Namespace, metricsBinding.Name, vzconst.PrometheusOperatorNamespace, vzconst.PromAdditionalScrapeConfigsSecretName)
   303  		metricsBinding.Spec.PrometheusConfigMap = vzapi.NamespaceName{}
   304  		metricsBinding.Spec.PrometheusConfigSecret = vzapi.SecretKey{
   305  			Namespace: vzconst.PrometheusOperatorNamespace,
   306  			Name:      vzconst.PromAdditionalScrapeConfigsSecretName,
   307  			Key:       vzconst.PromAdditionalScrapeConfigsSecretKey,
   308  		}
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  // getMetricsTemplate returns the MetricsTemplate given in the MetricsBinding
   315  func (r *Reconciler) getMetricsTemplate(ctx context.Context, metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) (*vzapi.MetricsTemplate, error) {
   316  	template := vzapi.MetricsTemplate{
   317  		TypeMeta: k8smetav1.TypeMeta{
   318  			Kind:       vzconst.MetricsTemplateKind,
   319  			APIVersion: vzconst.MetricsTemplateAPIVersion,
   320  		},
   321  	}
   322  
   323  	templateSpec := metricsBinding.Spec.MetricsTemplate
   324  	namespacedName := types.NamespacedName{Name: templateSpec.Name, Namespace: templateSpec.Namespace}
   325  	err := r.Client.Get(ctx, namespacedName, &template)
   326  	if err != nil {
   327  		newErr := fmt.Errorf("Failed to get the MetricsTemplate %s: %v", templateSpec.Name, err)
   328  		return nil, log.ErrorfNewErr(newErr.Error())
   329  	}
   330  	return &template, nil
   331  }
   332  
   333  // getWorkloadObject returns the workload object based on the definition in the MetricsBinding
   334  func (r *Reconciler) getWorkloadObject(metricsBinding *vzapi.MetricsBinding) (*unstructured.Unstructured, error) {
   335  	// Retrieve the owner from the workload field of the MetricsBinding
   336  	owner := metricsBinding.Spec.Workload
   337  	workloadObject := unstructured.Unstructured{}
   338  	workloadObject.SetKind(owner.TypeMeta.Kind)
   339  	workloadObject.SetAPIVersion(owner.TypeMeta.APIVersion)
   340  	workloadName := types.NamespacedName{Namespace: metricsBinding.GetNamespace(), Name: owner.Name}
   341  	err := r.Client.Get(context.Background(), workloadName, &workloadObject)
   342  	if err != nil {
   343  		return nil, err
   344  	}
   345  	return &workloadObject, nil
   346  }
   347  
   348  // deleteMetricsBinding deletes the Metrics Binding object from the cluster
   349  func (r *Reconciler) deleteMetricsBinding(metricsBinding *vzapi.MetricsBinding, log vzlog.VerrazzanoLogger) error {
   350  	// Remove the finalizer from the metrics binding
   351  	_, err := controllerutil.CreateOrUpdate(context.TODO(), r.Client, metricsBinding, func() error {
   352  		controllerutil.RemoveFinalizer(metricsBinding, finalizerName)
   353  		return nil
   354  	})
   355  	if err != nil {
   356  		return log.ErrorfNewErr("Failed to remove the finalizer from the Metrics Binding %s/%s: %s", metricsBinding.Namespace, metricsBinding.Name, err)
   357  	}
   358  
   359  	// Delete the binding once the finalizer has been removed
   360  	err = r.Delete(context.Background(), metricsBinding)
   361  	if err != nil {
   362  		return log.ErrorfNewErr("Failed to delete the Metrics Binding %s/%s from the cluster: %v", metricsBinding.Namespace, metricsBinding.Name, err)
   363  	}
   364  	return err
   365  }
   366  
   367  func (r *Reconciler) createScrapeConfigForMetricsBinding(
   368  	metricsBinding *vzapi.MetricsBinding, workloadObject *unstructured.Unstructured,
   369  	workloadNamespaceUnstructured *unstructured.Unstructured, jobName string, log vzlog.VerrazzanoLogger) (string, error) {
   370  	// Get the Metrics Template from the Metrics Binding
   371  	template, err := r.getMetricsTemplate(context.Background(), metricsBinding, log)
   372  	if err != nil {
   373  		return "", err
   374  	}
   375  
   376  	// Organize inputs for template processor
   377  	log.Debugf("Creating the template inputs from the workload %s and namespace %s", workloadObject.GetName(), metricsBinding.GetNamespace())
   378  	templateInputs := map[string]interface{}{
   379  		"workload":  workloadObject.Object,
   380  		"namespace": workloadNamespaceUnstructured.Object,
   381  	}
   382  
   383  	// Get scrape config from the template processor and process the template inputs
   384  	templateProcessor := vztemplate.NewProcessor(r.Client, template.Spec.PrometheusConfig.ScrapeConfigTemplate)
   385  	scrapeConfigString, err := templateProcessor.Process(templateInputs)
   386  	if err != nil {
   387  		return "", log.ErrorfNewErr("Failed to process metrics template %s: %v", template.GetName(), err)
   388  	}
   389  
   390  	// Prepend job name to the scrape config
   391  	scrapeConfigString = formatJobName(jobName) + scrapeConfigString
   392  	return scrapeConfigString, nil
   393  }