github.com/verrazzano/verrazzano@v1.7.1/application-operator/controllers/metricstrait/metricstrait_controller.go (about)

     1  // Copyright (c) 2020, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package metricstrait
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	gabs "github.com/Jeffail/gabs/v2"
    15  	oamv1 "github.com/crossplane/oam-kubernetes-runtime/apis/core/v1alpha2"
    16  	"github.com/crossplane/oam-kubernetes-runtime/pkg/oam"
    17  	promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
    18  	vzapi "github.com/verrazzano/verrazzano/application-operator/apis/oam/v1alpha1"
    19  	"github.com/verrazzano/verrazzano/application-operator/constants"
    20  	"github.com/verrazzano/verrazzano/application-operator/controllers/clusters"
    21  	vznav "github.com/verrazzano/verrazzano/application-operator/controllers/navigation"
    22  	"github.com/verrazzano/verrazzano/application-operator/controllers/reconcileresults"
    23  	vzconst "github.com/verrazzano/verrazzano/pkg/constants"
    24  	vzlog "github.com/verrazzano/verrazzano/pkg/log"
    25  	vzlog2 "github.com/verrazzano/verrazzano/pkg/log/vzlog"
    26  	vzstring "github.com/verrazzano/verrazzano/pkg/string"
    27  	"go.uber.org/zap"
    28  	k8sapps "k8s.io/api/apps/v1"
    29  	k8score "k8s.io/api/core/v1"
    30  	"k8s.io/apimachinery/pkg/api/equality"
    31  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    34  	"k8s.io/apimachinery/pkg/runtime"
    35  	"k8s.io/apimachinery/pkg/runtime/schema"
    36  	"k8s.io/apimachinery/pkg/types"
    37  	"k8s.io/apimachinery/pkg/util/rand"
    38  	ctrl "sigs.k8s.io/controller-runtime"
    39  	"sigs.k8s.io/controller-runtime/pkg/client"
    40  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    41  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    42  )
    43  
// Package-level constants used by the metrics trait controller.
const (
	// Kubernetes resource Kinds, plus the name of this controller
	deploymentKind  = "Deployment"
	serviceKind     = "Service"
	statefulSetKind = "StatefulSet"
	podKind         = "Pod"
	controllerName  = "metricstrait"

	// In-code defaults for metrics trait configuration (scrape ports and paths)
	defaultWLSAdminScrapePort = 7001
	defaultCohScrapePort      = 9612
	defaultScrapePort         = 8080
	defaultScrapePath         = "/metrics"
	defaultWLSScrapePath      = "/wls-exporter/metrics"

	// The finalizer name used by this controller
	finalizerName = "metricstrait.finalizers.verrazzano.io"

	// Markers used during the processing of Prometheus scrape configurations.
	// prometheusConfigKey is the key of the Prometheus configuration within the configmap data.
	prometheusConfigKey          = "prometheus.yml"
	prometheusScrapeConfigsLabel = "scrape_configs"
	prometheusClusterNameLabel   = "verrazzano_cluster"

	// Annotation names for metrics read by the controller
	prometheusPortAnnotation = "prometheus.io/port"
	prometheusPathAnnotation = "prometheus.io/path"

	// Annotation names for metrics set by the controller.
	// The names ending in %s are format strings that take a suffix.
	// NOTE(review): presumably the suffix is the port ordinal (see ##PORT_ORDER## in the scrape template) - confirm.
	verrazzanoMetricsAnnotationPrefix  = "verrazzano.io/metrics"
	verrazzanoMetricsPortAnnotation    = "verrazzano.io/metricsPort%s"
	verrazzanoMetricsPathAnnotation    = "verrazzano.io/metricsPath%s"
	verrazzanoMetricsEnabledAnnotation = "verrazzano.io/metricsEnabled%s"

	// basicAuthLabel config label for Prometheus basic auth
	basicAuthLabel = "basic_auth"
	// basicAuthUsernameLabel config label for Prometheus username
	basicAuthUsernameLabel = "username"
	// basicPathPasswordLabel config label for Prometheus password
	basicPathPasswordLabel = "password"

	// Template placeholders for the Prometheus scrape config template.
	// Each value appears verbatim in the scrape config templates declared in this file.
	appNameHolder       = "##APP_NAME##"
	compNameHolder      = "##COMP_NAME##"
	jobNameHolder       = "##JOB_NAME##"
	portOrderHolder     = "##PORT_ORDER##"
	namespaceHolder     = "##NAMESPACE##"
	sslProtocolHolder   = "##SSL_PROTOCOL##"
	vzClusterNameHolder = "##VERRAZZANO_CLUSTER_NAME##"

	// Roles for use in qualified resource relations
	scraperRole = "scraper"
	sourceRole  = "source"
	ownerRole   = "owner"

	// SSL protocol scrape parameters for Istio enabled MTLS components.
	// httpsProtocol is a YAML fragment (scheme plus tls_config); httpProtocol is the plain HTTP scheme fragment.
	httpsProtocol = `scheme: https
tls_config:
  ca_file: /etc/istio-certs/root-cert.pem  
  cert_file: /etc/istio-certs/cert-chain.pem
  key_file: /etc/istio-certs/key.pem
  insecure_skip_verify: true  # Prometheus does not support Istio security naming, thus skip verifying target pod certificate`
	httpProtocol = "scheme: http"
)
   107  
// prometheusScrapeConfigTemplate is the YAML template for a general Prometheus scrape target.
// Used to add a new scrape config to a Prometheus configmap.
// The template contains ##...## placeholder tokens (job name, SSL protocol, namespace,
// cluster name, port order, application name and component name) that match the
// *Holder constants declared in this file.
// Its relabel rules select pods through the verrazzano_io_metricsEnabled/metricsPath/metricsPort
// pod annotations, each suffixed with the ##PORT_ORDER## placeholder.
const prometheusScrapeConfigTemplate = vzconst.PrometheusJobNameKey + `: ##JOB_NAME##
##SSL_PROTOCOL##
kubernetes_sd_configs:
- role: pod
  namespaces:
    names:
    - ##NAMESPACE##
enableHttp2: false
relabel_configs:
- action: replace
  source_labels: null
  target_label: ` + prometheusClusterNameLabel + `
  replacement: ##VERRAZZANO_CLUSTER_NAME##
- action: keep
  source_labels: [__meta_kubernetes_pod_annotation_verrazzano_io_metricsEnabled##PORT_ORDER##,__meta_kubernetes_pod_label_app_oam_dev_name,__meta_kubernetes_pod_label_app_oam_dev_component]
  regex: true;##APP_NAME##;##COMP_NAME##
- action: replace
  source_labels: [__meta_kubernetes_pod_annotation_verrazzano_io_metricsPath##PORT_ORDER##]
  target_label: __metrics_path__
  regex: (.+)
- action: replace
  source_labels: [__address__, __meta_kubernetes_pod_annotation_verrazzano_io_metricsPort##PORT_ORDER##]
  target_label: __address__
  regex: ([^:]+)(?::\d+)?;(\d+)
  replacement: $1:$2
- action: replace
  source_labels: [__meta_kubernetes_namespace]
  target_label: namespace
  regex: (.*)
  replacement: $1
- action: labelmap
  regex: __meta_kubernetes_pod_label_(.+)
- action: replace
  source_labels: [__meta_kubernetes_pod_name]
  target_label: pod_name
- action: labeldrop
  regex: '(controller_revision_hash)'
- action: replace
  source_labels: [name]
  target_label: webapp
  regex: '.*/(.*)$'
  replacement: $1
`
   153  
// prometheusWLSScrapeConfigTemplate is the YAML template for a WebLogic Prometheus scrape target.
// Used to add a new WebLogic scrape config to a Prometheus configmap.
// It has the same overall layout as the general scrape config template, but its relabel
// rules select pods through the prometheus_io_scrape/prometheus_io_path/prometheus_io_port
// pod annotations and it does not use the ##PORT_ORDER## placeholder.
const prometheusWLSScrapeConfigTemplate = vzconst.PrometheusJobNameKey + `: ##JOB_NAME##
##SSL_PROTOCOL##
kubernetes_sd_configs:
- role: pod
  namespaces:
    names:
    - ##NAMESPACE##
enableHttp2: false
relabel_configs:
- action: replace
  source_labels: null
  target_label: ` + prometheusClusterNameLabel + `
  replacement: ##VERRAZZANO_CLUSTER_NAME##
- action: keep
  source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape,__meta_kubernetes_pod_label_app_oam_dev_name,__meta_kubernetes_pod_label_app_oam_dev_component]
  regex: true;##APP_NAME##;##COMP_NAME##
- action: replace
  source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
  target_label: __metrics_path__
  regex: (.+)
- action: replace
  source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
  target_label: __address__
  regex: ([^:]+)(?::\d+)?;(\d+)
  replacement: $1:$2
- action: replace
  source_labels: [__meta_kubernetes_namespace]
  target_label: namespace
  regex: (.*)
  replacement: $1
- action: labelmap
  regex: __meta_kubernetes_pod_label_(.+)
- action: replace
  source_labels: [__meta_kubernetes_pod_name]
  target_label: pod_name
- action: labeldrop
  regex: '(controller_revision_hash)'
- action: replace
  source_labels: [name]
  target_label: webapp
  regex: '.*/(.*)$'
  replacement: $1
`
   199  
// Reconciler reconciles a MetricsTrait object.
// It embeds client.Client, so the Kubernetes client methods (for example Get, Update
// and Status) are called directly on the reconciler.
type Reconciler struct {
	client.Client
	// Log is a zap sugared logger.
	// NOTE(review): the methods visible in this file log through zap.S() or a
	// per-resource logger rather than this field - confirm where it is used.
	Log     *zap.SugaredLogger
	// Scheme is the runtime scheme made available to the reconciler.
	Scheme  *runtime.Scheme
	// Scraper is a scraper identifier.
	// NOTE(review): presumably the default scraper used when a trait does not name one - confirm against fetchTraitDefaults.
	Scraper string
}
   207  
   208  // SetupWithManager creates a controller and adds it to the manager
   209  func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error {
   210  	return ctrl.NewControllerManagedBy(mgr).
   211  		For(&vzapi.MetricsTrait{}).
   212  		Complete(r)
   213  }
   214  
   215  // Reconcile reconciles a metrics trait with related resources
   216  // +kubebuilder:rbac:groups=oam.verrazzano.io,resources=metricstraits,verbs=get;list;watch;create;update;patch;delete
   217  // +kubebuilder:rbac:groups=oam.verrazzano.io,resources=metricstraits/status,verbs=get;update;patch
   218  func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
   219  	if ctx == nil {
   220  		return ctrl.Result{}, errors.New("context cannot be nil")
   221  	}
   222  
   223  	// We do not want any resource to get reconciled if it is in namespace kube-system
   224  	// This is due to a bug found in OKE, it should not affect functionality of any vz operators
   225  	// If this is the case then return success
   226  	if req.Namespace == vzconst.KubeSystem {
   227  		log := zap.S().With(vzlog.FieldResourceNamespace, req.Namespace, vzlog.FieldResourceName, req.Name, vzlog.FieldController, controllerName)
   228  		log.Infof("Metrics trait resource %v should not be reconciled in kube-system namespace, ignoring", req.NamespacedName)
   229  		return reconcile.Result{}, nil
   230  	}
   231  
   232  	// Fetch the trait.
   233  	var err error
   234  	var trait *vzapi.MetricsTrait
   235  	trait, err = vznav.FetchTrait(ctx, r, zap.S(), req.NamespacedName)
   236  	if err != nil {
   237  		return clusters.IgnoreNotFoundWithLog(err, zap.S())
   238  	}
   239  	if trait == nil {
   240  		return reconcile.Result{}, nil
   241  	}
   242  
   243  	log, err := clusters.GetResourceLogger("metricstrait", req.NamespacedName, trait)
   244  	if err != nil {
   245  		zap.S().Errorf("Failed to create controller logger for metrics trait resource: %v", err)
   246  		return clusters.NewRequeueWithDelay(), nil
   247  	}
   248  	log.Oncef("Reconciling metrics trait resource %v, generation %v", req.NamespacedName, trait.Generation)
   249  
   250  	res1, err := r.doReconcile(ctx, trait, log)
   251  	if err != nil {
   252  		return clusters.NewRequeueWithDelay(), err
   253  	}
   254  
   255  	// Do reconcile for the Prometheus Operator controller Prometheus instance
   256  	res2, err := r.doOperatorReconcile(ctx, trait, log)
   257  	if err != nil {
   258  		return clusters.NewRequeueWithDelay(), err
   259  	}
   260  	if clusters.ShouldRequeue(res1) {
   261  		return res1, nil
   262  	}
   263  	if clusters.ShouldRequeue(res2) {
   264  		return res2, nil
   265  	}
   266  
   267  	log.Oncef("Finished reconciling metrics trait %v", req.NamespacedName)
   268  
   269  	return ctrl.Result{}, nil
   270  }
   271  
   272  // doReconcile performs the reconciliation operations for the metrics trait
   273  func (r *Reconciler) doReconcile(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, error) {
   274  	if trait.DeletionTimestamp.IsZero() {
   275  		result, supported, err := r.reconcileTraitCreateOrUpdate(ctx, trait, log)
   276  		if err != nil {
   277  			return result, err
   278  		}
   279  		if !supported {
   280  			// If the workload kind is not supported then delete the trait
   281  			log.Debugf("Deleting trait %s because workload is not supported", trait.Name)
   282  			err = r.Client.Delete(context.TODO(), trait, &client.DeleteOptions{})
   283  		}
   284  		return result, err
   285  	}
   286  	return r.reconcileTraitDelete(ctx, trait, log)
   287  }
   288  
   289  // reconcileTraitDelete reconciles a metrics trait that is being deleted.
   290  func (r *Reconciler) reconcileTraitDelete(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, error) {
   291  	status := r.deleteOrUpdateObsoleteResources(ctx, trait, &reconcileresults.ReconcileResults{}, log)
   292  	// Only remove the finalizer if all related resources were successfully updated.
   293  	if !status.ContainsErrors() {
   294  		if err := r.removeFinalizerIfRequired(ctx, trait, log); err != nil {
   295  			return clusters.NewRequeueWithDelay(), err // the caller always does a requeue if there is an error
   296  		}
   297  	}
   298  	return r.updateTraitStatus(ctx, trait, status, log)
   299  }
   300  
   301  // reconcileTraitCreateOrUpdate reconciles a metrics trait that is being created or updated.
   302  func (r *Reconciler) reconcileTraitCreateOrUpdate(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, bool, error) {
   303  	var err error
   304  
   305  	// Add finalizer if required.
   306  	if err = r.addFinalizerIfRequired(ctx, trait, log); err != nil {
   307  		return reconcile.Result{}, true, err
   308  	}
   309  
   310  	// Fetch workload resource using information from the trait
   311  	var workload *unstructured.Unstructured
   312  	if workload, err = vznav.FetchWorkloadFromTrait(ctx, r, log, trait); err != nil || workload == nil {
   313  		return reconcile.Result{}, true, err
   314  	}
   315  
   316  	// Resolve trait defaults from the trait and the workload.
   317  	var traitDefaults *vzapi.MetricsTraitSpec
   318  	var supported bool
   319  	traitDefaults, supported, err = r.fetchTraitDefaults(ctx, workload, log)
   320  	if err != nil {
   321  		return reconcile.Result{}, supported, err
   322  	}
   323  	if traitDefaults == nil || !supported {
   324  		return reconcile.Result{Requeue: false}, supported, nil
   325  	}
   326  
   327  	// If the legacy Prometheus instance is the scraper, do not attempt to update scrape config, a ServiceMonitor will be
   328  	// created instead.
   329  	if r.isLegacyPrometheusScraper(trait, traitDefaults) {
   330  		return reconcile.Result{}, true, nil
   331  	}
   332  
   333  	var scraper *k8sapps.Deployment
   334  	if scraper, err = r.fetchPrometheusDeploymentFromTrait(ctx, trait, traitDefaults, log); err != nil {
   335  		return reconcile.Result{}, true, err
   336  	}
   337  
   338  	// Find the child resources of the workload based on the childResourceKinds from the
   339  	// workload definition, workload uid and the ownerReferences of the children.
   340  	var children []*unstructured.Unstructured
   341  	if children, err = vznav.FetchWorkloadChildren(ctx, r, log, workload); err != nil {
   342  		return reconcile.Result{}, true, err
   343  	}
   344  
   345  	// Create or update the related resources of the trait and collect the outcomes.
   346  	status := r.createOrUpdateRelatedResources(ctx, trait, workload, traitDefaults, scraper, children, log)
   347  	// Delete or update any previously (but no longer) related resources of the trait.
   348  	status = r.deleteOrUpdateObsoleteResources(ctx, trait, status, log)
   349  
   350  	// Update the status of the trait resource using the outcomes of the create or update.
   351  	traitStatus, err := r.updateTraitStatus(ctx, trait, status, log)
   352  	return traitStatus, true, err
   353  }
   354  
   355  // addFinalizerIfRequired adds the finalizer to the trait if required
   356  // The finalizer is only added if the trait is not being deleted and the finalizer has not previously been added
   357  func (r *Reconciler) addFinalizerIfRequired(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) error {
   358  	if trait.GetDeletionTimestamp().IsZero() && !vzstring.SliceContainsString(trait.Finalizers, finalizerName) {
   359  		traitName := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta)
   360  		log.Debugf("Adding finalizer from trait %s", traitName)
   361  		_, err := controllerutil.CreateOrUpdate(ctx, r.Client, trait, func() error {
   362  			trait.Finalizers = append(trait.Finalizers, finalizerName)
   363  			return nil
   364  		})
   365  		if err != nil {
   366  			return log.ErrorfNewErr("Failed to add finalizer to trait %s: %v", traitName, err)
   367  		}
   368  	}
   369  	return nil
   370  }
   371  
   372  // removeFinalizerIfRequired removes the finalizer from the trait if required
   373  // The finalizer is only removed if the trait is being deleted and the finalizer had been added
   374  func (r *Reconciler) removeFinalizerIfRequired(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) error {
   375  	if !trait.DeletionTimestamp.IsZero() && vzstring.SliceContainsString(trait.Finalizers, finalizerName) {
   376  		traitName := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta)
   377  		log.Debugf("Removing finalizer from trait %s", traitName)
   378  		trait.Finalizers = vzstring.RemoveStringFromSlice(trait.Finalizers, finalizerName)
   379  		_, err := controllerutil.CreateOrUpdate(ctx, r.Client, trait, func() error {
   380  			trait.Finalizers = vzstring.RemoveStringFromSlice(trait.Finalizers, finalizerName)
   381  			return nil
   382  		})
   383  		if err != nil {
   384  			log.Errorf("Failed to remove finalizer for trait %s: %v", traitName, err)
   385  			return err
   386  		}
   387  	}
   388  	return nil
   389  }
   390  
   391  // createOrUpdateRelatedResources creates or updates resources related to this trait
   392  // The related resources are the workload children and the Prometheus config
   393  func (r *Reconciler) createOrUpdateRelatedResources(ctx context.Context, trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, traitDefaults *vzapi.MetricsTraitSpec, deployment *k8sapps.Deployment, children []*unstructured.Unstructured, log vzlog2.VerrazzanoLogger) *reconcileresults.ReconcileResults {
   394  	status := r.createOrUpdateRelatedWorkloads(ctx, trait, workload, traitDefaults, children, log)
   395  	status.RecordOutcome(r.updatePrometheusScraperConfigMap(ctx, trait, workload, traitDefaults, deployment, log))
   396  	return status
   397  }
   398  
   399  // deleteOrUpdateObsoleteResources deletes or updates resources that should no longer be related to this trait.
   400  // This includes previous scrapers when the scraper has changed.
   401  // This also includes previous workload children that are no longer referenced.
   402  func (r *Reconciler) deleteOrUpdateObsoleteResources(ctx context.Context, trait *vzapi.MetricsTrait, status *reconcileresults.ReconcileResults, log vzlog2.VerrazzanoLogger) *reconcileresults.ReconcileResults {
   403  	// For each reference in the trait status references but not in the reconcile status
   404  	//   For references of role "scraper" attempt to remove the scrape config
   405  	//   For references of role "source" attempt to remove the scrape annotations
   406  	//   If the reference is not found or updated dont' add it to the reconcile status
   407  	//   Otherwise carry the reference over in the status as an error.
   408  
   409  	log.Debugf("Deleting obsolete resources for trait: %s", trait.Name)
   410  	// Cleanup the relations that are in the trait status relations but not in the reconcile status.
   411  	update := reconcileresults.ReconcileResults{}
   412  	for _, rel := range trait.Status.Resources {
   413  		if !status.ContainsRelation(rel) {
   414  			switch rel.Role {
   415  			case scraperRole:
   416  				if rel.Kind == promoperapi.ServiceMonitorsKind {
   417  					result, err := r.deleteServiceMonitor(ctx, rel.Namespace, rel.Name, trait, log)
   418  					update.RecordOutcome(rel, result, err)
   419  				} else {
   420  					update.RecordOutcomeIfError(r.deleteOrUpdateScraperConfigMap(ctx, trait, rel, log)) // Need to pass down traitDefaults, current scraper or current scraper deployment
   421  				}
   422  			case sourceRole:
   423  				update.RecordOutcomeIfError(r.deleteOrUpdateMetricSourceResource(ctx, trait, rel, log))
   424  			default:
   425  				// Don't record an outcome for unknown role relations.
   426  				log.Debugf("Skip delete or update of unknown resource role %s", rel.Role)
   427  			}
   428  		}
   429  	}
   430  	// Copy the reconcile outcomes from the current reconcile.
   431  	for i, rel := range status.Relations {
   432  		if !update.ContainsRelation(rel) {
   433  			update.RecordOutcome(status.Relations[i], status.Results[i], status.Errors[i])
   434  		}
   435  	}
   436  
   437  	if !trait.DeletionTimestamp.IsZero() && trait.OwnerReferences != nil {
   438  		for i := range trait.OwnerReferences {
   439  			if trait.OwnerReferences[i].Kind == "ApplicationConfiguration" {
   440  				update.RecordOutcome(r.removedTraitReferencesFromOwner(ctx, &trait.OwnerReferences[i], trait, log))
   441  			}
   442  		}
   443  	}
   444  
   445  	return &update
   446  }
   447  
   448  // deleteOrUpdateMetricSourceResource deletes or updates the related resources that are the source of metrics.
   449  // These are the children of the workloads.  For example for containerized workloads these are deployments.
   450  // For WLS workloads these are pods.
   451  func (r *Reconciler) deleteOrUpdateMetricSourceResource(ctx context.Context, trait *vzapi.MetricsTrait, rel vzapi.QualifiedResourceRelation, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) {
   452  	child := unstructured.Unstructured{}
   453  	child.SetAPIVersion(rel.APIVersion)
   454  	child.SetKind(rel.Kind)
   455  	child.SetNamespace(rel.Namespace)
   456  	child.SetName(rel.Name)
   457  	switch rel.Kind {
   458  	case "Deployment":
   459  		return r.updateRelatedDeployment(ctx, trait, nil, nil, &child, log)
   460  	case "StatefulSet":
   461  		return r.updateRelatedStatefulSet(ctx, trait, nil, nil, &child, log)
   462  	case "Pod":
   463  		return r.updateRelatedPod(ctx, trait, nil, nil, &child, log)
   464  	default:
   465  		// Return a NotFoundError to cause removal the resource relation from the status.
   466  		log.Debugf("Skip delete or update of metrics source of unknown kind %s", rel.Kind)
   467  		return rel, controllerutil.OperationResultNone, apierrors.NewNotFound(schema.GroupResource{Group: rel.APIVersion, Resource: rel.Kind}, rel.Name)
   468  	}
   469  }
   470  
   471  // deleteOrUpdateScraperConfigMap cleans up a scraper (i.e. Prometheus) configmap.
   472  // The scraper config for the trait is removed if present.
   473  func (r *Reconciler) deleteOrUpdateScraperConfigMap(ctx context.Context, trait *vzapi.MetricsTrait, rel vzapi.QualifiedResourceRelation, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) {
   474  	deployment := &k8sapps.Deployment{}
   475  	err := r.Get(ctx, client.ObjectKey{Namespace: rel.Namespace, Name: rel.Name}, deployment)
   476  	if err != nil {
   477  		return rel, controllerutil.OperationResultNone, client.IgnoreNotFound(err)
   478  	}
   479  	return r.updatePrometheusScraperConfigMap(ctx, trait, nil, nil, deployment, log)
   480  }
   481  
   482  // updatePrometheusScraperConfigMap updates the Prometheus scraper configmap.
   483  // This updates only the scrape_configs section of the Prometheus configmap.
   484  // Only the rules for the provided trait will be affected.
   485  // trait - The trait to update scrape_config rules for.
   486  // traitDefaults - Default to use for values not provided in the trait.
   487  // deployment - The Prometheus deployment.
   488  func (r *Reconciler) updatePrometheusScraperConfigMap(ctx context.Context, trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, traitDefaults *vzapi.MetricsTraitSpec, deployment *k8sapps.Deployment, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) {
   489  	rel := vzapi.QualifiedResourceRelation{APIVersion: deployment.APIVersion, Kind: deployment.Kind, Name: deployment.Name, Namespace: deployment.Namespace, Role: scraperRole}
   490  
   491  	// Fetch the secret by name if it is provided in either the trait or the trait defaults.
   492  	secret, err := fetchSourceCredentialsSecretIfRequired(ctx, trait, traitDefaults, workload, r.Client)
   493  	if err != nil {
   494  		return rel, controllerutil.OperationResultNone, err
   495  	}
   496  
   497  	configmapName, err := r.findPrometheusScrapeConfigMapNameFromDeployment(deployment, log)
   498  	if err != nil {
   499  		return rel, controllerutil.OperationResultNone, err
   500  	}
   501  
   502  	configmap := &k8score.ConfigMap{}
   503  	err = r.Get(ctx, client.ObjectKey{Namespace: deployment.Namespace, Name: configmapName}, configmap)
   504  	if err != nil {
   505  		// Don't create the config map if it doesn't already exist - that is the sole responsibility of
   506  		// the Verrazzano Monitoring Operator
   507  		return rel, controllerutil.OperationResultNone, client.IgnoreNotFound(err)
   508  	}
   509  
   510  	existingConfigmap := configmap.DeepCopyObject()
   511  
   512  	if configmap.CreationTimestamp.IsZero() {
   513  		log.Debugf("Create Prometheus configmap %s", vznav.GetNamespacedNameFromObjectMeta(configmap.ObjectMeta))
   514  	} else {
   515  		log.Debugf("Update Prometheus configmap %s", vznav.GetNamespacedNameFromObjectMeta(configmap.ObjectMeta))
   516  	}
   517  	yamlStr, exists := configmap.Data[prometheusConfigKey]
   518  	if !exists {
   519  		yamlStr = ""
   520  	}
   521  	prometheusConf, err := parseYAMLString(yamlStr)
   522  	if err != nil {
   523  		return rel, controllerutil.OperationResultNone, err
   524  	}
   525  	prometheusConf, err = mutatePrometheusScrapeConfig(ctx, trait, traitDefaults, prometheusConf, secret, workload, r.Client)
   526  	if err != nil {
   527  		return rel, controllerutil.OperationResultNone, err
   528  	}
   529  	yamlStr, err = writeYAMLString(prometheusConf)
   530  	if err != nil {
   531  		return rel, controllerutil.OperationResultNone, err
   532  	}
   533  	if configmap.Data == nil {
   534  		configmap.Data = map[string]string{}
   535  	}
   536  	configmap.Data[prometheusConfigKey] = yamlStr
   537  
   538  	// compare and don't update if unchanged
   539  	if equality.Semantic.DeepEqual(existingConfigmap, configmap) {
   540  		return rel, controllerutil.OperationResultNone, nil
   541  	}
   542  
   543  	err = r.Update(ctx, configmap)
   544  	// If the Prometheus configmap was updated, the VMI Prometheus has ConfigReloader sidecar to signal Prometheus to reload config
   545  	if err != nil {
   546  		return rel, controllerutil.OperationResultNone, err
   547  	}
   548  	return rel, controllerutil.OperationResultUpdated, nil
   549  }
   550  
   551  // isLegacyPrometheusScraper returns true if the scraper is the legacy VMO-managed Prometheus.
   552  func (r *Reconciler) isLegacyPrometheusScraper(trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec) bool {
   553  	scraperRef := trait.Spec.Scraper
   554  	if scraperRef == nil {
   555  		scraperRef = traitDefaults.Scraper
   556  	}
   557  	return *scraperRef == constants.DefaultScraperName
   558  }
   559  
   560  // fetchPrometheusDeploymentFromTrait fetches the Prometheus deployment from information in the trait.
   561  func (r *Reconciler) fetchPrometheusDeploymentFromTrait(ctx context.Context, trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, log vzlog2.VerrazzanoLogger) (*k8sapps.Deployment, error) {
   562  	scraperRef := trait.Spec.Scraper
   563  	if scraperRef == nil {
   564  		scraperRef = traitDefaults.Scraper
   565  	}
   566  	scraperName, err := vznav.ParseNamespacedNameFromQualifiedName(*scraperRef)
   567  	if err != nil {
   568  		return nil, err
   569  	}
   570  	deployment := &k8sapps.Deployment{}
   571  	err = r.Get(ctx, client.ObjectKey{Namespace: scraperName.Namespace, Name: scraperName.Name}, deployment)
   572  	if err != nil {
   573  		return nil, err
   574  	}
   575  	log.Debugf("Found Prometheus deployment %s", vznav.GetNamespacedNameFromObjectMeta(deployment.ObjectMeta))
   576  	return deployment, nil
   577  }
   578  
   579  // findPrometheusScrapeConfigMapNameFromDeployment finds the Prometheus configmap name from the Prometheus deployment.
   580  func (r *Reconciler) findPrometheusScrapeConfigMapNameFromDeployment(deployment *k8sapps.Deployment, log vzlog2.VerrazzanoLogger) (string, error) {
   581  	volumes := deployment.Spec.Template.Spec.Volumes
   582  	for _, volume := range volumes {
   583  		if volume.Name == "config-volume" && volume.ConfigMap != nil && len(volume.ConfigMap.Name) > 0 {
   584  			name := volume.ConfigMap.Name
   585  			log.Debugf("Found Prometheus configmap name %s", name)
   586  			return name, nil
   587  		}
   588  	}
   589  	return "", fmt.Errorf("failed to find Prometheus configmap name from deployment %s", vznav.GetNamespacedNameFromObjectMeta(deployment.ObjectMeta))
   590  }
   591  
   592  // updateTraitStatus updates the trait's status conditions and resources if they have changed.
   593  // The return value can be used as the result of the Reconcile method.
   594  func (r *Reconciler) updateTraitStatus(ctx context.Context, trait *vzapi.MetricsTrait, results *reconcileresults.ReconcileResults, log vzlog2.VerrazzanoLogger) (reconcile.Result, error) {
   595  	name := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta)
   596  
   597  	// If the status content has changed persist the updated status.
   598  	if trait.DeletionTimestamp.IsZero() && updateStatusIfRequired(&trait.Status, results) {
   599  		err := r.Status().Update(ctx, trait)
   600  		if err != nil {
   601  			return vzlog.IgnoreConflictWithLog(fmt.Sprintf("Failed to update metrics trait %s status", name.Name), err, zap.S())
   602  		}
   603  		log.Debugf("Updated metrics trait %s status", name.Name)
   604  	}
   605  
   606  	// If the results contained errors then requeue immediately.
   607  	if results.ContainsErrors() {
   608  		vzlog.ResultErrorsWithLog(fmt.Sprintf("Failed to reconcile metrics trait %s", name), results.Errors, zap.S())
   609  		return reconcile.Result{Requeue: true}, nil
   610  	}
   611  
   612  	// If the status has not change and there are no errors
   613  	// requeue with a jittered delay to account for situations where a workload
   614  	// changes but without necessarily updating the trait spec.
   615  	var seconds = rand.IntnRange(45, 90)
   616  	var duration = time.Duration(seconds) * time.Second
   617  	log.Debugf("Reconciled metrics trait %s successfully", name.Name)
   618  	return reconcile.Result{Requeue: true, RequeueAfter: duration}, nil
   619  }
   620  
   621  // updateStatusIfRequired updates the traits status (i.e. resources and conditions) if they have changed.
   622  // Returns a boolean indicating if status resources or conditions have been updated.
   623  func updateStatusIfRequired(status *vzapi.MetricsTraitStatus, results *reconcileresults.ReconcileResults) bool {
   624  	updated := false
   625  	if !vzapi.QualifiedResourceRelationSlicesEquivalent(status.Resources, results.Relations) {
   626  		for i, relation := range results.Relations {
   627  			if !vzapi.QualifiedResourceRelationsContain(status.Resources, &results.Relations[i]) {
   628  				status.Resources = append(status.Resources, relation)
   629  			}
   630  		}
   631  		updated = true
   632  	}
   633  	conditionedStatus := results.CreateConditionedStatus()
   634  	if !reconcileresults.ConditionedStatusEquivalent(&status.ConditionedStatus, &conditionedStatus) {
   635  		status.ConditionedStatus = conditionedStatus
   636  		updated = true
   637  	}
   638  	return updated
   639  }
   640  
   641  // mutatePrometheusScrapeConfig mutates the Prometheus scrape configuration.
   642  // Scrap configuration rules will be added, updated, deleted depending on the state of the trait.
   643  func mutatePrometheusScrapeConfig(ctx context.Context, trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, prometheusScrapeConfig *gabs.Container, secret *k8score.Secret, workload *unstructured.Unstructured, c client.Client) (*gabs.Container, error) {
   644  	ports := trait.Spec.Ports
   645  	if len(ports) == 0 {
   646  		// create a port spec from the existing port
   647  		ports = []vzapi.PortSpec{{Port: trait.Spec.Port, Path: trait.Spec.Path}}
   648  	} else {
   649  		// if there are existing ports and a port/path setting, add the latter to the ports
   650  		if trait.Spec.Port != nil {
   651  			// add the port to the ports
   652  			path := trait.Spec.Path
   653  			if path == nil {
   654  				path = traitDefaults.Path
   655  			}
   656  			portSpec := vzapi.PortSpec{
   657  				Port: trait.Spec.Port,
   658  				Path: path,
   659  			}
   660  			ports = append(ports, portSpec)
   661  		}
   662  	}
   663  
   664  	for i := range ports {
   665  		oldScrapeConfigs := prometheusScrapeConfig.Search(prometheusScrapeConfigsLabel).Children()
   666  		prometheusScrapeConfig.Array(prometheusScrapeConfigsLabel) // zero out the array of scrape configs
   667  		newScrapeJob, newScrapeConfig, err := createScrapeConfigFromTrait(ctx, trait, i, secret, workload, c)
   668  		if err != nil {
   669  			return prometheusScrapeConfig, err
   670  		}
   671  		existingReplaced := false
   672  		for _, oldScrapeConfig := range oldScrapeConfigs {
   673  			oldScrapeJob := oldScrapeConfig.Search(vzconst.PrometheusJobNameKey).Data()
   674  			if newScrapeJob == oldScrapeJob {
   675  				// If the scrape config should be removed then skip adding it to the result slice.
   676  				// This will occur in three situations.
   677  				// 1. The trait is being deleted.
   678  				// 2. The trait scraper has been changed and the old scrape config is being updated.
   679  				//    In this case the traitDefaults and newScrapeConfig will be nil.
   680  				// 3. The trait is being disabled.
   681  				if trait.DeletionTimestamp.IsZero() && traitDefaults != nil && newScrapeConfig != nil && isEnabled(trait) {
   682  					prometheusScrapeConfig.ArrayAppendP(newScrapeConfig.Data(), prometheusScrapeConfigsLabel)
   683  				}
   684  				existingReplaced = true
   685  			} else {
   686  				prometheusScrapeConfig.ArrayAppendP(oldScrapeConfig.Data(), prometheusScrapeConfigsLabel)
   687  			}
   688  		}
   689  		// If an existing config was not replaced and there is new config (i.e. newScrapeConfig != nil) then add the new config.
   690  		if !existingReplaced && newScrapeConfig != nil {
   691  			prometheusScrapeConfig.ArrayAppendP(newScrapeConfig.Data(), prometheusScrapeConfigsLabel)
   692  		}
   693  	}
   694  	return prometheusScrapeConfig, nil
   695  }
   696  
   697  // MutateAnnotations mutates annotations with values used by the scraper config.
   698  // Annotations are either set or removed depending on the state of the trait.
   699  func MutateAnnotations(trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, annotations map[string]string) map[string]string {
   700  	mutated := annotations
   701  
   702  	ports := trait.Spec.Ports
   703  	if len(ports) == 0 {
   704  		// create a port spec from the existing port
   705  		ports = []vzapi.PortSpec{{Port: trait.Spec.Port, Path: trait.Spec.Path}}
   706  	} else {
   707  		// if there are existing ports and a port/path setting, add the latter to the ports
   708  		if trait.Spec.Port != nil {
   709  			// add the port to the ports
   710  			path := trait.Spec.Path
   711  			if path == nil {
   712  				path = traitDefaults.Path
   713  			}
   714  			portSpec := vzapi.PortSpec{
   715  				Port: trait.Spec.Port,
   716  				Path: path,
   717  			}
   718  			ports = append(ports, portSpec)
   719  		}
   720  	}
   721  
   722  	// If the trait is being deleted or disabled, remove the annotations.
   723  	if !trait.DeletionTimestamp.IsZero() || !isEnabled(trait) {
   724  		for k := range mutated {
   725  			if strings.HasPrefix(k, verrazzanoMetricsAnnotationPrefix) {
   726  				delete(mutated, k)
   727  			}
   728  		}
   729  		return mutated
   730  	}
   731  
   732  	// Merge trait, default and existing value.
   733  	var found bool
   734  	var port string
   735  	for i, portSpec := range ports {
   736  
   737  		mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsEnabledAnnotation, i), strconv.FormatBool(true))
   738  
   739  		if portSpec.Port != nil {
   740  			port = strconv.Itoa(*portSpec.Port)
   741  		} else {
   742  			port, found = annotations[prometheusPortAnnotation]
   743  			if !found {
   744  				port = strconv.Itoa(*traitDefaults.Ports[0].Port)
   745  			}
   746  		}
   747  		mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsPortAnnotation, i), port)
   748  
   749  		// Merge trait, default and existing value.
   750  		var path string
   751  		if portSpec.Path != nil {
   752  			path = *portSpec.Path
   753  		} else {
   754  			path, found = annotations[prometheusPathAnnotation]
   755  			if !found {
   756  				if traitDefaults.Ports[0].Path != nil {
   757  					path = *traitDefaults.Ports[0].Path
   758  				}
   759  			}
   760  		}
   761  		mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsPathAnnotation, i), path)
   762  	}
   763  
   764  	return mutated
   765  }
   766  
   767  func formatMetric(format string, i int) string {
   768  	suffix := ""
   769  	if i > 0 {
   770  		suffix = strconv.Itoa(i)
   771  	}
   772  	return fmt.Sprintf(format, suffix)
   773  }
   774  
   775  // MutateLabels mutates the labels associated with a related resources.
   776  func MutateLabels(trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, labels map[string]string) map[string]string {
   777  	mutated := labels
   778  	// If the trait is not being deleted, copy specific labels from the trait.
   779  	if trait.DeletionTimestamp.IsZero() {
   780  		mutated = copyStringMapEntries(mutated, trait.Labels, oam.LabelAppName, oam.LabelAppComponent)
   781  	}
   782  	return mutated
   783  }
   784  
   785  // createPrometheusScrapeConfigMapJobName creates a Prometheus scrape configmap job name from a trait.
   786  // Format is {oam_app}_{cluster}_{namespace}_{oam_comp}
   787  func createPrometheusScrapeConfigMapJobName(trait *vzapi.MetricsTrait, portNum int) (string, error) {
   788  	return createJobOrServiceMonitorName(trait, portNum)
   789  }
   790  
   791  // createScrapeConfigFromTrait creates Prometheus scrape config for a trait.
   792  // This populates the Prometheus scrape config template.
   793  // The job name is returned.
   794  // The YAML container populated from the Prometheus scrape config template is returned.
   795  func createScrapeConfigFromTrait(ctx context.Context, trait *vzapi.MetricsTrait, portIncrement int, secret *k8score.Secret, workload *unstructured.Unstructured, c client.Client) (string, *gabs.Container, error) {
   796  	job, err := createPrometheusScrapeConfigMapJobName(trait, portIncrement)
   797  	if err != nil {
   798  		return "", nil, err
   799  	}
   800  
   801  	// If the metricsTrait is being disabled then return nil for the config
   802  	if !isEnabled(trait) {
   803  		return job, nil, nil
   804  	}
   805  
   806  	// If workload is nil then the trait is being deleted so no config is required
   807  	if workload != nil {
   808  		// Populate the Prometheus scrape config template
   809  		portOrderStr := ""
   810  		if portIncrement > 0 {
   811  			portOrderStr = strconv.Itoa(portIncrement)
   812  		}
   813  		context := map[string]string{
   814  			appNameHolder:       trait.Labels[oam.LabelAppName],
   815  			compNameHolder:      trait.Labels[oam.LabelAppComponent],
   816  			jobNameHolder:       job,
   817  			portOrderHolder:     portOrderStr,
   818  			namespaceHolder:     trait.Namespace,
   819  			sslProtocolHolder:   httpProtocol,
   820  			vzClusterNameHolder: clusters.GetClusterName(ctx, c)}
   821  
   822  		var configTemplate string
   823  		https, err := useHTTPSForScrapeTarget(ctx, c, trait)
   824  		if err != nil {
   825  			return "", nil, err
   826  		}
   827  
   828  		if https {
   829  			context[sslProtocolHolder] = httpsProtocol
   830  		}
   831  		configTemplate = prometheusScrapeConfigTemplate
   832  
   833  		wlsWorkload, err := isWLSWorkload(workload)
   834  		if err != nil {
   835  			return "", nil, err
   836  		}
   837  		if wlsWorkload {
   838  			configTemplate = prometheusWLSScrapeConfigTemplate
   839  		}
   840  
   841  		// Populate the Prometheus scrape config template
   842  		template := mergeTemplateWithContext(configTemplate, context)
   843  
   844  		// Parse the populate the Prometheus scrape config template.
   845  		config, err := parseYAMLString(template)
   846  		if err != nil {
   847  			return job, nil, fmt.Errorf("failed to parse built-in Prometheus scrape config template: %w", err)
   848  		}
   849  		// Add basic auth credentials if provided
   850  		if secret != nil {
   851  			username, secretFound := secret.Data["username"]
   852  			if secretFound {
   853  				config.Set(string(username), basicAuthLabel, basicAuthUsernameLabel)
   854  			}
   855  			password, passwordFound := secret.Data["password"]
   856  			if passwordFound {
   857  				config.Set(string(password), basicAuthLabel, basicPathPasswordLabel)
   858  			}
   859  		}
   860  		return job, config, nil
   861  	}
   862  
   863  	// If the trait is being deleted (i.e. workload==nil) then no config is required.
   864  	return job, nil, nil
   865  }
   866  
   867  // removedTraitReferencesFromOwner removes traits from components of owner ApplicationConfiguration.
   868  func (r *Reconciler) removedTraitReferencesFromOwner(ctx context.Context, ownerRef *metav1.OwnerReference, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) {
   869  	rel := vzapi.QualifiedResourceRelation{APIVersion: "core.oam.dev/v1alpha2", Kind: "ApplicationConfiguration", Namespace: trait.GetNamespace(), Name: ownerRef.Name, Role: ownerRole}
   870  	var appConfig oamv1.ApplicationConfiguration
   871  	err := r.Client.Get(ctx, types.NamespacedName{Namespace: trait.GetNamespace(), Name: ownerRef.Name}, &appConfig)
   872  	if err != nil {
   873  		log.Debugf("Unable to fetch ApplicationConfiguration %s/%s, error: %v", trait.GetNamespace(), ownerRef.Name, err)
   874  		return rel, controllerutil.OperationResultNone, err
   875  	}
   876  
   877  	if appConfig.Spec.Components != nil {
   878  		traitsRemoved := false
   879  		for i := range appConfig.Spec.Components {
   880  			component := &appConfig.Spec.Components[i]
   881  			if component.Traits != nil {
   882  				remainingTraits := []oamv1.ComponentTrait{}
   883  				for _, componentTrait := range component.Traits {
   884  					remainingTraits = append(remainingTraits, componentTrait)
   885  					componentTraitUnstructured, err := vznav.ConvertRawExtensionToUnstructured(&componentTrait.Trait)
   886  					if err != nil || componentTraitUnstructured == nil {
   887  						log.Debugf("Unable to convert trait for component: %s of application configuration: %s/%s, error: %v", component.ComponentName, appConfig.GetNamespace(), appConfig.GetName(), err)
   888  					} else {
   889  						if componentTraitUnstructured.GetAPIVersion() == trait.APIVersion && componentTraitUnstructured.GetKind() == trait.Kind {
   890  							if compName, ok := trait.Labels[oam.LabelAppComponent]; ok && compName == component.ComponentName {
   891  								log.Infof("Removing trait %s/%s for component: %s of application configuration: %s/%s", componentTraitUnstructured.GetAPIVersion(), componentTraitUnstructured.GetKind(), component.ComponentName, appConfig.GetNamespace(), appConfig.GetName())
   892  								remainingTraits = remainingTraits[:len(remainingTraits)-1]
   893  							}
   894  						}
   895  					}
   896  				}
   897  				if len(remainingTraits) < len(component.Traits) {
   898  					component.Traits = remainingTraits
   899  					traitsRemoved = true
   900  				}
   901  			}
   902  		}
   903  		if traitsRemoved {
   904  			log.Infof("Updating ApplicationConfiguration %s/%s", trait.GetNamespace(), ownerRef.Name)
   905  			err = r.Client.Update(ctx, &appConfig)
   906  			if err != nil {
   907  				log.Infof("Unable to update ApplicationConfiguration %s/%s, error: %v", trait.GetNamespace(), ownerRef.Name, err)
   908  				return rel, controllerutil.OperationResultNone, err
   909  			}
   910  
   911  			return rel, controllerutil.OperationResultUpdated, err
   912  		}
   913  	}
   914  	return rel, controllerutil.OperationResultNone, nil
   915  }