github.com/verrazzano/verrazzano@v1.7.1/application-operator/controllers/metricstrait/metricstrait_controller.go (about) 1 // Copyright (c) 2020, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package metricstrait 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "strconv" 11 "strings" 12 "time" 13 14 gabs "github.com/Jeffail/gabs/v2" 15 oamv1 "github.com/crossplane/oam-kubernetes-runtime/apis/core/v1alpha2" 16 "github.com/crossplane/oam-kubernetes-runtime/pkg/oam" 17 promoperapi "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" 18 vzapi "github.com/verrazzano/verrazzano/application-operator/apis/oam/v1alpha1" 19 "github.com/verrazzano/verrazzano/application-operator/constants" 20 "github.com/verrazzano/verrazzano/application-operator/controllers/clusters" 21 vznav "github.com/verrazzano/verrazzano/application-operator/controllers/navigation" 22 "github.com/verrazzano/verrazzano/application-operator/controllers/reconcileresults" 23 vzconst "github.com/verrazzano/verrazzano/pkg/constants" 24 vzlog "github.com/verrazzano/verrazzano/pkg/log" 25 vzlog2 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 26 vzstring "github.com/verrazzano/verrazzano/pkg/string" 27 "go.uber.org/zap" 28 k8sapps "k8s.io/api/apps/v1" 29 k8score "k8s.io/api/core/v1" 30 "k8s.io/apimachinery/pkg/api/equality" 31 apierrors "k8s.io/apimachinery/pkg/api/errors" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 34 "k8s.io/apimachinery/pkg/runtime" 35 "k8s.io/apimachinery/pkg/runtime/schema" 36 "k8s.io/apimachinery/pkg/types" 37 "k8s.io/apimachinery/pkg/util/rand" 38 ctrl "sigs.k8s.io/controller-runtime" 39 "sigs.k8s.io/controller-runtime/pkg/client" 40 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 41 "sigs.k8s.io/controller-runtime/pkg/reconcile" 42 ) 43 44 const ( 45 // Kubernetes resource Kinds 46 deploymentKind = "Deployment" 47 serviceKind = "Service" 48 statefulSetKind = "StatefulSet" 49 podKind = "Pod" 50 controllerName = "metricstrait" 51 52 // In code defaults for metrics trait configuration 53 defaultWLSAdminScrapePort = 7001 54 defaultCohScrapePort = 9612 55 defaultScrapePort = 8080 56 defaultScrapePath = "/metrics" 57 defaultWLSScrapePath = "/wls-exporter/metrics" 58 59 // The finalizer name used by this controller 60 finalizerName = "metricstrait.finalizers.verrazzano.io" 61 62 // Markers used during the processing of Prometheus scrape configurations 63 prometheusConfigKey = "prometheus.yml" 64 prometheusScrapeConfigsLabel = "scrape_configs" 65 prometheusClusterNameLabel = "verrazzano_cluster" 66 67 // Annotation names for metrics read by the controller 68 prometheusPortAnnotation = "prometheus.io/port" 69 prometheusPathAnnotation = "prometheus.io/path" 70 71 // Annotation names for metrics set by the controller 72 verrazzanoMetricsAnnotationPrefix = "verrazzano.io/metrics" 73 verrazzanoMetricsPortAnnotation = "verrazzano.io/metricsPort%s" 74 verrazzanoMetricsPathAnnotation = "verrazzano.io/metricsPath%s" 75 verrazzanoMetricsEnabledAnnotation = "verrazzano.io/metricsEnabled%s" 76 77 // basicAuthLabel config label for Prometheus basic auth 78 basicAuthLabel = "basic_auth" 79 // basicAuthUsernameLabel config label for Prometheus username 80 basicAuthUsernameLabel = "username" 81 // basicPathPasswordLabel config label for Prometheus password 82 basicPathPasswordLabel = "password" 83 84 // Template placeholders for the Prometheus scrape config template 85 appNameHolder = "##APP_NAME##" 86 compNameHolder = "##COMP_NAME##" 87 jobNameHolder = "##JOB_NAME##" 88 portOrderHolder = "##PORT_ORDER##" 89 namespaceHolder = "##NAMESPACE##" 90 sslProtocolHolder = "##SSL_PROTOCOL##" 91 vzClusterNameHolder = "##VERRAZZANO_CLUSTER_NAME##" 92 93 // Roles for use in qualified resource relations 94 scraperRole = "scraper" 95 sourceRole = "source" 96 ownerRole = "owner" 97 98 // SSL protocol scrape parameters for Istio enabled MTLS components 99 httpsProtocol = `scheme: https 100 tls_config: 101 ca_file: /etc/istio-certs/root-cert.pem 102 cert_file: /etc/istio-certs/cert-chain.pem 103 key_file: /etc/istio-certs/key.pem 104 insecure_skip_verify: true # Prometheus does not support Istio security naming, thus skip verifying target pod certificate` 105 httpProtocol = "scheme: http" 106 ) 107 108 // prometheusScrapeConfigTemplate configuration for general Prometheus scrape target template 109 // Used to add new scrape config to a Prometheus configmap 110 const prometheusScrapeConfigTemplate = vzconst.PrometheusJobNameKey + `: ##JOB_NAME## 111 ##SSL_PROTOCOL## 112 kubernetes_sd_configs: 113 - role: pod 114 namespaces: 115 names: 116 - ##NAMESPACE## 117 enableHttp2: false 118 relabel_configs: 119 - action: replace 120 source_labels: null 121 target_label: ` + prometheusClusterNameLabel + ` 122 replacement: ##VERRAZZANO_CLUSTER_NAME## 123 - action: keep 124 source_labels: [__meta_kubernetes_pod_annotation_verrazzano_io_metricsEnabled##PORT_ORDER##,__meta_kubernetes_pod_label_app_oam_dev_name,__meta_kubernetes_pod_label_app_oam_dev_component] 125 regex: true;##APP_NAME##;##COMP_NAME## 126 - action: replace 127 source_labels: [__meta_kubernetes_pod_annotation_verrazzano_io_metricsPath##PORT_ORDER##] 128 target_label: __metrics_path__ 129 regex: (.+) 130 - action: replace 131 source_labels: [__address__, __meta_kubernetes_pod_annotation_verrazzano_io_metricsPort##PORT_ORDER##] 132 target_label: __address__ 133 regex: ([^:]+)(?::\d+)?;(\d+) 134 replacement: $1:$2 135 - action: replace 136 source_labels: [__meta_kubernetes_namespace] 137 target_label: namespace 138 regex: (.*) 139 replacement: $1 140 - action: labelmap 141 regex: __meta_kubernetes_pod_label_(.+) 142 - action: replace 143 source_labels: [__meta_kubernetes_pod_name] 144 target_label: pod_name 145 - action: labeldrop 146 regex: '(controller_revision_hash)' 147 - action: replace 148 source_labels: [name] 149 target_label: webapp 150 regex: '.*/(.*)$' 151 replacement: $1 152 ` 153 154 // prometheusWLSScrapeConfigTemplate configuration for WebLogic Prometheus scrape target template 155 // Used to add new WebLogic scrape config to a Prometheus configmap 156 const prometheusWLSScrapeConfigTemplate = vzconst.PrometheusJobNameKey + `: ##JOB_NAME## 157 ##SSL_PROTOCOL## 158 kubernetes_sd_configs: 159 - role: pod 160 namespaces: 161 names: 162 - ##NAMESPACE## 163 enableHttp2: false 164 relabel_configs: 165 - action: replace 166 source_labels: null 167 target_label: ` + prometheusClusterNameLabel + ` 168 replacement: ##VERRAZZANO_CLUSTER_NAME## 169 - action: keep 170 source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape,__meta_kubernetes_pod_label_app_oam_dev_name,__meta_kubernetes_pod_label_app_oam_dev_component] 171 regex: true;##APP_NAME##;##COMP_NAME## 172 - action: replace 173 source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 174 target_label: __metrics_path__ 175 regex: (.+) 176 - action: replace 177 source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] 178 target_label: __address__ 179 regex: ([^:]+)(?::\d+)?;(\d+) 180 replacement: $1:$2 181 - action: replace 182 source_labels: [__meta_kubernetes_namespace] 183 target_label: namespace 184 regex: (.*) 185 replacement: $1 186 - action: labelmap 187 regex: __meta_kubernetes_pod_label_(.+) 188 - action: replace 189 source_labels: [__meta_kubernetes_pod_name] 190 target_label: pod_name 191 - action: labeldrop 192 regex: '(controller_revision_hash)' 193 - action: replace 194 source_labels: [name] 195 target_label: webapp 196 regex: '.*/(.*)$' 197 replacement: $1 198 ` 199 200 // Reconciler reconciles a MetricsTrait object 201 type Reconciler struct { 202 client.Client 203 Log *zap.SugaredLogger 204 Scheme *runtime.Scheme 205 Scraper string 206 } 207 208 // SetupWithManager creates a controller and adds it to the manager 209 func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { 210 return ctrl.NewControllerManagedBy(mgr). 211 For(&vzapi.MetricsTrait{}). 212 Complete(r) 213 } 214 215 // Reconcile reconciles a metrics trait with related resources 216 // +kubebuilder:rbac:groups=oam.verrazzano.io,resources=metricstraits,verbs=get;list;watch;create;update;patch;delete 217 // +kubebuilder:rbac:groups=oam.verrazzano.io,resources=metricstraits/status,verbs=get;update;patch 218 func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 219 if ctx == nil { 220 return ctrl.Result{}, errors.New("context cannot be nil") 221 } 222 223 // We do not want any resource to get reconciled if it is in namespace kube-system 224 // This is due to a bug found in OKE, it should not affect functionality of any vz operators 225 // If this is the case then return success 226 if req.Namespace == vzconst.KubeSystem { 227 log := zap.S().With(vzlog.FieldResourceNamespace, req.Namespace, vzlog.FieldResourceName, req.Name, vzlog.FieldController, controllerName) 228 log.Infof("Metrics trait resource %v should not be reconciled in kube-system namespace, ignoring", req.NamespacedName) 229 return reconcile.Result{}, nil 230 } 231 232 // Fetch the trait. 233 var err error 234 var trait *vzapi.MetricsTrait 235 trait, err = vznav.FetchTrait(ctx, r, zap.S(), req.NamespacedName) 236 if err != nil { 237 return clusters.IgnoreNotFoundWithLog(err, zap.S()) 238 } 239 if trait == nil { 240 return reconcile.Result{}, nil 241 } 242 243 log, err := clusters.GetResourceLogger("metricstrait", req.NamespacedName, trait) 244 if err != nil { 245 zap.S().Errorf("Failed to create controller logger for metrics trait resource: %v", err) 246 return clusters.NewRequeueWithDelay(), nil 247 } 248 log.Oncef("Reconciling metrics trait resource %v, generation %v", req.NamespacedName, trait.Generation) 249 250 res1, err := r.doReconcile(ctx, trait, log) 251 if err != nil { 252 return clusters.NewRequeueWithDelay(), err 253 } 254 255 // Do reconcile for the Prometheus Operator controller Prometheus instance 256 res2, err := r.doOperatorReconcile(ctx, trait, log) 257 if err != nil { 258 return clusters.NewRequeueWithDelay(), err 259 } 260 if clusters.ShouldRequeue(res1) { 261 return res1, nil 262 } 263 if clusters.ShouldRequeue(res2) { 264 return res2, nil 265 } 266 267 log.Oncef("Finished reconciling metrics trait %v", req.NamespacedName) 268 269 return ctrl.Result{}, nil 270 } 271 272 // doReconcile performs the reconciliation operations for the metrics trait 273 func (r *Reconciler) doReconcile(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, error) { 274 if trait.DeletionTimestamp.IsZero() { 275 result, supported, err := r.reconcileTraitCreateOrUpdate(ctx, trait, log) 276 if err != nil { 277 return result, err 278 } 279 if !supported { 280 // If the workload kind is not supported then delete the trait 281 log.Debugf("Deleting trait %s because workload is not supported", trait.Name) 282 err = r.Client.Delete(context.TODO(), trait, &client.DeleteOptions{}) 283 } 284 return result, err 285 } 286 return r.reconcileTraitDelete(ctx, trait, log) 287 } 288 289 // reconcileTraitDelete reconciles a metrics trait that is being deleted. 290 func (r *Reconciler) reconcileTraitDelete(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, error) { 291 status := r.deleteOrUpdateObsoleteResources(ctx, trait, &reconcileresults.ReconcileResults{}, log) 292 // Only remove the finalizer if all related resources were successfully updated. 293 if !status.ContainsErrors() { 294 if err := r.removeFinalizerIfRequired(ctx, trait, log); err != nil { 295 return clusters.NewRequeueWithDelay(), err // the caller always does a requeue if there is an error 296 } 297 } 298 return r.updateTraitStatus(ctx, trait, status, log) 299 } 300 301 // reconcileTraitCreateOrUpdate reconciles a metrics trait that is being created or updated. 302 func (r *Reconciler) reconcileTraitCreateOrUpdate(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (ctrl.Result, bool, error) { 303 var err error 304 305 // Add finalizer if required. 306 if err = r.addFinalizerIfRequired(ctx, trait, log); err != nil { 307 return reconcile.Result{}, true, err 308 } 309 310 // Fetch workload resource using information from the trait 311 var workload *unstructured.Unstructured 312 if workload, err = vznav.FetchWorkloadFromTrait(ctx, r, log, trait); err != nil || workload == nil { 313 return reconcile.Result{}, true, err 314 } 315 316 // Resolve trait defaults from the trait and the workload. 317 var traitDefaults *vzapi.MetricsTraitSpec 318 var supported bool 319 traitDefaults, supported, err = r.fetchTraitDefaults(ctx, workload, log) 320 if err != nil { 321 return reconcile.Result{}, supported, err 322 } 323 if traitDefaults == nil || !supported { 324 return reconcile.Result{Requeue: false}, supported, nil 325 } 326 327 // If the legacy Prometheus instance is the scraper, do not attempt to update scrape config, a ServiceMonitor will be 328 // created instead. 329 if r.isLegacyPrometheusScraper(trait, traitDefaults) { 330 return reconcile.Result{}, true, nil 331 } 332 333 var scraper *k8sapps.Deployment 334 if scraper, err = r.fetchPrometheusDeploymentFromTrait(ctx, trait, traitDefaults, log); err != nil { 335 return reconcile.Result{}, true, err 336 } 337 338 // Find the child resources of the workload based on the childResourceKinds from the 339 // workload definition, workload uid and the ownerReferences of the children. 340 var children []*unstructured.Unstructured 341 if children, err = vznav.FetchWorkloadChildren(ctx, r, log, workload); err != nil { 342 return reconcile.Result{}, true, err 343 } 344 345 // Create or update the related resources of the trait and collect the outcomes. 346 status := r.createOrUpdateRelatedResources(ctx, trait, workload, traitDefaults, scraper, children, log) 347 // Delete or update any previously (but no longer) related resources of the trait. 348 status = r.deleteOrUpdateObsoleteResources(ctx, trait, status, log) 349 350 // Update the status of the trait resource using the outcomes of the create or update. 351 traitStatus, err := r.updateTraitStatus(ctx, trait, status, log) 352 return traitStatus, true, err 353 } 354 355 // addFinalizerIfRequired adds the finalizer to the trait if required 356 // The finalizer is only added if the trait is not being deleted and the finalizer has not previously been added 357 func (r *Reconciler) addFinalizerIfRequired(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) error { 358 if trait.GetDeletionTimestamp().IsZero() && !vzstring.SliceContainsString(trait.Finalizers, finalizerName) { 359 traitName := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta) 360 log.Debugf("Adding finalizer from trait %s", traitName) 361 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, trait, func() error { 362 trait.Finalizers = append(trait.Finalizers, finalizerName) 363 return nil 364 }) 365 if err != nil { 366 return log.ErrorfNewErr("Failed to add finalizer to trait %s: %v", traitName, err) 367 } 368 } 369 return nil 370 } 371 372 // removeFinalizerIfRequired removes the finalizer from the trait if required 373 // The finalizer is only removed if the trait is being deleted and the finalizer had been added 374 func (r *Reconciler) removeFinalizerIfRequired(ctx context.Context, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) error { 375 if !trait.DeletionTimestamp.IsZero() && vzstring.SliceContainsString(trait.Finalizers, finalizerName) { 376 traitName := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta) 377 log.Debugf("Removing finalizer from trait %s", traitName) 378 trait.Finalizers = vzstring.RemoveStringFromSlice(trait.Finalizers, finalizerName) 379 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, trait, func() error { 380 trait.Finalizers = vzstring.RemoveStringFromSlice(trait.Finalizers, finalizerName) 381 return nil 382 }) 383 if err != nil { 384 log.Errorf("Failed to remove finalizer for trait %s: %v", traitName, err) 385 return err 386 } 387 } 388 return nil 389 } 390 391 // createOrUpdateRelatedResources creates or updates resources related to this trait 392 // The related resources are the workload children and the Prometheus config 393 func (r *Reconciler) createOrUpdateRelatedResources(ctx context.Context, trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, traitDefaults *vzapi.MetricsTraitSpec, deployment *k8sapps.Deployment, children []*unstructured.Unstructured, log vzlog2.VerrazzanoLogger) *reconcileresults.ReconcileResults { 394 status := r.createOrUpdateRelatedWorkloads(ctx, trait, workload, traitDefaults, children, log) 395 status.RecordOutcome(r.updatePrometheusScraperConfigMap(ctx, trait, workload, traitDefaults, deployment, log)) 396 return status 397 } 398 399 // deleteOrUpdateObsoleteResources deletes or updates resources that should no longer be related to this trait. 400 // This includes previous scrapers when the scraper has changed. 401 // This also includes previous workload children that are no longer referenced. 402 func (r *Reconciler) deleteOrUpdateObsoleteResources(ctx context.Context, trait *vzapi.MetricsTrait, status *reconcileresults.ReconcileResults, log vzlog2.VerrazzanoLogger) *reconcileresults.ReconcileResults { 403 // For each reference in the trait status references but not in the reconcile status 404 // For references of role "scraper" attempt to remove the scrape config 405 // For references of role "source" attempt to remove the scrape annotations 406 // If the reference is not found or updated dont' add it to the reconcile status 407 // Otherwise carry the reference over in the status as an error. 408 409 log.Debugf("Deleting obsolete resources for trait: %s", trait.Name) 410 // Cleanup the relations that are in the trait status relations but not in the reconcile status. 411 update := reconcileresults.ReconcileResults{} 412 for _, rel := range trait.Status.Resources { 413 if !status.ContainsRelation(rel) { 414 switch rel.Role { 415 case scraperRole: 416 if rel.Kind == promoperapi.ServiceMonitorsKind { 417 result, err := r.deleteServiceMonitor(ctx, rel.Namespace, rel.Name, trait, log) 418 update.RecordOutcome(rel, result, err) 419 } else { 420 update.RecordOutcomeIfError(r.deleteOrUpdateScraperConfigMap(ctx, trait, rel, log)) // Need to pass down traitDefaults, current scraper or current scraper deployment 421 } 422 case sourceRole: 423 update.RecordOutcomeIfError(r.deleteOrUpdateMetricSourceResource(ctx, trait, rel, log)) 424 default: 425 // Don't record an outcome for unknown role relations. 426 log.Debugf("Skip delete or update of unknown resource role %s", rel.Role) 427 } 428 } 429 } 430 // Copy the reconcile outcomes from the current reconcile. 431 for i, rel := range status.Relations { 432 if !update.ContainsRelation(rel) { 433 update.RecordOutcome(status.Relations[i], status.Results[i], status.Errors[i]) 434 } 435 } 436 437 if !trait.DeletionTimestamp.IsZero() && trait.OwnerReferences != nil { 438 for i := range trait.OwnerReferences { 439 if trait.OwnerReferences[i].Kind == "ApplicationConfiguration" { 440 update.RecordOutcome(r.removedTraitReferencesFromOwner(ctx, &trait.OwnerReferences[i], trait, log)) 441 } 442 } 443 } 444 445 return &update 446 } 447 448 // deleteOrUpdateMetricSourceResource deletes or updates the related resources that are the source of metrics. 449 // These are the children of the workloads. For example for containerized workloads these are deployments. 450 // For WLS workloads these are pods. 451 func (r *Reconciler) deleteOrUpdateMetricSourceResource(ctx context.Context, trait *vzapi.MetricsTrait, rel vzapi.QualifiedResourceRelation, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) { 452 child := unstructured.Unstructured{} 453 child.SetAPIVersion(rel.APIVersion) 454 child.SetKind(rel.Kind) 455 child.SetNamespace(rel.Namespace) 456 child.SetName(rel.Name) 457 switch rel.Kind { 458 case "Deployment": 459 return r.updateRelatedDeployment(ctx, trait, nil, nil, &child, log) 460 case "StatefulSet": 461 return r.updateRelatedStatefulSet(ctx, trait, nil, nil, &child, log) 462 case "Pod": 463 return r.updateRelatedPod(ctx, trait, nil, nil, &child, log) 464 default: 465 // Return a NotFoundError to cause removal the resource relation from the status. 466 log.Debugf("Skip delete or update of metrics source of unknown kind %s", rel.Kind) 467 return rel, controllerutil.OperationResultNone, apierrors.NewNotFound(schema.GroupResource{Group: rel.APIVersion, Resource: rel.Kind}, rel.Name) 468 } 469 } 470 471 // deleteOrUpdateScraperConfigMap cleans up a scraper (i.e. Prometheus) configmap. 472 // The scraper config for the trait is removed if present. 473 func (r *Reconciler) deleteOrUpdateScraperConfigMap(ctx context.Context, trait *vzapi.MetricsTrait, rel vzapi.QualifiedResourceRelation, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) { 474 deployment := &k8sapps.Deployment{} 475 err := r.Get(ctx, client.ObjectKey{Namespace: rel.Namespace, Name: rel.Name}, deployment) 476 if err != nil { 477 return rel, controllerutil.OperationResultNone, client.IgnoreNotFound(err) 478 } 479 return r.updatePrometheusScraperConfigMap(ctx, trait, nil, nil, deployment, log) 480 } 481 482 // updatePrometheusScraperConfigMap updates the Prometheus scraper configmap. 483 // This updates only the scrape_configs section of the Prometheus configmap. 484 // Only the rules for the provided trait will be affected. 485 // trait - The trait to update scrape_config rules for. 486 // traitDefaults - Default to use for values not provided in the trait. 487 // deployment - The Prometheus deployment. 488 func (r *Reconciler) updatePrometheusScraperConfigMap(ctx context.Context, trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, traitDefaults *vzapi.MetricsTraitSpec, deployment *k8sapps.Deployment, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) { 489 rel := vzapi.QualifiedResourceRelation{APIVersion: deployment.APIVersion, Kind: deployment.Kind, Name: deployment.Name, Namespace: deployment.Namespace, Role: scraperRole} 490 491 // Fetch the secret by name if it is provided in either the trait or the trait defaults. 492 secret, err := fetchSourceCredentialsSecretIfRequired(ctx, trait, traitDefaults, workload, r.Client) 493 if err != nil { 494 return rel, controllerutil.OperationResultNone, err 495 } 496 497 configmapName, err := r.findPrometheusScrapeConfigMapNameFromDeployment(deployment, log) 498 if err != nil { 499 return rel, controllerutil.OperationResultNone, err 500 } 501 502 configmap := &k8score.ConfigMap{} 503 err = r.Get(ctx, client.ObjectKey{Namespace: deployment.Namespace, Name: configmapName}, configmap) 504 if err != nil { 505 // Don't create the config map if it doesn't already exist - that is the sole responsibility of 506 // the Verrazzano Monitoring Operator 507 return rel, controllerutil.OperationResultNone, client.IgnoreNotFound(err) 508 } 509 510 existingConfigmap := configmap.DeepCopyObject() 511 512 if configmap.CreationTimestamp.IsZero() { 513 log.Debugf("Create Prometheus configmap %s", vznav.GetNamespacedNameFromObjectMeta(configmap.ObjectMeta)) 514 } else { 515 log.Debugf("Update Prometheus configmap %s", vznav.GetNamespacedNameFromObjectMeta(configmap.ObjectMeta)) 516 } 517 yamlStr, exists := configmap.Data[prometheusConfigKey] 518 if !exists { 519 yamlStr = "" 520 } 521 prometheusConf, err := parseYAMLString(yamlStr) 522 if err != nil { 523 return rel, controllerutil.OperationResultNone, err 524 } 525 prometheusConf, err = mutatePrometheusScrapeConfig(ctx, trait, traitDefaults, prometheusConf, secret, workload, r.Client) 526 if err != nil { 527 return rel, controllerutil.OperationResultNone, err 528 } 529 yamlStr, err = writeYAMLString(prometheusConf) 530 if err != nil { 531 return rel, controllerutil.OperationResultNone, err 532 } 533 if configmap.Data == nil { 534 configmap.Data = map[string]string{} 535 } 536 configmap.Data[prometheusConfigKey] = yamlStr 537 538 // compare and don't update if unchanged 539 if equality.Semantic.DeepEqual(existingConfigmap, configmap) { 540 return rel, controllerutil.OperationResultNone, nil 541 } 542 543 err = r.Update(ctx, configmap) 544 // If the Prometheus configmap was updated, the VMI Prometheus has ConfigReloader sidecar to signal Prometheus to reload config 545 if err != nil { 546 return rel, controllerutil.OperationResultNone, err 547 } 548 return rel, controllerutil.OperationResultUpdated, nil 549 } 550 551 // isLegacyPrometheusScraper returns true if the scraper is the legacy VMO-managed Prometheus. 552 func (r *Reconciler) isLegacyPrometheusScraper(trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec) bool { 553 scraperRef := trait.Spec.Scraper 554 if scraperRef == nil { 555 scraperRef = traitDefaults.Scraper 556 } 557 return *scraperRef == constants.DefaultScraperName 558 } 559 560 // fetchPrometheusDeploymentFromTrait fetches the Prometheus deployment from information in the trait. 561 func (r *Reconciler) fetchPrometheusDeploymentFromTrait(ctx context.Context, trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, log vzlog2.VerrazzanoLogger) (*k8sapps.Deployment, error) { 562 scraperRef := trait.Spec.Scraper 563 if scraperRef == nil { 564 scraperRef = traitDefaults.Scraper 565 } 566 scraperName, err := vznav.ParseNamespacedNameFromQualifiedName(*scraperRef) 567 if err != nil { 568 return nil, err 569 } 570 deployment := &k8sapps.Deployment{} 571 err = r.Get(ctx, client.ObjectKey{Namespace: scraperName.Namespace, Name: scraperName.Name}, deployment) 572 if err != nil { 573 return nil, err 574 } 575 log.Debugf("Found Prometheus deployment %s", vznav.GetNamespacedNameFromObjectMeta(deployment.ObjectMeta)) 576 return deployment, nil 577 } 578 579 // findPrometheusScrapeConfigMapNameFromDeployment finds the Prometheus configmap name from the Prometheus deployment. 580 func (r *Reconciler) findPrometheusScrapeConfigMapNameFromDeployment(deployment *k8sapps.Deployment, log vzlog2.VerrazzanoLogger) (string, error) { 581 volumes := deployment.Spec.Template.Spec.Volumes 582 for _, volume := range volumes { 583 if volume.Name == "config-volume" && volume.ConfigMap != nil && len(volume.ConfigMap.Name) > 0 { 584 name := volume.ConfigMap.Name 585 log.Debugf("Found Prometheus configmap name %s", name) 586 return name, nil 587 } 588 } 589 return "", fmt.Errorf("failed to find Prometheus configmap name from deployment %s", vznav.GetNamespacedNameFromObjectMeta(deployment.ObjectMeta)) 590 } 591 592 // updateTraitStatus updates the trait's status conditions and resources if they have changed. 593 // The return value can be used as the result of the Reconcile method. 594 func (r *Reconciler) updateTraitStatus(ctx context.Context, trait *vzapi.MetricsTrait, results *reconcileresults.ReconcileResults, log vzlog2.VerrazzanoLogger) (reconcile.Result, error) { 595 name := vznav.GetNamespacedNameFromObjectMeta(trait.ObjectMeta) 596 597 // If the status content has changed persist the updated status. 598 if trait.DeletionTimestamp.IsZero() && updateStatusIfRequired(&trait.Status, results) { 599 err := r.Status().Update(ctx, trait) 600 if err != nil { 601 return vzlog.IgnoreConflictWithLog(fmt.Sprintf("Failed to update metrics trait %s status", name.Name), err, zap.S()) 602 } 603 log.Debugf("Updated metrics trait %s status", name.Name) 604 } 605 606 // If the results contained errors then requeue immediately. 607 if results.ContainsErrors() { 608 vzlog.ResultErrorsWithLog(fmt.Sprintf("Failed to reconcile metrics trait %s", name), results.Errors, zap.S()) 609 return reconcile.Result{Requeue: true}, nil 610 } 611 612 // If the status has not change and there are no errors 613 // requeue with a jittered delay to account for situations where a workload 614 // changes but without necessarily updating the trait spec. 615 var seconds = rand.IntnRange(45, 90) 616 var duration = time.Duration(seconds) * time.Second 617 log.Debugf("Reconciled metrics trait %s successfully", name.Name) 618 return reconcile.Result{Requeue: true, RequeueAfter: duration}, nil 619 } 620 621 // updateStatusIfRequired updates the traits status (i.e. resources and conditions) if they have changed. 622 // Returns a boolean indicating if status resources or conditions have been updated. 623 func updateStatusIfRequired(status *vzapi.MetricsTraitStatus, results *reconcileresults.ReconcileResults) bool { 624 updated := false 625 if !vzapi.QualifiedResourceRelationSlicesEquivalent(status.Resources, results.Relations) { 626 for i, relation := range results.Relations { 627 if !vzapi.QualifiedResourceRelationsContain(status.Resources, &results.Relations[i]) { 628 status.Resources = append(status.Resources, relation) 629 } 630 } 631 updated = true 632 } 633 conditionedStatus := results.CreateConditionedStatus() 634 if !reconcileresults.ConditionedStatusEquivalent(&status.ConditionedStatus, &conditionedStatus) { 635 status.ConditionedStatus = conditionedStatus 636 updated = true 637 } 638 return updated 639 } 640 641 // mutatePrometheusScrapeConfig mutates the Prometheus scrape configuration. 642 // Scrap configuration rules will be added, updated, deleted depending on the state of the trait. 643 func mutatePrometheusScrapeConfig(ctx context.Context, trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, prometheusScrapeConfig *gabs.Container, secret *k8score.Secret, workload *unstructured.Unstructured, c client.Client) (*gabs.Container, error) { 644 ports := trait.Spec.Ports 645 if len(ports) == 0 { 646 // create a port spec from the existing port 647 ports = []vzapi.PortSpec{{Port: trait.Spec.Port, Path: trait.Spec.Path}} 648 } else { 649 // if there are existing ports and a port/path setting, add the latter to the ports 650 if trait.Spec.Port != nil { 651 // add the port to the ports 652 path := trait.Spec.Path 653 if path == nil { 654 path = traitDefaults.Path 655 } 656 portSpec := vzapi.PortSpec{ 657 Port: trait.Spec.Port, 658 Path: path, 659 } 660 ports = append(ports, portSpec) 661 } 662 } 663 664 for i := range ports { 665 oldScrapeConfigs := prometheusScrapeConfig.Search(prometheusScrapeConfigsLabel).Children() 666 prometheusScrapeConfig.Array(prometheusScrapeConfigsLabel) // zero out the array of scrape configs 667 newScrapeJob, newScrapeConfig, err := createScrapeConfigFromTrait(ctx, trait, i, secret, workload, c) 668 if err != nil { 669 return prometheusScrapeConfig, err 670 } 671 existingReplaced := false 672 for _, oldScrapeConfig := range oldScrapeConfigs { 673 oldScrapeJob := oldScrapeConfig.Search(vzconst.PrometheusJobNameKey).Data() 674 if newScrapeJob == oldScrapeJob { 675 // If the scrape config should be removed then skip adding it to the result slice. 676 // This will occur in three situations. 677 // 1. The trait is being deleted. 678 // 2. The trait scraper has been changed and the old scrape config is being updated. 679 // In this case the traitDefaults and newScrapeConfig will be nil. 680 // 3. The trait is being disabled. 681 if trait.DeletionTimestamp.IsZero() && traitDefaults != nil && newScrapeConfig != nil && isEnabled(trait) { 682 prometheusScrapeConfig.ArrayAppendP(newScrapeConfig.Data(), prometheusScrapeConfigsLabel) 683 } 684 existingReplaced = true 685 } else { 686 prometheusScrapeConfig.ArrayAppendP(oldScrapeConfig.Data(), prometheusScrapeConfigsLabel) 687 } 688 } 689 // If an existing config was not replaced and there is new config (i.e. newScrapeConfig != nil) then add the new config. 690 if !existingReplaced && newScrapeConfig != nil { 691 prometheusScrapeConfig.ArrayAppendP(newScrapeConfig.Data(), prometheusScrapeConfigsLabel) 692 } 693 } 694 return prometheusScrapeConfig, nil 695 } 696 697 // MutateAnnotations mutates annotations with values used by the scraper config. 698 // Annotations are either set or removed depending on the state of the trait. 699 func MutateAnnotations(trait *vzapi.MetricsTrait, traitDefaults *vzapi.MetricsTraitSpec, annotations map[string]string) map[string]string { 700 mutated := annotations 701 702 ports := trait.Spec.Ports 703 if len(ports) == 0 { 704 // create a port spec from the existing port 705 ports = []vzapi.PortSpec{{Port: trait.Spec.Port, Path: trait.Spec.Path}} 706 } else { 707 // if there are existing ports and a port/path setting, add the latter to the ports 708 if trait.Spec.Port != nil { 709 // add the port to the ports 710 path := trait.Spec.Path 711 if path == nil { 712 path = traitDefaults.Path 713 } 714 portSpec := vzapi.PortSpec{ 715 Port: trait.Spec.Port, 716 Path: path, 717 } 718 ports = append(ports, portSpec) 719 } 720 } 721 722 // If the trait is being deleted or disabled, remove the annotations. 723 if !trait.DeletionTimestamp.IsZero() || !isEnabled(trait) { 724 for k := range mutated { 725 if strings.HasPrefix(k, verrazzanoMetricsAnnotationPrefix) { 726 delete(mutated, k) 727 } 728 } 729 return mutated 730 } 731 732 // Merge trait, default and existing value. 733 var found bool 734 var port string 735 for i, portSpec := range ports { 736 737 mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsEnabledAnnotation, i), strconv.FormatBool(true)) 738 739 if portSpec.Port != nil { 740 port = strconv.Itoa(*portSpec.Port) 741 } else { 742 port, found = annotations[prometheusPortAnnotation] 743 if !found { 744 port = strconv.Itoa(*traitDefaults.Ports[0].Port) 745 } 746 } 747 mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsPortAnnotation, i), port) 748 749 // Merge trait, default and existing value. 750 var path string 751 if portSpec.Path != nil { 752 path = *portSpec.Path 753 } else { 754 path, found = annotations[prometheusPathAnnotation] 755 if !found { 756 if traitDefaults.Ports[0].Path != nil { 757 path = *traitDefaults.Ports[0].Path 758 } 759 } 760 } 761 mutated = updateStringMap(mutated, formatMetric(verrazzanoMetricsPathAnnotation, i), path) 762 } 763 764 return mutated 765 } 766 767 func formatMetric(format string, i int) string { 768 suffix := "" 769 if i > 0 { 770 suffix = strconv.Itoa(i) 771 } 772 return fmt.Sprintf(format, suffix) 773 } 774 775 // MutateLabels mutates the labels associated with a related resources. 776 func MutateLabels(trait *vzapi.MetricsTrait, workload *unstructured.Unstructured, labels map[string]string) map[string]string { 777 mutated := labels 778 // If the trait is not being deleted, copy specific labels from the trait. 779 if trait.DeletionTimestamp.IsZero() { 780 mutated = copyStringMapEntries(mutated, trait.Labels, oam.LabelAppName, oam.LabelAppComponent) 781 } 782 return mutated 783 } 784 785 // createPrometheusScrapeConfigMapJobName creates a Prometheus scrape configmap job name from a trait. 786 // Format is {oam_app}_{cluster}_{namespace}_{oam_comp} 787 func createPrometheusScrapeConfigMapJobName(trait *vzapi.MetricsTrait, portNum int) (string, error) { 788 return createJobOrServiceMonitorName(trait, portNum) 789 } 790 791 // createScrapeConfigFromTrait creates Prometheus scrape config for a trait. 792 // This populates the Prometheus scrape config template. 793 // The job name is returned. 794 // The YAML container populated from the Prometheus scrape config template is returned. 795 func createScrapeConfigFromTrait(ctx context.Context, trait *vzapi.MetricsTrait, portIncrement int, secret *k8score.Secret, workload *unstructured.Unstructured, c client.Client) (string, *gabs.Container, error) { 796 job, err := createPrometheusScrapeConfigMapJobName(trait, portIncrement) 797 if err != nil { 798 return "", nil, err 799 } 800 801 // If the metricsTrait is being disabled then return nil for the config 802 if !isEnabled(trait) { 803 return job, nil, nil 804 } 805 806 // If workload is nil then the trait is being deleted so no config is required 807 if workload != nil { 808 // Populate the Prometheus scrape config template 809 portOrderStr := "" 810 if portIncrement > 0 { 811 portOrderStr = strconv.Itoa(portIncrement) 812 } 813 context := map[string]string{ 814 appNameHolder: trait.Labels[oam.LabelAppName], 815 compNameHolder: trait.Labels[oam.LabelAppComponent], 816 jobNameHolder: job, 817 portOrderHolder: portOrderStr, 818 namespaceHolder: trait.Namespace, 819 sslProtocolHolder: httpProtocol, 820 vzClusterNameHolder: clusters.GetClusterName(ctx, c)} 821 822 var configTemplate string 823 https, err := useHTTPSForScrapeTarget(ctx, c, trait) 824 if err != nil { 825 return "", nil, err 826 } 827 828 if https { 829 context[sslProtocolHolder] = httpsProtocol 830 } 831 configTemplate = prometheusScrapeConfigTemplate 832 833 wlsWorkload, err := isWLSWorkload(workload) 834 if err != nil { 835 return "", nil, err 836 } 837 if wlsWorkload { 838 configTemplate = prometheusWLSScrapeConfigTemplate 839 } 840 841 // Populate the Prometheus scrape config template 842 template := mergeTemplateWithContext(configTemplate, context) 843 844 // Parse the populate the Prometheus scrape config template. 845 config, err := parseYAMLString(template) 846 if err != nil { 847 return job, nil, fmt.Errorf("failed to parse built-in Prometheus scrape config template: %w", err) 848 } 849 // Add basic auth credentials if provided 850 if secret != nil { 851 username, secretFound := secret.Data["username"] 852 if secretFound { 853 config.Set(string(username), basicAuthLabel, basicAuthUsernameLabel) 854 } 855 password, passwordFound := secret.Data["password"] 856 if passwordFound { 857 config.Set(string(password), basicAuthLabel, basicPathPasswordLabel) 858 } 859 } 860 return job, config, nil 861 } 862 863 // If the trait is being deleted (i.e. workload==nil) then no config is required. 864 return job, nil, nil 865 } 866 867 // removedTraitReferencesFromOwner removes traits from components of owner ApplicationConfiguration. 868 func (r *Reconciler) removedTraitReferencesFromOwner(ctx context.Context, ownerRef *metav1.OwnerReference, trait *vzapi.MetricsTrait, log vzlog2.VerrazzanoLogger) (vzapi.QualifiedResourceRelation, controllerutil.OperationResult, error) { 869 rel := vzapi.QualifiedResourceRelation{APIVersion: "core.oam.dev/v1alpha2", Kind: "ApplicationConfiguration", Namespace: trait.GetNamespace(), Name: ownerRef.Name, Role: ownerRole} 870 var appConfig oamv1.ApplicationConfiguration 871 err := r.Client.Get(ctx, types.NamespacedName{Namespace: trait.GetNamespace(), Name: ownerRef.Name}, &appConfig) 872 if err != nil { 873 log.Debugf("Unable to fetch ApplicationConfiguration %s/%s, error: %v", trait.GetNamespace(), ownerRef.Name, err) 874 return rel, controllerutil.OperationResultNone, err 875 } 876 877 if appConfig.Spec.Components != nil { 878 traitsRemoved := false 879 for i := range appConfig.Spec.Components { 880 component := &appConfig.Spec.Components[i] 881 if component.Traits != nil { 882 remainingTraits := []oamv1.ComponentTrait{} 883 for _, componentTrait := range component.Traits { 884 remainingTraits = append(remainingTraits, componentTrait) 885 componentTraitUnstructured, err := vznav.ConvertRawExtensionToUnstructured(&componentTrait.Trait) 886 if err != nil || componentTraitUnstructured == nil { 887 log.Debugf("Unable to convert trait for component: %s of application configuration: %s/%s, error: %v", component.ComponentName, appConfig.GetNamespace(), appConfig.GetName(), err) 888 } else { 889 if componentTraitUnstructured.GetAPIVersion() == trait.APIVersion && componentTraitUnstructured.GetKind() == trait.Kind { 890 if compName, ok := trait.Labels[oam.LabelAppComponent]; ok && compName == component.ComponentName { 891 log.Infof("Removing trait %s/%s for component: %s of application configuration: %s/%s", componentTraitUnstructured.GetAPIVersion(), componentTraitUnstructured.GetKind(), component.ComponentName, appConfig.GetNamespace(), appConfig.GetName()) 892 remainingTraits = remainingTraits[:len(remainingTraits)-1] 893 } 894 } 895 } 896 } 897 if len(remainingTraits) < len(component.Traits) { 898 component.Traits = remainingTraits 899 traitsRemoved = true 900 } 901 } 902 } 903 if traitsRemoved { 904 log.Infof("Updating ApplicationConfiguration %s/%s", trait.GetNamespace(), ownerRef.Name) 905 err = r.Client.Update(ctx, &appConfig) 906 if err != nil { 907 log.Infof("Unable to update ApplicationConfiguration %s/%s, error: %v", trait.GetNamespace(), ownerRef.Name, err) 908 return rel, controllerutil.OperationResultNone, err 909 } 910 911 return rel, controllerutil.OperationResultUpdated, err 912 } 913 } 914 return rel, controllerutil.OperationResultNone, nil 915 }