github.com/verrazzano/verrazzano@v1.7.1/cluster-operator/controllers/vmc/vmc_controller.go (about)

     1  // Copyright (c) 2021, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package vmc
     5  
     6  import (
     7  	"context"
     8  	goerrors "errors"
     9  	"fmt"
    10  	"time"
    11  
    12  	"github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/common"
    13  	appsv1 "k8s.io/api/apps/v1"
    14  	netv1 "k8s.io/api/networking/v1"
    15  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    16  	"k8s.io/client-go/rest"
    17  	"k8s.io/client-go/tools/clientcmd"
    18  
    19  	"github.com/verrazzano/verrazzano/pkg/k8sutil"
    20  	"github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/keycloak"
    21  	"github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/spi"
    22  
    23  	"github.com/prometheus/client_golang/prometheus"
    24  	"github.com/prometheus/client_golang/prometheus/promauto"
    25  	clustersv1alpha1 "github.com/verrazzano/verrazzano/cluster-operator/apis/clusters/v1alpha1"
    26  	vzctrl "github.com/verrazzano/verrazzano/pkg/controller"
    27  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    28  	"github.com/verrazzano/verrazzano/pkg/rancherutil"
    29  	vzstring "github.com/verrazzano/verrazzano/pkg/string"
    30  	"github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1"
    31  	"github.com/verrazzano/verrazzano/platform-operator/constants"
    32  	"go.uber.org/zap"
    33  	corev1 "k8s.io/api/core/v1"
    34  	rbacv1 "k8s.io/api/rbac/v1"
    35  	"k8s.io/apimachinery/pkg/api/errors"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	"k8s.io/apimachinery/pkg/runtime"
    38  	"k8s.io/apimachinery/pkg/types"
    39  	ctrl "sigs.k8s.io/controller-runtime"
    40  	"sigs.k8s.io/controller-runtime/pkg/client"
    41  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    42  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    43  )
    44  
// finalizerName is the finalizer this controller adds to a VerrazzanoManagedCluster so that
// the cleanup in reconcileManagedClusterDelete can run before the resource is removed.
const finalizerName = "managedcluster.verrazzano.io"
    46  
// VerrazzanoManagedClusterReconciler reconciles a VerrazzanoManagedCluster object.
// The reconciler will create a ServiceAccount, RoleBinding, and a Secret which
// contains the kubeconfig to be used by the Multi-Cluster Agent to access the admin cluster.
type VerrazzanoManagedClusterReconciler struct {
	// Client is the controller-runtime client used for all reads and writes.
	client.Client
	// Scheme is passed to controllerutil.SetControllerReference when owner references are set.
	Scheme *runtime.Scheme
	// RancherIngressHost is handed to rancherutil.NewAdminRancherConfig when a Rancher client is built.
	RancherIngressHost string
	// log is the resource logger; Reconcile replaces it on every invocation.
	log vzlog.VerrazzanoLogger
}
    56  
// bindingParams holds the inputs mutateBinding uses to populate a RoleBinding.
type bindingParams struct {
	// vmc supplies the namespace, the labels and (via generateManagedResourceName) the name of the binding.
	vmc *clustersv1alpha1.VerrazzanoManagedCluster
	// roleName is the name of the ClusterRole referenced by the binding's RoleRef.
	roleName string
	// serviceAccountName is the name of the ServiceAccount subject of the binding.
	serviceAccountName string
}
    63  
// Prometheus metrics describing the VMC reconcile loop. All three are updated by Reconcile.
var (
	// reconcileTimeMetric is set to the duration, in seconds, of the most recent Reconcile call.
	reconcileTimeMetric = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "vz_cluster_operator_reconcile_vmc_duration_seconds",
		Help: "The duration of the reconcile process for cluster objects",
	})
	// reconcileErrorCount is incremented each time Reconcile hits an error.
	reconcileErrorCount = promauto.NewCounter(prometheus.CounterOpts{
		Name: "vz_cluster_operator_reconcile_vmc_error_total",
		Help: "The amount of errors encountered in the reconcile process",
	})
	// reconcileSuccessCount is incremented each time Reconcile finishes without an error.
	reconcileSuccessCount = promauto.NewCounter(prometheus.CounterOpts{
		Name: "vz_cluster_operator_reconcile_vmc_success_total",
		Help: "The number of times the reconcile process succeeded",
	})
)
    78  
    79  func (r *VerrazzanoManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    80  	// Time the reconcile process and set the metric with the elapsed time
    81  	startTime := time.Now()
    82  	defer func() { reconcileTimeMetric.Set(time.Since(startTime).Seconds()) }()
    83  
    84  	if ctx == nil {
    85  		reconcileErrorCount.Inc()
    86  		return ctrl.Result{}, goerrors.New("context cannot be nil")
    87  	}
    88  	cr := &clustersv1alpha1.VerrazzanoManagedCluster{}
    89  	if err := r.Get(context.TODO(), req.NamespacedName, cr); err != nil {
    90  		// If the resource is not found, that means all of the finalizers have been removed,
    91  		// and the Verrazzano resource has been deleted, so there is nothing left to do.
    92  		if errors.IsNotFound(err) {
    93  			reconcileSuccessCount.Inc()
    94  			return reconcile.Result{}, nil
    95  		}
    96  		reconcileErrorCount.Inc()
    97  		zap.S().Errorf("Failed to fetch VerrazzanoManagedCluster resource: %v", err)
    98  		return newRequeueWithDelay(), nil
    99  	}
   100  
   101  	// Get the resource logger needed to log message using 'progress' and 'once' methods
   102  	log, err := vzlog.EnsureResourceLogger(&vzlog.ResourceConfig{
   103  		Name:           cr.Name,
   104  		Namespace:      cr.Namespace,
   105  		ID:             string(cr.UID),
   106  		Generation:     cr.Generation,
   107  		ControllerName: "multicluster",
   108  	})
   109  	if err != nil {
   110  		reconcileErrorCount.Inc()
   111  		zap.S().Errorf("Failed to create controller logger for VerrazzanoManagedCluster controller", err)
   112  	}
   113  
   114  	r.log = log
   115  	log.Oncef("Reconciling VerrazzanoManagedCluster resource %v", req.NamespacedName)
   116  	res, err := r.doReconcile(ctx, log, cr)
   117  	if err != nil {
   118  		// Never return an error since it has already been logged and we don't want the
   119  		// controller runtime to log again (with stack trace).  Just re-queue if there is an error.
   120  		reconcileErrorCount.Inc()
   121  		return newRequeueWithDelay(), nil
   122  	}
   123  	if vzctrl.ShouldRequeue(res) {
   124  		reconcileSuccessCount.Inc()
   125  		return res, nil
   126  	}
   127  
   128  	// The resource has been reconciled.
   129  	log.Oncef("Successfully reconciled VerrazzanoManagedCluster resource %v", req.NamespacedName)
   130  
   131  	reconcileSuccessCount.Inc()
   132  	return ctrl.Result{}, nil
   133  }
   134  
   135  // Reconcile reconciles a VerrazzanoManagedCluster object
   136  func (r *VerrazzanoManagedClusterReconciler) doReconcile(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (ctrl.Result, error) {
   137  
   138  	if !vmc.ObjectMeta.DeletionTimestamp.IsZero() {
   139  		// Finalizer is present, so lets do the cluster deletion
   140  		if vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) {
   141  			if err := r.reconcileManagedClusterDelete(ctx, vmc); err != nil {
   142  				return reconcile.Result{}, err
   143  			}
   144  
   145  			// Remove the finalizer and update the Verrazzano resource if the deletion has finished.
   146  			log.Infof("Removing finalizer %s", finalizerName)
   147  			vmc.ObjectMeta.Finalizers = vzstring.RemoveStringFromSlice(vmc.ObjectMeta.Finalizers, finalizerName)
   148  			err := r.Update(ctx, vmc)
   149  			if err != nil && !errors.IsConflict(err) {
   150  				return reconcile.Result{}, err
   151  			}
   152  		}
   153  		return reconcile.Result{}, nil
   154  	}
   155  
   156  	// Add our finalizer if not already added
   157  	if !vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) {
   158  		log.Infof("Adding finalizer %s", finalizerName)
   159  		vmc.ObjectMeta.Finalizers = append(vmc.ObjectMeta.Finalizers, finalizerName)
   160  		if err := r.Update(ctx, vmc); err != nil {
   161  			return ctrl.Result{}, err
   162  		}
   163  	}
   164  
   165  	// Sync the service account
   166  	log.Debugf("Syncing the ServiceAccount for VMC %s", vmc.Name)
   167  	err := r.syncServiceAccount(vmc)
   168  	if err != nil {
   169  		r.handleError(ctx, vmc, "Failed to sync the ServiceAccount", err, log)
   170  		return newRequeueWithDelay(), err
   171  	}
   172  
   173  	log.Debugf("Syncing the RoleBinding for VMC %s", vmc.Name)
   174  	_, err = r.syncManagedRoleBinding(vmc)
   175  	if err != nil {
   176  		r.handleError(ctx, vmc, "Failed to sync the RoleBinding", err, log)
   177  		return newRequeueWithDelay(), err
   178  	}
   179  
   180  	log.Debugf("Syncing the Agent secret for VMC %s", vmc.Name)
   181  	err = r.syncAgentSecret(vmc)
   182  	if err != nil {
   183  		r.handleError(ctx, vmc, "Failed to sync the agent secret", err, log)
   184  		return newRequeueWithDelay(), err
   185  	}
   186  
   187  	log.Debugf("Syncing the Registration secret for VMC %s", vmc.Name)
   188  	err = r.syncRegistrationSecret(vmc)
   189  	if err != nil {
   190  		r.handleError(ctx, vmc, "Failed to sync the registration secret", err, log)
   191  		return newRequeueWithDelay(), err
   192  	}
   193  
   194  	rancherEnabled, err := r.isRancherEnabled()
   195  	if err != nil {
   196  		return newRequeueWithDelay(), err
   197  	}
   198  
   199  	log.Debugf("Syncing the Manifest secret for VMC %s", vmc.Name)
   200  	vzVMCWaitingForClusterID, err := r.syncManifestSecret(ctx, rancherEnabled, vmc)
   201  	if err != nil {
   202  		r.handleError(ctx, vmc, "Failed to sync the Manifest secret", err, log)
   203  		return newRequeueWithDelay(), err
   204  	}
   205  
   206  	// create/update a secret with the CA cert from the managed cluster (if any errors occur we just log and continue)
   207  	syncedCert, err := r.syncCACertSecret(ctx, vmc, rancherEnabled)
   208  	if err != nil {
   209  		msg := fmt.Sprintf("Unable to get CA cert from managed cluster %s with id %s: %v", vmc.Name, vmc.Status.RancherRegistration.ClusterID, err)
   210  		r.log.Infof(msg)
   211  		r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionFalse, msg)
   212  	} else {
   213  		if syncedCert {
   214  			r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionTrue, "Managed cluster CA cert retrieved successfully")
   215  		}
   216  	}
   217  
   218  	log.Debugf("Updating Rancher ClusterRoleBindingTemplate for VMC %s", vmc.Name)
   219  	err = r.updateRancherClusterRoleBindingTemplate(vmc)
   220  	if err != nil {
   221  		r.handleError(ctx, vmc, "Failed to update Rancher ClusterRoleBindingTemplate", err, log)
   222  		return newRequeueWithDelay(), err
   223  	}
   224  
   225  	log.Debugf("Pushing the Manifest objects for VMC %s", vmc.Name)
   226  	pushedManifest, err := r.pushManifestObjects(ctx, rancherEnabled, vmc)
   227  	if err != nil {
   228  		r.handleError(ctx, vmc, "Failed to push the Manifest objects", err, log)
   229  		r.setStatusConditionManifestPushed(vmc, corev1.ConditionFalse, fmt.Sprintf("Failed to push the manifest objects to the managed cluster: %v", err))
   230  		return newRequeueWithDelay(), err
   231  	}
   232  	if pushedManifest {
   233  		r.log.Oncef("Manifest objects have been successfully pushed to the managed cluster")
   234  		r.setStatusConditionManifestPushed(vmc, corev1.ConditionTrue, "Manifest objects pushed to the managed cluster")
   235  	}
   236  
   237  	log.Debugf("Registering ArgoCD for VMC %s", vmc.Name)
   238  	var argoCDRegistration *clustersv1alpha1.ArgoCDRegistration
   239  	argoCDEnabled, err := r.isArgoCDEnabled()
   240  	if err != nil {
   241  		return newRequeueWithDelay(), err
   242  	}
   243  	if argoCDEnabled && rancherEnabled {
   244  		argoCDRegistration, err = r.registerManagedClusterWithArgoCD(vmc)
   245  		if err != nil {
   246  			r.handleError(ctx, vmc, "Failed to register managed cluster with Argo CD", err, log)
   247  			return newRequeueWithDelay(), err
   248  		}
   249  		vmc.Status.ArgoCDRegistration = *argoCDRegistration
   250  	}
   251  	if !rancherEnabled && argoCDEnabled {
   252  		now := metav1.Now()
   253  		vmc.Status.ArgoCDRegistration = clustersv1alpha1.ArgoCDRegistration{
   254  			Status:    clustersv1alpha1.RegistrationPendingRancher,
   255  			Timestamp: &now,
   256  			Message:   "Skipping Argo CD cluster registration due to Rancher not installed"}
   257  	}
   258  
   259  	if !vzVMCWaitingForClusterID {
   260  		r.setStatusConditionReady(vmc, "Ready")
   261  		statusErr := r.updateStatus(ctx, vmc)
   262  
   263  		if statusErr != nil {
   264  			log.Errorf("Failed to update status to ready for VMC %s: %v", vmc.Name, statusErr)
   265  		}
   266  	}
   267  
   268  	if err := r.syncManagedMetrics(ctx, log, vmc); err != nil {
   269  		return newRequeueWithDelay(), err
   270  	}
   271  
   272  	log.Debugf("Creating or updating keycloak client for %s", vmc.Name)
   273  	err = r.createManagedClusterKeycloakClient(vmc)
   274  	if err != nil {
   275  		r.handleError(ctx, vmc, "Failed to create or update Keycloak client for managed cluster", err, log)
   276  		return newRequeueWithDelay(), err
   277  	}
   278  
   279  	return ctrl.Result{Requeue: true, RequeueAfter: constants.ReconcileLoopRequeueInterval}, nil
   280  }
   281  
   282  func (r *VerrazzanoManagedClusterReconciler) syncServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
   283  	// Create or update the service account
   284  	_, serviceAccount, err := r.createOrUpdateServiceAccount(context.TODO(), vmc)
   285  	if err != nil {
   286  		return err
   287  	}
   288  
   289  	if len(serviceAccount.Secrets) == 0 {
   290  		_, err = r.createServiceAccountTokenSecret(context.TODO(), serviceAccount)
   291  		if err != nil {
   292  			return err
   293  		}
   294  	}
   295  
   296  	// Does the VerrazzanoManagedCluster object contain the service account name?
   297  	saName := generateManagedResourceName(vmc.Name)
   298  	if vmc.Spec.ServiceAccount != saName {
   299  		r.log.Oncef("Updating ServiceAccount from %s to %s", vmc.Spec.ServiceAccount, saName)
   300  		vmc.Spec.ServiceAccount = saName
   301  		err = r.Update(context.TODO(), vmc)
   302  		if err != nil {
   303  			return err
   304  		}
   305  	}
   306  
   307  	return nil
   308  }
   309  
   310  // Create or update the ServiceAccount for a VerrazzanoManagedCluster
   311  func (r *VerrazzanoManagedClusterReconciler) createOrUpdateServiceAccount(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, *corev1.ServiceAccount, error) {
   312  	var serviceAccount corev1.ServiceAccount
   313  	serviceAccount.Namespace = vmc.Namespace
   314  	serviceAccount.Name = generateManagedResourceName(vmc.Name)
   315  
   316  	operationResult, err := controllerutil.CreateOrUpdate(ctx, r.Client, &serviceAccount, func() error {
   317  		r.mutateServiceAccount(vmc, &serviceAccount)
   318  		// This SetControllerReference call will trigger garbage collection i.e. the serviceAccount
   319  		// will automatically get deleted when the VerrazzanoManagedCluster is deleted
   320  		return controllerutil.SetControllerReference(vmc, &serviceAccount, r.Scheme)
   321  	})
   322  	return operationResult, &serviceAccount, err
   323  }
   324  
   325  func (r *VerrazzanoManagedClusterReconciler) mutateServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster, serviceAccount *corev1.ServiceAccount) {
   326  	serviceAccount.Name = generateManagedResourceName(vmc.Name)
   327  }
   328  
   329  func (r *VerrazzanoManagedClusterReconciler) createServiceAccountTokenSecret(ctx context.Context, serviceAccount *corev1.ServiceAccount) (controllerutil.OperationResult, error) {
   330  	var secret corev1.Secret
   331  	secret.Name = serviceAccount.Name + "-token"
   332  	secret.Namespace = serviceAccount.Namespace
   333  	secret.Type = corev1.SecretTypeServiceAccountToken
   334  	secret.Annotations = map[string]string{
   335  		corev1.ServiceAccountNameKey: serviceAccount.Name,
   336  	}
   337  
   338  	return controllerutil.CreateOrUpdate(ctx, r.Client, &secret, func() error {
   339  		// This SetControllerReference call will trigger garbage collection i.e. the token secret
   340  		// will automatically get deleted when the service account is deleted
   341  		return controllerutil.SetControllerReference(serviceAccount, &secret, r.Scheme)
   342  	})
   343  }
   344  
   345  // syncManagedRoleBinding syncs the RoleBinding that binds the service account used by the managed cluster
   346  // to the role containing the permission
   347  func (r *VerrazzanoManagedClusterReconciler) syncManagedRoleBinding(vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, error) {
   348  	var roleBinding rbacv1.RoleBinding
   349  	roleBinding.Namespace = vmc.Namespace
   350  	roleBinding.Name = generateManagedResourceName(vmc.Name)
   351  
   352  	return controllerutil.CreateOrUpdate(context.TODO(), r.Client, &roleBinding, func() error {
   353  		mutateBinding(&roleBinding, bindingParams{
   354  			vmc:                vmc,
   355  			roleName:           constants.MCClusterRole,
   356  			serviceAccountName: vmc.Spec.ServiceAccount,
   357  		})
   358  		// This SetControllerReference call will trigger garbage collection i.e. the roleBinding
   359  		// will automatically get deleted when the VerrazzanoManagedCluster is deleted
   360  		return controllerutil.SetControllerReference(vmc, &roleBinding, r.Scheme)
   361  	})
   362  }
   363  
   364  // syncMultiClusterCASecret gets the CA secret in the VMC from the managed cluster and populates the CA secret for metrics scraping
   365  func (r *VerrazzanoManagedClusterReconciler) syncMultiClusterCASecret(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (corev1.Secret, error) {
   366  	var secret corev1.Secret
   367  
   368  	// read the configuration secret specified if it exists
   369  	if len(vmc.Spec.CASecret) > 0 {
   370  		secretNsn := types.NamespacedName{
   371  			Namespace: vmc.Namespace,
   372  			Name:      vmc.Spec.CASecret,
   373  		}
   374  
   375  		// validate secret if it exists
   376  		if err := r.Get(context.TODO(), secretNsn, &secret); err != nil {
   377  			return secret, log.ErrorfNewErr("failed to fetch the managed cluster CA secret %s/%s, %v", vmc.Namespace, vmc.Spec.CASecret, err)
   378  		}
   379  	}
   380  	if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, &secret); err != nil {
   381  		return secret, log.ErrorfNewErr("Failed to sync the managed cluster CA certs for VMC %s: %v", vmc.Name, err)
   382  	}
   383  	return secret, nil
   384  }
   385  
   386  // mutateManagedClusterCACertsSecret adds and removes managed cluster CA certs to/from the managed cluster CA certs secret
   387  func (r *VerrazzanoManagedClusterReconciler) mutateManagedClusterCACertsSecret(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, cacrtSecret *corev1.Secret) error {
   388  	ns := &corev1.Namespace{}
   389  	err := r.Client.Get(ctx, types.NamespacedName{Name: constants.VerrazzanoMonitoringNamespace}, ns)
   390  	if errors.IsNotFound(err) {
   391  		r.log.Infof("namespace %s does not exist", constants.VerrazzanoMonitoringNamespace)
   392  		return nil
   393  	}
   394  	secret := &corev1.Secret{
   395  		ObjectMeta: metav1.ObjectMeta{
   396  			Name:      constants.PromManagedClusterCACertsSecretName,
   397  			Namespace: constants.VerrazzanoMonitoringNamespace,
   398  		},
   399  	}
   400  
   401  	if _, err := controllerutil.CreateOrUpdate(ctx, r.Client, secret, func() error {
   402  		if secret.Data == nil {
   403  			secret.Data = make(map[string][]byte)
   404  		}
   405  		if cacrtSecret != nil && cacrtSecret.Data != nil && len(cacrtSecret.Data["cacrt"]) > 0 {
   406  			secret.Data[getCAKey(vmc)] = cacrtSecret.Data["cacrt"]
   407  		} else {
   408  			delete(secret.Data, getCAKey(vmc))
   409  		}
   410  		return nil
   411  	}); err != nil {
   412  		return err
   413  	}
   414  
   415  	return nil
   416  }
   417  
   418  // syncManagedMetrics syncs the metrics federation for managed clusters
   419  // There are currently two ways of federating metrics from managed clusters:
   420  // 1. Creating a Scrape config for the managed cluster on the admin cluster Prometheus
   421  // 2. Creating a Store in Thanos so that managed cluster metrics can be accessed by the admin cluster Query
   422  // These scenarios are mutually exclusive and the Thanos Query method takes precedence
   423  // There are two conditions that enable the Thanos query method
   424  //  1. Thanos is enabled on the managed cluster
   425  //     a. This manifests as the ThanosHost field in the VMC being populated
   426  //  2. Thanos is enabled on the managed cluster
   427  //
   428  // If these two conditions are not met, the Prometheus federation will be enabled
   429  func (r *VerrazzanoManagedClusterReconciler) syncManagedMetrics(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
   430  	// We need to sync the multicluster CA secret for Prometheus and Thanos
   431  	caSecret, err := r.syncMultiClusterCASecret(ctx, log, vmc)
   432  	if err != nil {
   433  		r.handleError(ctx, vmc, "Failed to sync the multicluster CA secret", err, log)
   434  	}
   435  
   436  	thanosEnabled, err := r.isThanosEnabled()
   437  	if err != nil {
   438  		r.handleError(ctx, vmc, "Failed to verify if Thanos is enabled", err, log)
   439  		return err
   440  	}
   441  	// If the Thanos multicluster requirements are met, set up the Thanos Query store
   442  	if vmc.Status.ThanosQueryStore != "" && thanosEnabled {
   443  		err = r.syncThanosQuery(ctx, vmc)
   444  		if err != nil {
   445  			r.handleError(ctx, vmc, "Failed to update Thanos Query endpoint managed cluster", err, log)
   446  			return err
   447  		}
   448  
   449  		// If we successfully sync the managed cluster Thanos Query store, we should remove the federated Prometheus to avoid duplication
   450  		r.log.Oncef("Thanos Query synced for VMC %s. Removing the Prometheus scraper", vmc.Name)
   451  		err = r.deleteClusterPrometheusConfiguration(ctx, vmc)
   452  		if err != nil {
   453  			r.handleError(ctx, vmc, "Failed to remove the Prometheus scrape config", err, log)
   454  			return err
   455  		}
   456  		return nil
   457  	}
   458  
   459  	// If Thanos multicluster is disabled, attempt to delete left over resources
   460  	err = r.syncThanosQueryEndpointDelete(ctx, vmc)
   461  	if err != nil {
   462  		r.handleError(ctx, vmc, "Failed to delete Thanos Query endpoint managed cluster", err, log)
   463  		return err
   464  	}
   465  
   466  	// If the Prometheus host is not populated, skip federation and do nothing
   467  	if vmc.Status.PrometheusHost == "" {
   468  		// If reached, the managed cluster metrics are not populated, so we should remove the CA cert from the secret
   469  		err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil)
   470  		if err != nil {
   471  			r.handleError(ctx, vmc, "Failed to delete the managed cluster CA cert from the secret", err, log)
   472  			return err
   473  		}
   474  		log.Oncef("Managed cluster Prometheus Host not found in VMC Status for VMC %s. Waiting for VMC to be registered...", vmc.Name)
   475  		return nil
   476  	}
   477  
   478  	// Sync the Prometheus Scraper if Thanos multicluster is disabled and the host is populated
   479  	log.Debugf("Syncing the prometheus scraper for VMC %s", vmc.Name)
   480  	err = r.syncPrometheusScraper(ctx, vmc, &caSecret)
   481  	if err != nil {
   482  		r.handleError(ctx, vmc, "Failed to setup the prometheus scraper for managed cluster", err, log)
   483  		return err
   484  	}
   485  
   486  	return nil
   487  }
   488  
   489  // mutateBinding mutates the RoleBinding to ensure it has the valid params
   490  func mutateBinding(binding *rbacv1.RoleBinding, p bindingParams) {
   491  	binding.Name = generateManagedResourceName(p.vmc.Name)
   492  	binding.Namespace = p.vmc.Namespace
   493  	binding.Labels = p.vmc.Labels
   494  
   495  	binding.RoleRef = rbacv1.RoleRef{
   496  		APIGroup: "rbac.authorization.k8s.io",
   497  		Kind:     "ClusterRole",
   498  		Name:     p.roleName,
   499  	}
   500  	binding.Subjects = []rbacv1.Subject{
   501  		{
   502  			Kind:      "ServiceAccount",
   503  			Name:      p.serviceAccountName,
   504  			Namespace: constants.VerrazzanoMultiClusterNamespace,
   505  		},
   506  	}
   507  }
   508  
   509  // Generate the common name used by all resources specific to a given managed cluster
   510  func generateManagedResourceName(clusterName string) string {
   511  	return fmt.Sprintf("verrazzano-cluster-%s", clusterName)
   512  }
   513  
   514  // SetupWithManager creates a new controller and adds it to the manager
   515  func (r *VerrazzanoManagedClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
   516  	return ctrl.NewControllerManagedBy(mgr).
   517  		For(&clustersv1alpha1.VerrazzanoManagedCluster{}).
   518  		Complete(r)
   519  }
   520  
   521  // reconcileManagedClusterDelete performs all necessary cleanup during cluster deletion
   522  func (r *VerrazzanoManagedClusterReconciler) reconcileManagedClusterDelete(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
   523  	if err := r.deleteClusterPrometheusConfiguration(ctx, vmc); err != nil {
   524  		return err
   525  	}
   526  	if err := r.unregisterClusterFromArgoCD(ctx, vmc); err != nil {
   527  		return err
   528  	}
   529  	if err := r.syncThanosQueryEndpointDelete(ctx, vmc); err != nil {
   530  		return err
   531  	}
   532  	if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil); err != nil {
   533  		return err
   534  	}
   535  	return r.deleteClusterFromRancher(ctx, vmc)
   536  }
   537  
   538  // deleteClusterFromRancher calls the Rancher API to delete the cluster associated with the VMC if the VMC has a cluster id set in the status.
   539  func (r *VerrazzanoManagedClusterReconciler) deleteClusterFromRancher(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
   540  	clusterID := vmc.Status.RancherRegistration.ClusterID
   541  	if clusterID == "" {
   542  		r.log.Debugf("VMC %s/%s has no Rancher cluster id, skipping delete", vmc.Namespace, vmc.Name)
   543  		return nil
   544  	}
   545  
   546  	rc, err := rancherutil.NewAdminRancherConfig(r.Client, r.RancherIngressHost, r.log)
   547  	if err != nil {
   548  		msg := "Failed to create Rancher API client"
   549  		r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg)
   550  		r.log.Errorf("Unable to connect to Rancher API on admin cluster while attempting delete operation: %v", err)
   551  		return err
   552  	}
   553  	if _, err = DeleteClusterFromRancher(rc, clusterID, r.log); err != nil {
   554  		msg := "Failed deleting cluster"
   555  		r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg)
   556  		r.log.Errorf("Unable to delete Rancher cluster %s/%s: %v", vmc.Namespace, vmc.Name, err)
   557  		return err
   558  	}
   559  
   560  	return nil
   561  }
   562  
   563  func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManagedCARetrieved(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) {
   564  	now := metav1.Now()
   565  	r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManagedCARetrieved, Message: msg, LastTransitionTime: &now}, false)
   566  }
   567  
   568  func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManifestPushed(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) {
   569  	now := metav1.Now()
   570  	r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManifestPushed, Message: msg, LastTransitionTime: &now}, true)
   571  }
   572  
   573  // setStatusConditionNotReady sets the status condition Ready = false on the VMC in memory - does NOT update the status in the cluster
   574  func (r *VerrazzanoManagedClusterReconciler) setStatusConditionNotReady(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) {
   575  	now := metav1.Now()
   576  	r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionFalse, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false)
   577  }
   578  
   579  // setStatusConditionReady sets the status condition Ready = true on the VMC in memory - does NOT update the status in the cluster
   580  func (r *VerrazzanoManagedClusterReconciler) setStatusConditionReady(vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) {
   581  	now := metav1.Now()
   582  	r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionTrue, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false)
   583  }
   584  
   585  func (r *VerrazzanoManagedClusterReconciler) handleError(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string, err error, log vzlog.VerrazzanoLogger) {
   586  	fullMsg := fmt.Sprintf("%s: %v", msg, err)
   587  	log.ErrorfThrottled(fullMsg)
   588  	r.setStatusConditionNotReady(ctx, vmc, fullMsg)
   589  	statusErr := r.updateStatus(ctx, vmc)
   590  	if statusErr != nil {
   591  		log.ErrorfThrottled("Failed to update status for VMC %s: %v", vmc.Name, statusErr)
   592  	}
   593  }
   594  
   595  // setStatusCondition updates the VMC status conditions based and replaces already created status conditions
   596  // the onTime flag updates the status condition if the time has changed
   597  func (r *VerrazzanoManagedClusterReconciler) setStatusCondition(vmc *clustersv1alpha1.VerrazzanoManagedCluster, condition clustersv1alpha1.Condition, onTime bool) {
   598  	r.log.Debugf("Entered setStatusCondition for VMC %s for condition %s = %s, existing conditions = %v",
   599  		vmc.Name, condition.Type, condition.Status, vmc.Status.Conditions)
   600  	var matchingCondition *clustersv1alpha1.Condition
   601  	var conditionExists bool
   602  	for i, existingCondition := range vmc.Status.Conditions {
   603  		if condition.Type == existingCondition.Type &&
   604  			condition.Status == existingCondition.Status &&
   605  			condition.Message == existingCondition.Message &&
   606  			(!onTime || condition.LastTransitionTime == existingCondition.LastTransitionTime) {
   607  			// the exact same condition already exists, don't update
   608  			conditionExists = true
   609  			break
   610  		}
   611  		if condition.Type == existingCondition.Type {
   612  			// use the index here since "existingCondition" is a copy and won't point to the object in the slice
   613  			matchingCondition = &vmc.Status.Conditions[i]
   614  			break
   615  		}
   616  	}
   617  	if !conditionExists {
   618  
   619  		if matchingCondition == nil {
   620  			vmc.Status.Conditions = append(vmc.Status.Conditions, condition)
   621  		} else {
   622  			matchingCondition.Message = condition.Message
   623  			matchingCondition.Status = condition.Status
   624  			matchingCondition.LastTransitionTime = condition.LastTransitionTime
   625  		}
   626  	}
   627  }
   628  
   629  // getVerrazzanoResource gets the installed Verrazzano resource in the cluster (of which only one is expected)
   630  func (r *VerrazzanoManagedClusterReconciler) getVerrazzanoResource() (*v1beta1.Verrazzano, error) {
   631  	// Get the Verrazzano resource
   632  	verrazzano := v1beta1.VerrazzanoList{}
   633  	err := r.Client.List(context.TODO(), &verrazzano, &client.ListOptions{})
   634  	if err != nil || len(verrazzano.Items) == 0 {
   635  		return nil, r.log.ErrorfNewErr("Verrazzano must be installed: %v", err)
   636  
   637  	}
   638  	return &verrazzano.Items[0], nil
   639  }
   640  
   641  // leveraged to replace method (unit testing)
   642  var createClient = func(r *VerrazzanoManagedClusterReconciler, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
   643  	const prometheusHostPrefix = "prometheus.vmi.system"
   644  	promHost := vmc.Status.PrometheusHost
   645  	// Skip Keycloak client generation if Prometheus isn't present in VMC status
   646  	// MCAgent on the managed cluster will set this if/when it is ready
   647  	if len(promHost) == 0 {
   648  		r.log.Debug("Skipping Prometheus Keycloak client creation: VMC Prometheus not found")
   649  		return nil
   650  	}
   651  
   652  	// login to keycloak
   653  	cfg, cli, err := k8sutil.ClientConfig()
   654  	if err != nil {
   655  		return err
   656  	}
   657  
   658  	// create a context that can be leveraged by keycloak method
   659  	ctx, err := spi.NewMinimalContext(r.Client, r.log)
   660  	if err != nil {
   661  		return err
   662  	}
   663  
   664  	err = keycloak.LoginKeycloak(ctx, cfg, cli)
   665  	if err != nil {
   666  		return err
   667  	}
   668  
   669  	dnsSubdomain := promHost[len(prometheusHostPrefix)+1:]
   670  	clientID := fmt.Sprintf("verrazzano-%s", vmc.Name)
   671  	err = keycloak.CreateOrUpdateClient(ctx, cfg, cli, clientID, keycloak.ManagedClusterClientTmpl, keycloak.ManagedClusterClientUrisTemplate, false, &dnsSubdomain)
   672  	if err != nil {
   673  		return err
   674  	}
   675  
   676  	return nil
   677  }
   678  
// createManagedClusterKeycloakClient creates a Keycloak client for the managed cluster.
// It delegates to the package-level createClient function variable, which unit tests can replace,
// and returns whatever error that function returns.
func (r *VerrazzanoManagedClusterReconciler) createManagedClusterKeycloakClient(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error {
	return createClient(r, vmc)
}
   683  
   684  // getClusterClient returns a controller runtime client configured for the workload cluster
   685  func (r *VerrazzanoManagedClusterReconciler) getClusterClient(restConfig *rest.Config) (client.Client, error) {
   686  	scheme := runtime.NewScheme()
   687  	_ = rbacv1.AddToScheme(scheme)
   688  	_ = corev1.AddToScheme(scheme)
   689  	_ = netv1.AddToScheme(scheme)
   690  	_ = appsv1.AddToScheme(scheme)
   691  	_ = clustersv1alpha1.AddToScheme(scheme)
   692  
   693  	return client.New(restConfig, client.Options{Scheme: scheme})
   694  }
   695  
   696  // getWorkloadClusterKubeconfig returns a kubeconfig for accessing the workload cluster
   697  func (r *VerrazzanoManagedClusterReconciler) getWorkloadClusterKubeconfig(cluster *unstructured.Unstructured) ([]byte, error) {
   698  	// get the cluster kubeconfig
   699  	kubeconfigSecret := &corev1.Secret{}
   700  	err := r.Client.Get(context.TODO(), types.NamespacedName{Name: fmt.Sprintf("%s-kubeconfig", cluster.GetName()), Namespace: cluster.GetNamespace()}, kubeconfigSecret)
   701  	if err != nil {
   702  		r.log.Progressf("failed to obtain workload cluster kubeconfig resource. Re-queuing...")
   703  		return nil, err
   704  	}
   705  	kubeconfig, ok := kubeconfigSecret.Data["value"]
   706  	if !ok {
   707  		r.log.Error(err, "failed to read kubeconfig from resource")
   708  		return nil, fmt.Errorf("Unable to read kubeconfig from retrieved cluster resource")
   709  	}
   710  
   711  	return kubeconfig, nil
   712  }
   713  
   714  func (r *VerrazzanoManagedClusterReconciler) getWorkloadClusterClient(cluster *unstructured.Unstructured) (client.Client, error) {
   715  	// identify whether the workload cluster is using "untrusted" certs
   716  	kubeconfig, err := r.getWorkloadClusterKubeconfig(cluster)
   717  	if err != nil {
   718  		// requeue since we're waiting for cluster
   719  		return nil, err
   720  	}
   721  	// create a workload cluster client
   722  	// create workload cluster client
   723  	restConfig, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig)
   724  	if err != nil {
   725  		r.log.Progress("Failed getting rest config from workload kubeconfig")
   726  		return nil, err
   727  	}
   728  	workloadClient, err := r.getClusterClient(restConfig)
   729  	if err != nil {
   730  		return nil, err
   731  	}
   732  	return workloadClient, nil
   733  }
   734  
// newRequeueWithDelay creates a new Result that will cause a reconcile requeue after a short delay.
// NOTE(review): the arguments (2, 3, time.Second) are presumably the minimum and maximum delay
// expressed in seconds — confirm against vzctrl.NewRequeueWithDelay.
func newRequeueWithDelay() ctrl.Result {
	return vzctrl.NewRequeueWithDelay(2, 3, time.Second)
}
   739  
   740  func getClusterResourceName(cluster *unstructured.Unstructured, client client.Client) string {
   741  	// check for existence of a Rancher cluster management resource
   742  	rancherMgmtCluster := &unstructured.Unstructured{}
   743  	rancherMgmtCluster.SetGroupVersionKind(common.GetRancherMgmtAPIGVKForKind("Cluster"))
   744  	err := client.Get(context.TODO(), types.NamespacedName{Name: cluster.GetName(), Namespace: cluster.GetNamespace()}, rancherMgmtCluster)
   745  	if err != nil {
   746  		return cluster.GetName()
   747  	}
   748  	// return the display Name
   749  	return rancherMgmtCluster.UnstructuredContent()["spec"].(map[string]interface{})["displayName"].(string)
   750  }