github.com/verrazzano/verrazzano@v1.7.0/cluster-operator/controllers/vmc/vmc_controller.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package vmc 5 6 import ( 7 "context" 8 goerrors "errors" 9 "fmt" 10 "time" 11 12 "github.com/verrazzano/verrazzano/pkg/k8sutil" 13 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/keycloak" 14 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/spi" 15 16 "github.com/prometheus/client_golang/prometheus" 17 "github.com/prometheus/client_golang/prometheus/promauto" 18 clustersv1alpha1 "github.com/verrazzano/verrazzano/cluster-operator/apis/clusters/v1alpha1" 19 "github.com/verrazzano/verrazzano/cluster-operator/internal/capi" 20 vzconstants "github.com/verrazzano/verrazzano/pkg/constants" 21 vzctrl "github.com/verrazzano/verrazzano/pkg/controller" 22 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 23 "github.com/verrazzano/verrazzano/pkg/rancherutil" 24 vzstring "github.com/verrazzano/verrazzano/pkg/string" 25 "github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1" 26 "github.com/verrazzano/verrazzano/platform-operator/constants" 27 "go.uber.org/zap" 28 corev1 "k8s.io/api/core/v1" 29 rbacv1 "k8s.io/api/rbac/v1" 30 "k8s.io/apimachinery/pkg/api/errors" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 33 "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/types" 35 ctrl "sigs.k8s.io/controller-runtime" 36 "sigs.k8s.io/controller-runtime/pkg/client" 37 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 38 "sigs.k8s.io/controller-runtime/pkg/reconcile" 39 ) 40 41 const finalizerName = "managedcluster.verrazzano.io" 42 43 // VerrazzanoManagedClusterReconciler reconciles a VerrazzanoManagedCluster object. 44 // The reconciler will create a ServiceAcount, RoleBinding, and a Secret which 45 // contains the kubeconfig to be used by the Multi-Cluster Agent to access the admin cluster. 46 type VerrazzanoManagedClusterReconciler struct { 47 client.Client 48 Scheme *runtime.Scheme 49 RancherIngressHost string 50 log vzlog.VerrazzanoLogger 51 } 52 53 // bindingParams used to mutate the RoleBinding 54 type bindingParams struct { 55 vmc *clustersv1alpha1.VerrazzanoManagedCluster 56 roleName string 57 serviceAccountName string 58 } 59 60 var ( 61 reconcileTimeMetric = promauto.NewGauge(prometheus.GaugeOpts{ 62 Name: "vz_cluster_operator_reconcile_vmc_duration_seconds", 63 Help: "The duration of the reconcile process for cluster objects", 64 }) 65 reconcileErrorCount = promauto.NewCounter(prometheus.CounterOpts{ 66 Name: "vz_cluster_operator_reconcile_vmc_error_total", 67 Help: "The amount of errors encountered in the reconcile process", 68 }) 69 reconcileSuccessCount = promauto.NewCounter(prometheus.CounterOpts{ 70 Name: "vz_cluster_operator_reconcile_vmc_success_total", 71 Help: "The number of times the reconcile process succeeded", 72 }) 73 ) 74 75 func (r *VerrazzanoManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 76 // Time the reconcile process and set the metric with the elapsed time 77 startTime := time.Now() 78 defer reconcileTimeMetric.Set(time.Since(startTime).Seconds()) 79 80 if ctx == nil { 81 reconcileErrorCount.Inc() 82 return ctrl.Result{}, goerrors.New("context cannot be nil") 83 } 84 cr := &clustersv1alpha1.VerrazzanoManagedCluster{} 85 if err := r.Get(context.TODO(), req.NamespacedName, cr); err != nil { 86 // If the resource is not found, that means all of the finalizers have been removed, 87 // and the Verrazzano resource has been deleted, so there is nothing left to do. 88 if errors.IsNotFound(err) { 89 reconcileSuccessCount.Inc() 90 return reconcile.Result{}, nil 91 } 92 reconcileErrorCount.Inc() 93 zap.S().Errorf("Failed to fetch VerrazzanoManagedCluster resource: %v", err) 94 return newRequeueWithDelay(), nil 95 } 96 97 // Get the resource logger needed to log message using 'progress' and 'once' methods 98 log, err := vzlog.EnsureResourceLogger(&vzlog.ResourceConfig{ 99 Name: cr.Name, 100 Namespace: cr.Namespace, 101 ID: string(cr.UID), 102 Generation: cr.Generation, 103 ControllerName: "multicluster", 104 }) 105 if err != nil { 106 reconcileErrorCount.Inc() 107 zap.S().Errorf("Failed to create controller logger for VerrazzanoManagedCluster controller", err) 108 } 109 110 r.log = log 111 log.Oncef("Reconciling Verrazzano resource %v", req.NamespacedName) 112 res, err := r.doReconcile(ctx, log, cr) 113 if err != nil { 114 // Never return an error since it has already been logged and we don't want the 115 // controller runtime to log again (with stack trace). Just re-queue if there is an error. 116 reconcileErrorCount.Inc() 117 return newRequeueWithDelay(), nil 118 } 119 if vzctrl.ShouldRequeue(res) { 120 reconcileSuccessCount.Inc() 121 return res, nil 122 } 123 124 // The resource has been reconciled. 125 log.Oncef("Successfully reconciled VerrazzanoManagedCluster resource %v", req.NamespacedName) 126 127 reconcileSuccessCount.Inc() 128 return ctrl.Result{}, nil 129 } 130 131 // Reconcile reconciles a VerrazzanoManagedCluster object 132 func (r *VerrazzanoManagedClusterReconciler) doReconcile(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (ctrl.Result, error) { 133 134 if !vmc.ObjectMeta.DeletionTimestamp.IsZero() { 135 // Finalizer is present, so lets do the cluster deletion 136 if vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) { 137 if err := r.reconcileManagedClusterDelete(ctx, vmc); err != nil { 138 return reconcile.Result{}, err 139 } 140 141 // Remove the finalizer and update the Verrazzano resource if the deletion has finished. 142 log.Infof("Removing finalizer %s", finalizerName) 143 vmc.ObjectMeta.Finalizers = vzstring.RemoveStringFromSlice(vmc.ObjectMeta.Finalizers, finalizerName) 144 err := r.Update(ctx, vmc) 145 if err != nil && !errors.IsConflict(err) { 146 return reconcile.Result{}, err 147 } 148 } 149 return reconcile.Result{}, nil 150 } 151 152 // Add our finalizer if not already added 153 if !vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) { 154 log.Infof("Adding finalizer %s", finalizerName) 155 vmc.ObjectMeta.Finalizers = append(vmc.ObjectMeta.Finalizers, finalizerName) 156 if err := r.Update(ctx, vmc); err != nil { 157 return ctrl.Result{}, err 158 } 159 } 160 161 // Sync the service account 162 log.Debugf("Syncing the ServiceAccount for VMC %s", vmc.Name) 163 err := r.syncServiceAccount(vmc) 164 if err != nil { 165 r.handleError(ctx, vmc, "Failed to sync the ServiceAccount", err, log) 166 return newRequeueWithDelay(), err 167 } 168 169 log.Debugf("Syncing the RoleBinding for VMC %s", vmc.Name) 170 _, err = r.syncManagedRoleBinding(vmc) 171 if err != nil { 172 r.handleError(ctx, vmc, "Failed to sync the RoleBinding", err, log) 173 return newRequeueWithDelay(), err 174 } 175 176 log.Debugf("Syncing the Agent secret for VMC %s", vmc.Name) 177 err = r.syncAgentSecret(vmc) 178 if err != nil { 179 r.handleError(ctx, vmc, "Failed to sync the agent secret", err, log) 180 return newRequeueWithDelay(), err 181 } 182 183 log.Debugf("Syncing the Registration secret for VMC %s", vmc.Name) 184 err = r.syncRegistrationSecret(vmc) 185 if err != nil { 186 r.handleError(ctx, vmc, "Failed to sync the registration secret", err, log) 187 return newRequeueWithDelay(), err 188 } 189 190 log.Debugf("Syncing the Manifest secret for VMC %s", vmc.Name) 191 vzVMCWaitingForClusterID, err := r.syncManifestSecret(ctx, vmc) 192 if err != nil { 193 r.handleError(ctx, vmc, "Failed to sync the Manifest secret", err, log) 194 return newRequeueWithDelay(), err 195 } 196 if vzVMCWaitingForClusterID { 197 // waiting for the cluster ID to be set in the status, so requeue and try again 198 return newRequeueWithDelay(), nil 199 } 200 201 // create/update a secret with the CA cert from the managed cluster (if any errors occur we just log and continue) 202 syncedCert, err := r.syncCACertSecret(vmc) 203 if err != nil { 204 msg := fmt.Sprintf("Unable to get CA cert from managed cluster %s with id %s: %v", vmc.Name, vmc.Status.RancherRegistration.ClusterID, err) 205 r.log.Infof(msg) 206 r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionFalse, msg) 207 } else { 208 if syncedCert { 209 r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionTrue, "Managed cluster CA cert retrieved successfully") 210 } 211 } 212 213 log.Debugf("Updating Rancher ClusterRoleBindingTemplate for VMC %s", vmc.Name) 214 err = r.updateRancherClusterRoleBindingTemplate(vmc) 215 if err != nil { 216 r.handleError(ctx, vmc, "Failed to update Rancher ClusterRoleBindingTemplate", err, log) 217 return newRequeueWithDelay(), err 218 } 219 220 log.Debugf("Pushing the Manifest objects for VMC %s", vmc.Name) 221 pushedManifest, err := r.pushManifestObjects(vmc) 222 if err != nil { 223 r.handleError(ctx, vmc, "Failed to push the Manifest objects", err, log) 224 r.setStatusConditionManifestPushed(vmc, corev1.ConditionFalse, fmt.Sprintf("Failed to push the manifest objects to the managed cluster: %v", err)) 225 return newRequeueWithDelay(), err 226 } 227 if pushedManifest { 228 r.log.Info("Manifest objects have been successfully pushed to the managed cluster") 229 r.setStatusConditionManifestPushed(vmc, corev1.ConditionTrue, "Manifest objects pushed to the managed cluster") 230 } 231 232 log.Debugf("Registering ArgoCD for VMC %s", vmc.Name) 233 var argoCDRegistration *clustersv1alpha1.ArgoCDRegistration 234 argoCDEnabled, err := r.isArgoCDEnabled() 235 if err != nil { 236 return newRequeueWithDelay(), err 237 } 238 rancherEnabled, err := r.isRancherEnabled() 239 if err != nil { 240 return newRequeueWithDelay(), err 241 } 242 if argoCDEnabled && rancherEnabled { 243 argoCDRegistration, err = r.registerManagedClusterWithArgoCD(vmc) 244 if err != nil { 245 r.handleError(ctx, vmc, "Failed to register managed cluster with Argo CD", err, log) 246 return newRequeueWithDelay(), err 247 } 248 vmc.Status.ArgoCDRegistration = *argoCDRegistration 249 } 250 if !rancherEnabled && argoCDEnabled { 251 now := metav1.Now() 252 vmc.Status.ArgoCDRegistration = clustersv1alpha1.ArgoCDRegistration{ 253 Status: clustersv1alpha1.RegistrationPendingRancher, 254 Timestamp: &now, 255 Message: "Skipping Argo CD cluster registration due to Rancher not installed"} 256 } 257 258 r.setStatusConditionReady(vmc, "Ready") 259 statusErr := r.updateStatus(ctx, vmc) 260 261 if statusErr != nil { 262 log.Errorf("Failed to update status to ready for VMC %s: %v", vmc.Name, statusErr) 263 } 264 265 if err := r.syncManagedMetrics(ctx, log, vmc); err != nil { 266 return newRequeueWithDelay(), err 267 } 268 269 log.Debugf("Creating or updating keycloak client for %s", vmc.Name) 270 err = r.createManagedClusterKeycloakClient(vmc) 271 if err != nil { 272 r.handleError(ctx, vmc, "Failed to create or update Keycloak client for managed cluster", err, log) 273 return newRequeueWithDelay(), err 274 } 275 276 return ctrl.Result{Requeue: true, RequeueAfter: constants.ReconcileLoopRequeueInterval}, nil 277 } 278 279 func (r *VerrazzanoManagedClusterReconciler) syncServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 280 // Create or update the service account 281 _, serviceAccount, err := r.createOrUpdateServiceAccount(context.TODO(), vmc) 282 if err != nil { 283 return err 284 } 285 286 if len(serviceAccount.Secrets) == 0 { 287 _, err = r.createServiceAccountTokenSecret(context.TODO(), serviceAccount) 288 if err != nil { 289 return err 290 } 291 } 292 293 // Does the VerrazzanoManagedCluster object contain the service account name? 294 saName := generateManagedResourceName(vmc.Name) 295 if vmc.Spec.ServiceAccount != saName { 296 r.log.Oncef("Updating ServiceAccount from %s to %s", vmc.Spec.ServiceAccount, saName) 297 vmc.Spec.ServiceAccount = saName 298 err = r.Update(context.TODO(), vmc) 299 if err != nil { 300 return err 301 } 302 } 303 304 return nil 305 } 306 307 // Create or update the ServiceAccount for a VerrazzanoManagedCluster 308 func (r *VerrazzanoManagedClusterReconciler) createOrUpdateServiceAccount(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, *corev1.ServiceAccount, error) { 309 var serviceAccount corev1.ServiceAccount 310 serviceAccount.Namespace = vmc.Namespace 311 serviceAccount.Name = generateManagedResourceName(vmc.Name) 312 313 operationResult, err := controllerutil.CreateOrUpdate(ctx, r.Client, &serviceAccount, func() error { 314 r.mutateServiceAccount(vmc, &serviceAccount) 315 // This SetControllerReference call will trigger garbage collection i.e. the serviceAccount 316 // will automatically get deleted when the VerrazzanoManagedCluster is deleted 317 return controllerutil.SetControllerReference(vmc, &serviceAccount, r.Scheme) 318 }) 319 return operationResult, &serviceAccount, err 320 } 321 322 func (r *VerrazzanoManagedClusterReconciler) mutateServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster, serviceAccount *corev1.ServiceAccount) { 323 serviceAccount.Name = generateManagedResourceName(vmc.Name) 324 } 325 326 func (r *VerrazzanoManagedClusterReconciler) createServiceAccountTokenSecret(ctx context.Context, serviceAccount *corev1.ServiceAccount) (controllerutil.OperationResult, error) { 327 var secret corev1.Secret 328 secret.Name = serviceAccount.Name + "-token" 329 secret.Namespace = serviceAccount.Namespace 330 secret.Type = corev1.SecretTypeServiceAccountToken 331 secret.Annotations = map[string]string{ 332 corev1.ServiceAccountNameKey: serviceAccount.Name, 333 } 334 335 return controllerutil.CreateOrUpdate(ctx, r.Client, &secret, func() error { 336 // This SetControllerReference call will trigger garbage collection i.e. the token secret 337 // will automatically get deleted when the service account is deleted 338 return controllerutil.SetControllerReference(serviceAccount, &secret, r.Scheme) 339 }) 340 } 341 342 // syncManagedRoleBinding syncs the RoleBinding that binds the service account used by the managed cluster 343 // to the role containing the permission 344 func (r *VerrazzanoManagedClusterReconciler) syncManagedRoleBinding(vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, error) { 345 var roleBinding rbacv1.RoleBinding 346 roleBinding.Namespace = vmc.Namespace 347 roleBinding.Name = generateManagedResourceName(vmc.Name) 348 349 return controllerutil.CreateOrUpdate(context.TODO(), r.Client, &roleBinding, func() error { 350 mutateBinding(&roleBinding, bindingParams{ 351 vmc: vmc, 352 roleName: constants.MCClusterRole, 353 serviceAccountName: vmc.Spec.ServiceAccount, 354 }) 355 // This SetControllerReference call will trigger garbage collection i.e. the roleBinding 356 // will automatically get deleted when the VerrazzanoManagedCluster is deleted 357 return controllerutil.SetControllerReference(vmc, &roleBinding, r.Scheme) 358 }) 359 } 360 361 // syncMultiClusterCASecret gets the CA secret in the VMC from the managed cluster and populates the CA secret for metrics scraping 362 func (r *VerrazzanoManagedClusterReconciler) syncMultiClusterCASecret(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (corev1.Secret, error) { 363 var secret corev1.Secret 364 365 // read the configuration secret specified if it exists 366 if len(vmc.Spec.CASecret) > 0 { 367 secretNsn := types.NamespacedName{ 368 Namespace: vmc.Namespace, 369 Name: vmc.Spec.CASecret, 370 } 371 372 // validate secret if it exists 373 if err := r.Get(context.TODO(), secretNsn, &secret); err != nil { 374 return secret, log.ErrorfNewErr("failed to fetch the managed cluster CA secret %s/%s, %v", vmc.Namespace, vmc.Spec.CASecret, err) 375 } 376 } 377 if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, &secret); err != nil { 378 return secret, log.ErrorfNewErr("Failed to sync the managed cluster CA certs for VMC %s: %v", vmc.Name, err) 379 } 380 return secret, nil 381 } 382 383 // mutateManagedClusterCACertsSecret adds and removes managed cluster CA certs to/from the managed cluster CA certs secret 384 func (r *VerrazzanoManagedClusterReconciler) mutateManagedClusterCACertsSecret(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, cacrtSecret *corev1.Secret) error { 385 ns := &corev1.Namespace{} 386 err := r.Client.Get(ctx, types.NamespacedName{Name: constants.VerrazzanoMonitoringNamespace}, ns) 387 if errors.IsNotFound(err) { 388 r.log.Infof("namespace %s does not exist", constants.VerrazzanoMonitoringNamespace) 389 return nil 390 } 391 secret := &corev1.Secret{ 392 ObjectMeta: metav1.ObjectMeta{ 393 Name: constants.PromManagedClusterCACertsSecretName, 394 Namespace: constants.VerrazzanoMonitoringNamespace, 395 }, 396 } 397 398 if _, err := controllerutil.CreateOrUpdate(ctx, r.Client, secret, func() error { 399 if secret.Data == nil { 400 secret.Data = make(map[string][]byte) 401 } 402 if cacrtSecret != nil && cacrtSecret.Data != nil && len(cacrtSecret.Data["cacrt"]) > 0 { 403 secret.Data[getCAKey(vmc)] = cacrtSecret.Data["cacrt"] 404 } else { 405 delete(secret.Data, getCAKey(vmc)) 406 } 407 return nil 408 }); err != nil { 409 return err 410 } 411 412 return nil 413 } 414 415 // syncManagedMetrics syncs the metrics federation for managed clusters 416 // There are currently two ways of federating metrics from managed clusters: 417 // 1. Creating a Scrape config for the managed cluster on the admin cluster Prometheus 418 // 2. Creating a Store in Thanos so that managed cluster metrics can be accessed by the admin cluster Query 419 // These scenarios are mutually exclusive and the Thanos Query method takes precedence 420 // There are two conditions that enable the Thanos query method 421 // 1. Thanos is enabled on the managed cluster 422 // a. This manifests as the ThanosHost field in the VMC being populated 423 // 2. Thanos is enabled on the managed cluster 424 // 425 // If these two conditions are not met, the Prometheus federation will be enabled 426 func (r *VerrazzanoManagedClusterReconciler) syncManagedMetrics(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 427 // We need to sync the multicluster CA secret for Prometheus and Thanos 428 caSecret, err := r.syncMultiClusterCASecret(ctx, log, vmc) 429 if err != nil { 430 r.handleError(ctx, vmc, "Failed to sync the multicluster CA secret", err, log) 431 } 432 433 thanosEnabled, err := r.isThanosEnabled() 434 if err != nil { 435 r.handleError(ctx, vmc, "Failed to verify if Thanos is enabled", err, log) 436 return err 437 } 438 // If the Thanos multicluster requirements are met, set up the Thanos Query store 439 if vmc.Status.ThanosQueryStore != "" && thanosEnabled { 440 err = r.syncThanosQuery(ctx, vmc) 441 if err != nil { 442 r.handleError(ctx, vmc, "Failed to update Thanos Query endpoint managed cluster", err, log) 443 return err 444 } 445 446 // If we successfully sync the managed cluster Thanos Query store, we should remove the federated Prometheus to avoid duplication 447 r.log.Oncef("Thanos Query synced for VMC %s. Removing the Prometheus scraper", vmc.Name) 448 err = r.deleteClusterPrometheusConfiguration(ctx, vmc) 449 if err != nil { 450 r.handleError(ctx, vmc, "Failed to remove the Prometheus scrape config", err, log) 451 return err 452 } 453 return nil 454 } 455 456 // If Thanos multicluster is disabled, attempt to delete left over resources 457 err = r.syncThanosQueryEndpointDelete(ctx, vmc) 458 if err != nil { 459 r.handleError(ctx, vmc, "Failed to delete Thanos Query endpoint managed cluster", err, log) 460 return err 461 } 462 463 // If the Prometheus host is not populated, skip federation and do nothing 464 if vmc.Status.PrometheusHost == "" { 465 // If reached, the managed cluster metrics are not populated, so we should remove the CA cert from the secret 466 err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil) 467 if err != nil { 468 r.handleError(ctx, vmc, "Failed to delete the managed cluster CA cert from the secret", err, log) 469 return err 470 } 471 log.Oncef("Managed cluster Prometheus Host not found in VMC Status for VMC %s. Waiting for VMC to be registered...", vmc.Name) 472 return nil 473 } 474 475 // Sync the Prometheus Scraper if Thanos multicluster is disabled and the host is populated 476 log.Debugf("Syncing the prometheus scraper for VMC %s", vmc.Name) 477 err = r.syncPrometheusScraper(ctx, vmc, &caSecret) 478 if err != nil { 479 r.handleError(ctx, vmc, "Failed to setup the prometheus scraper for managed cluster", err, log) 480 return err 481 } 482 483 return nil 484 } 485 486 // mutateBinding mutates the RoleBinding to ensure it has the valid params 487 func mutateBinding(binding *rbacv1.RoleBinding, p bindingParams) { 488 binding.Name = generateManagedResourceName(p.vmc.Name) 489 binding.Namespace = p.vmc.Namespace 490 binding.Labels = p.vmc.Labels 491 492 binding.RoleRef = rbacv1.RoleRef{ 493 APIGroup: "rbac.authorization.k8s.io", 494 Kind: "ClusterRole", 495 Name: p.roleName, 496 } 497 binding.Subjects = []rbacv1.Subject{ 498 { 499 Kind: "ServiceAccount", 500 Name: p.serviceAccountName, 501 Namespace: constants.VerrazzanoMultiClusterNamespace, 502 }, 503 } 504 } 505 506 // Generate the common name used by all resources specific to a given managed cluster 507 func generateManagedResourceName(clusterName string) string { 508 return fmt.Sprintf("verrazzano-cluster-%s", clusterName) 509 } 510 511 // SetupWithManager creates a new controller and adds it to the manager 512 func (r *VerrazzanoManagedClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { 513 return ctrl.NewControllerManagedBy(mgr). 514 For(&clustersv1alpha1.VerrazzanoManagedCluster{}). 515 Complete(r) 516 } 517 518 // reconcileManagedClusterDelete performs all necessary cleanup during cluster deletion 519 func (r *VerrazzanoManagedClusterReconciler) reconcileManagedClusterDelete(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 520 if err := r.deleteClusterPrometheusConfiguration(ctx, vmc); err != nil { 521 return err 522 } 523 if err := r.unregisterClusterFromArgoCD(ctx, vmc); err != nil { 524 return err 525 } 526 if err := r.syncThanosQueryEndpointDelete(ctx, vmc); err != nil { 527 return err 528 } 529 if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil); err != nil { 530 return err 531 } 532 return r.deleteClusterFromRancher(ctx, vmc) 533 } 534 535 // deleteClusterFromRancher calls the Rancher API to delete the cluster associated with the VMC if the VMC has a cluster id set in the status. 536 func (r *VerrazzanoManagedClusterReconciler) deleteClusterFromRancher(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 537 clusterID := vmc.Status.RancherRegistration.ClusterID 538 if clusterID == "" { 539 r.log.Debugf("VMC %s/%s has no Rancher cluster id, skipping delete", vmc.Namespace, vmc.Name) 540 return nil 541 } 542 543 rc, err := rancherutil.NewAdminRancherConfig(r.Client, r.RancherIngressHost, r.log) 544 if err != nil { 545 msg := "Failed to create Rancher API client" 546 r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg) 547 r.log.Errorf("Unable to connect to Rancher API on admin cluster while attempting delete operation: %v", err) 548 return err 549 } 550 if _, err = DeleteClusterFromRancher(rc, clusterID, r.log); err != nil { 551 msg := "Failed deleting cluster" 552 r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg) 553 r.log.Errorf("Unable to delete Rancher cluster %s/%s: %v", vmc.Namespace, vmc.Name, err) 554 return err 555 } 556 557 return nil 558 } 559 560 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManagedCARetrieved(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) { 561 now := metav1.Now() 562 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManagedCARetrieved, Message: msg, LastTransitionTime: &now}, false) 563 } 564 565 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManifestPushed(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) { 566 now := metav1.Now() 567 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManifestPushed, Message: msg, LastTransitionTime: &now}, true) 568 } 569 570 // setStatusConditionNotReady sets the status condition Ready = false on the VMC in memory - does NOT update the status in the cluster 571 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionNotReady(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) { 572 now := metav1.Now() 573 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionFalse, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false) 574 } 575 576 // setStatusConditionReady sets the status condition Ready = true on the VMC in memory - does NOT update the status in the cluster 577 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionReady(vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) { 578 now := metav1.Now() 579 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionTrue, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false) 580 } 581 582 func (r *VerrazzanoManagedClusterReconciler) handleError(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string, err error, log vzlog.VerrazzanoLogger) { 583 fullMsg := fmt.Sprintf("%s: %v", msg, err) 584 log.ErrorfThrottled(fullMsg) 585 r.setStatusConditionNotReady(ctx, vmc, fullMsg) 586 statusErr := r.updateStatus(ctx, vmc) 587 if statusErr != nil { 588 log.ErrorfThrottled("Failed to update status for VMC %s: %v", vmc.Name, statusErr) 589 } 590 } 591 592 // setStatusCondition updates the VMC status conditions based and replaces already created status conditions 593 // the onTime flag updates the status condition if the time has changed 594 func (r *VerrazzanoManagedClusterReconciler) setStatusCondition(vmc *clustersv1alpha1.VerrazzanoManagedCluster, condition clustersv1alpha1.Condition, onTime bool) { 595 r.log.Debugf("Entered setStatusCondition for VMC %s for condition %s = %s, existing conditions = %v", 596 vmc.Name, condition.Type, condition.Status, vmc.Status.Conditions) 597 var matchingCondition *clustersv1alpha1.Condition 598 var conditionExists bool 599 for i, existingCondition := range vmc.Status.Conditions { 600 if condition.Type == existingCondition.Type && 601 condition.Status == existingCondition.Status && 602 condition.Message == existingCondition.Message && 603 (!onTime || condition.LastTransitionTime == existingCondition.LastTransitionTime) { 604 // the exact same condition already exists, don't update 605 conditionExists = true 606 break 607 } 608 if condition.Type == existingCondition.Type { 609 // use the index here since "existingCondition" is a copy and won't point to the object in the slice 610 matchingCondition = &vmc.Status.Conditions[i] 611 break 612 } 613 } 614 if !conditionExists { 615 616 if matchingCondition == nil { 617 vmc.Status.Conditions = append(vmc.Status.Conditions, condition) 618 } else { 619 matchingCondition.Message = condition.Message 620 matchingCondition.Status = condition.Status 621 matchingCondition.LastTransitionTime = condition.LastTransitionTime 622 } 623 } 624 } 625 626 // updateStatus updates the status of the VMC in the cluster, with all provided conditions, after setting the vmc.Status.State field for the cluster 627 func (r *VerrazzanoManagedClusterReconciler) updateStatus(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 628 if err := r.updateState(vmc); err != nil { 629 return err 630 } 631 632 // Fetch the existing VMC to avoid conflicts in the status update 633 existingVMC := &clustersv1alpha1.VerrazzanoManagedCluster{} 634 err := r.Get(context.TODO(), types.NamespacedName{Namespace: vmc.Namespace, Name: vmc.Name}, existingVMC) 635 if err != nil { 636 return err 637 } 638 639 // Replace the existing status conditions and state with the conditions generated from this reconcile 640 for _, genCondition := range vmc.Status.Conditions { 641 r.setStatusCondition(existingVMC, genCondition, genCondition.Type == clustersv1alpha1.ConditionManifestPushed) 642 } 643 existingVMC.Status.State = vmc.Status.State 644 existingVMC.Status.ArgoCDRegistration = vmc.Status.ArgoCDRegistration 645 646 r.log.Debugf("Updating Status of VMC %s: %v", vmc.Name, vmc.Status.Conditions) 647 return r.Status().Update(ctx, existingVMC) 648 } 649 650 // updateState sets the vmc.Status.State for the given VMC. 651 // The state field functions differently according to whether this VMC references an underlying ClusterAPI cluster. 652 func (r *VerrazzanoManagedClusterReconciler) updateState(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 653 // If there is no underlying CAPI cluster, set the state field based on the lastAgentConnectTime 654 if vmc.Status.ClusterRef == nil { 655 r.updateStateFromLastAgentConnectTime(vmc) 656 return nil 657 } 658 659 // If there is an underlying CAPI cluster, set the state field according to the phase of the CAPI cluster. 660 capiClusterPhase, err := r.getCAPIClusterPhase(vmc.Status.ClusterRef) 661 if err != nil { 662 return err 663 } 664 if capiClusterPhase != "" { 665 vmc.Status.State = capiClusterPhase 666 } 667 return nil 668 } 669 670 // updateStateFromLastAgentConnectTime sets the vmc.Status.State according to the lastAgentConnectTime, 671 // setting possible values of Active, Inactive, or Pending. 672 func (r *VerrazzanoManagedClusterReconciler) updateStateFromLastAgentConnectTime(vmc *clustersv1alpha1.VerrazzanoManagedCluster) { 673 if vmc.Status.LastAgentConnectTime != nil { 674 currentTime := metav1.Now() 675 // Using the current plus added time to find the difference with lastAgentConnectTime to validate 676 // if it exceeds the max allowed time before changing the state of the vmc resource. 677 maxPollingTime := currentTime.Add(vzconstants.VMCAgentPollingTimeInterval * vzconstants.MaxTimesVMCAgentPollingTime) 678 timeDiff := maxPollingTime.Sub(vmc.Status.LastAgentConnectTime.Time) 679 if int(timeDiff.Minutes()) > vzconstants.MaxTimesVMCAgentPollingTime { 680 vmc.Status.State = clustersv1alpha1.StateInactive 681 } else if vmc.Status.State == "" { 682 vmc.Status.State = clustersv1alpha1.StatePending 683 } else { 684 vmc.Status.State = clustersv1alpha1.StateActive 685 } 686 } 687 } 688 689 // getCAPIClusterPhase returns the phase reported by the CAPI Cluster CR which is referenced by clusterRef. 690 func (r *VerrazzanoManagedClusterReconciler) getCAPIClusterPhase(clusterRef *clustersv1alpha1.ClusterReference) (clustersv1alpha1.StateType, error) { 691 // Get the CAPI Cluster CR 692 cluster := &unstructured.Unstructured{} 693 cluster.SetGroupVersionKind(capi.GVKCAPICluster) 694 clusterNamespacedName := types.NamespacedName{ 695 Name: clusterRef.Name, 696 Namespace: clusterRef.Namespace, 697 } 698 if err := r.Get(context.TODO(), clusterNamespacedName, cluster); err != nil { 699 if errors.IsNotFound(err) { 700 return "", nil 701 } 702 return "", err 703 } 704 705 // Get the state 706 phase, found, err := unstructured.NestedString(cluster.Object, "status", "phase") 707 if !found { 708 r.log.Progressf("could not find status.phase field inside cluster %s: %v", clusterNamespacedName, err) 709 return "", nil 710 } 711 if err != nil { 712 r.log.Progressf("error while looking for status.phase field for cluster %s: %v", clusterNamespacedName, err) 713 return "", nil 714 } 715 716 // Validate that the CAPI Phase is a proper StateType for the VMC 717 switch state := clustersv1alpha1.StateType(phase); state { 718 case clustersv1alpha1.StatePending, 719 clustersv1alpha1.StateProvisioning, 720 clustersv1alpha1.StateProvisioned, 721 clustersv1alpha1.StateDeleting, 722 clustersv1alpha1.StateUnknown, 723 clustersv1alpha1.StateFailed: 724 return state, nil 725 default: 726 r.log.Progressf("retrieved an invalid ClusterAPI Cluster phase of %s", state) 727 return clustersv1alpha1.StateUnknown, nil 728 } 729 } 730 731 // getVerrazzanoResource gets the installed Verrazzano resource in the cluster (of which only one is expected) 732 func (r *VerrazzanoManagedClusterReconciler) getVerrazzanoResource() (*v1beta1.Verrazzano, error) { 733 // Get the Verrazzano resource 734 verrazzano := v1beta1.VerrazzanoList{} 735 err := r.Client.List(context.TODO(), &verrazzano, &client.ListOptions{}) 736 if err != nil || len(verrazzano.Items) == 0 { 737 return nil, r.log.ErrorfNewErr("Verrazzano must be installed: %v", err) 738 739 } 740 return &verrazzano.Items[0], nil 741 } 742 743 // leveraged to replace method (unit testing) 744 var createClient = func(r *VerrazzanoManagedClusterReconciler, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 745 const prometheusHostPrefix = "prometheus.vmi.system" 746 promHost := vmc.Status.PrometheusHost 747 // Skip Keycloak client generation if Prometheus isn't present in VMC status 748 // MCAgent on the managed cluster will set this if/when it is ready 749 if len(promHost) == 0 { 750 r.log.Debug("Skipping Prometheus Keycloak client creation: VMC Prometheus not found") 751 return nil 752 } 753 754 // login to keycloak 755 cfg, cli, err := k8sutil.ClientConfig() 756 if err != nil { 757 return err 758 } 759 760 // create a context that can be leveraged by keycloak method 761 ctx, err := spi.NewMinimalContext(r.Client, r.log) 762 if err != nil { 763 return err 764 } 765 766 err = keycloak.LoginKeycloak(ctx, cfg, cli) 767 if err != nil { 768 return err 769 } 770 771 dnsSubdomain := promHost[len(prometheusHostPrefix)+1:] 772 clientID := fmt.Sprintf("verrazzano-%s", vmc.Name) 773 err = keycloak.CreateOrUpdateClient(ctx, cfg, cli, clientID, keycloak.ManagedClusterClientTmpl, keycloak.ManagedClusterClientUrisTemplate, false, &dnsSubdomain) 774 if err != nil { 775 return err 776 } 777 778 return nil 779 } 780 781 // createManagedClusterKeycloakClient creates a Keycloak client for the managed cluster 782 func (r *VerrazzanoManagedClusterReconciler) createManagedClusterKeycloakClient(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 783 return createClient(r, vmc) 784 } 785 786 // Create a new Result that will cause a reconcile requeue after a short delay 787 func newRequeueWithDelay() ctrl.Result { 788 return vzctrl.NewRequeueWithDelay(2, 3, time.Second) 789 }