github.com/verrazzano/verrazzano@v1.7.1/cluster-operator/controllers/vmc/vmc_controller.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package vmc 5 6 import ( 7 "context" 8 goerrors "errors" 9 "fmt" 10 "time" 11 12 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/common" 13 appsv1 "k8s.io/api/apps/v1" 14 netv1 "k8s.io/api/networking/v1" 15 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" 16 "k8s.io/client-go/rest" 17 "k8s.io/client-go/tools/clientcmd" 18 19 "github.com/verrazzano/verrazzano/pkg/k8sutil" 20 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/keycloak" 21 "github.com/verrazzano/verrazzano/platform-operator/controllers/verrazzano/component/spi" 22 23 "github.com/prometheus/client_golang/prometheus" 24 "github.com/prometheus/client_golang/prometheus/promauto" 25 clustersv1alpha1 "github.com/verrazzano/verrazzano/cluster-operator/apis/clusters/v1alpha1" 26 vzctrl "github.com/verrazzano/verrazzano/pkg/controller" 27 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 28 "github.com/verrazzano/verrazzano/pkg/rancherutil" 29 vzstring "github.com/verrazzano/verrazzano/pkg/string" 30 "github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1" 31 "github.com/verrazzano/verrazzano/platform-operator/constants" 32 "go.uber.org/zap" 33 corev1 "k8s.io/api/core/v1" 34 rbacv1 "k8s.io/api/rbac/v1" 35 "k8s.io/apimachinery/pkg/api/errors" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/runtime" 38 "k8s.io/apimachinery/pkg/types" 39 ctrl "sigs.k8s.io/controller-runtime" 40 "sigs.k8s.io/controller-runtime/pkg/client" 41 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 42 "sigs.k8s.io/controller-runtime/pkg/reconcile" 43 ) 44 45 const finalizerName = "managedcluster.verrazzano.io" 46 47 // VerrazzanoManagedClusterReconciler reconciles a VerrazzanoManagedCluster object. 48 // The reconciler will create a ServiceAcount, RoleBinding, and a Secret which 49 // contains the kubeconfig to be used by the Multi-Cluster Agent to access the admin cluster. 50 type VerrazzanoManagedClusterReconciler struct { 51 client.Client 52 Scheme *runtime.Scheme 53 RancherIngressHost string 54 log vzlog.VerrazzanoLogger 55 } 56 57 // bindingParams used to mutate the RoleBinding 58 type bindingParams struct { 59 vmc *clustersv1alpha1.VerrazzanoManagedCluster 60 roleName string 61 serviceAccountName string 62 } 63 64 var ( 65 reconcileTimeMetric = promauto.NewGauge(prometheus.GaugeOpts{ 66 Name: "vz_cluster_operator_reconcile_vmc_duration_seconds", 67 Help: "The duration of the reconcile process for cluster objects", 68 }) 69 reconcileErrorCount = promauto.NewCounter(prometheus.CounterOpts{ 70 Name: "vz_cluster_operator_reconcile_vmc_error_total", 71 Help: "The amount of errors encountered in the reconcile process", 72 }) 73 reconcileSuccessCount = promauto.NewCounter(prometheus.CounterOpts{ 74 Name: "vz_cluster_operator_reconcile_vmc_success_total", 75 Help: "The number of times the reconcile process succeeded", 76 }) 77 ) 78 79 func (r *VerrazzanoManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 80 // Time the reconcile process and set the metric with the elapsed time 81 startTime := time.Now() 82 defer func() { reconcileTimeMetric.Set(time.Since(startTime).Seconds()) }() 83 84 if ctx == nil { 85 reconcileErrorCount.Inc() 86 return ctrl.Result{}, goerrors.New("context cannot be nil") 87 } 88 cr := &clustersv1alpha1.VerrazzanoManagedCluster{} 89 if err := r.Get(context.TODO(), req.NamespacedName, cr); err != nil { 90 // If the resource is not found, that means all of the finalizers have been removed, 91 // and the Verrazzano resource has been deleted, so there is nothing left to do. 92 if errors.IsNotFound(err) { 93 reconcileSuccessCount.Inc() 94 return reconcile.Result{}, nil 95 } 96 reconcileErrorCount.Inc() 97 zap.S().Errorf("Failed to fetch VerrazzanoManagedCluster resource: %v", err) 98 return newRequeueWithDelay(), nil 99 } 100 101 // Get the resource logger needed to log message using 'progress' and 'once' methods 102 log, err := vzlog.EnsureResourceLogger(&vzlog.ResourceConfig{ 103 Name: cr.Name, 104 Namespace: cr.Namespace, 105 ID: string(cr.UID), 106 Generation: cr.Generation, 107 ControllerName: "multicluster", 108 }) 109 if err != nil { 110 reconcileErrorCount.Inc() 111 zap.S().Errorf("Failed to create controller logger for VerrazzanoManagedCluster controller", err) 112 } 113 114 r.log = log 115 log.Oncef("Reconciling VerrazzanoManagedCluster resource %v", req.NamespacedName) 116 res, err := r.doReconcile(ctx, log, cr) 117 if err != nil { 118 // Never return an error since it has already been logged and we don't want the 119 // controller runtime to log again (with stack trace). Just re-queue if there is an error. 120 reconcileErrorCount.Inc() 121 return newRequeueWithDelay(), nil 122 } 123 if vzctrl.ShouldRequeue(res) { 124 reconcileSuccessCount.Inc() 125 return res, nil 126 } 127 128 // The resource has been reconciled. 129 log.Oncef("Successfully reconciled VerrazzanoManagedCluster resource %v", req.NamespacedName) 130 131 reconcileSuccessCount.Inc() 132 return ctrl.Result{}, nil 133 } 134 135 // Reconcile reconciles a VerrazzanoManagedCluster object 136 func (r *VerrazzanoManagedClusterReconciler) doReconcile(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (ctrl.Result, error) { 137 138 if !vmc.ObjectMeta.DeletionTimestamp.IsZero() { 139 // Finalizer is present, so lets do the cluster deletion 140 if vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) { 141 if err := r.reconcileManagedClusterDelete(ctx, vmc); err != nil { 142 return reconcile.Result{}, err 143 } 144 145 // Remove the finalizer and update the Verrazzano resource if the deletion has finished. 146 log.Infof("Removing finalizer %s", finalizerName) 147 vmc.ObjectMeta.Finalizers = vzstring.RemoveStringFromSlice(vmc.ObjectMeta.Finalizers, finalizerName) 148 err := r.Update(ctx, vmc) 149 if err != nil && !errors.IsConflict(err) { 150 return reconcile.Result{}, err 151 } 152 } 153 return reconcile.Result{}, nil 154 } 155 156 // Add our finalizer if not already added 157 if !vzstring.SliceContainsString(vmc.ObjectMeta.Finalizers, finalizerName) { 158 log.Infof("Adding finalizer %s", finalizerName) 159 vmc.ObjectMeta.Finalizers = append(vmc.ObjectMeta.Finalizers, finalizerName) 160 if err := r.Update(ctx, vmc); err != nil { 161 return ctrl.Result{}, err 162 } 163 } 164 165 // Sync the service account 166 log.Debugf("Syncing the ServiceAccount for VMC %s", vmc.Name) 167 err := r.syncServiceAccount(vmc) 168 if err != nil { 169 r.handleError(ctx, vmc, "Failed to sync the ServiceAccount", err, log) 170 return newRequeueWithDelay(), err 171 } 172 173 log.Debugf("Syncing the RoleBinding for VMC %s", vmc.Name) 174 _, err = r.syncManagedRoleBinding(vmc) 175 if err != nil { 176 r.handleError(ctx, vmc, "Failed to sync the RoleBinding", err, log) 177 return newRequeueWithDelay(), err 178 } 179 180 log.Debugf("Syncing the Agent secret for VMC %s", vmc.Name) 181 err = r.syncAgentSecret(vmc) 182 if err != nil { 183 r.handleError(ctx, vmc, "Failed to sync the agent secret", err, log) 184 return newRequeueWithDelay(), err 185 } 186 187 log.Debugf("Syncing the Registration secret for VMC %s", vmc.Name) 188 err = r.syncRegistrationSecret(vmc) 189 if err != nil { 190 r.handleError(ctx, vmc, "Failed to sync the registration secret", err, log) 191 return newRequeueWithDelay(), err 192 } 193 194 rancherEnabled, err := r.isRancherEnabled() 195 if err != nil { 196 return newRequeueWithDelay(), err 197 } 198 199 log.Debugf("Syncing the Manifest secret for VMC %s", vmc.Name) 200 vzVMCWaitingForClusterID, err := r.syncManifestSecret(ctx, rancherEnabled, vmc) 201 if err != nil { 202 r.handleError(ctx, vmc, "Failed to sync the Manifest secret", err, log) 203 return newRequeueWithDelay(), err 204 } 205 206 // create/update a secret with the CA cert from the managed cluster (if any errors occur we just log and continue) 207 syncedCert, err := r.syncCACertSecret(ctx, vmc, rancherEnabled) 208 if err != nil { 209 msg := fmt.Sprintf("Unable to get CA cert from managed cluster %s with id %s: %v", vmc.Name, vmc.Status.RancherRegistration.ClusterID, err) 210 r.log.Infof(msg) 211 r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionFalse, msg) 212 } else { 213 if syncedCert { 214 r.setStatusConditionManagedCARetrieved(vmc, corev1.ConditionTrue, "Managed cluster CA cert retrieved successfully") 215 } 216 } 217 218 log.Debugf("Updating Rancher ClusterRoleBindingTemplate for VMC %s", vmc.Name) 219 err = r.updateRancherClusterRoleBindingTemplate(vmc) 220 if err != nil { 221 r.handleError(ctx, vmc, "Failed to update Rancher ClusterRoleBindingTemplate", err, log) 222 return newRequeueWithDelay(), err 223 } 224 225 log.Debugf("Pushing the Manifest objects for VMC %s", vmc.Name) 226 pushedManifest, err := r.pushManifestObjects(ctx, rancherEnabled, vmc) 227 if err != nil { 228 r.handleError(ctx, vmc, "Failed to push the Manifest objects", err, log) 229 r.setStatusConditionManifestPushed(vmc, corev1.ConditionFalse, fmt.Sprintf("Failed to push the manifest objects to the managed cluster: %v", err)) 230 return newRequeueWithDelay(), err 231 } 232 if pushedManifest { 233 r.log.Oncef("Manifest objects have been successfully pushed to the managed cluster") 234 r.setStatusConditionManifestPushed(vmc, corev1.ConditionTrue, "Manifest objects pushed to the managed cluster") 235 } 236 237 log.Debugf("Registering ArgoCD for VMC %s", vmc.Name) 238 var argoCDRegistration *clustersv1alpha1.ArgoCDRegistration 239 argoCDEnabled, err := r.isArgoCDEnabled() 240 if err != nil { 241 return newRequeueWithDelay(), err 242 } 243 if argoCDEnabled && rancherEnabled { 244 argoCDRegistration, err = r.registerManagedClusterWithArgoCD(vmc) 245 if err != nil { 246 r.handleError(ctx, vmc, "Failed to register managed cluster with Argo CD", err, log) 247 return newRequeueWithDelay(), err 248 } 249 vmc.Status.ArgoCDRegistration = *argoCDRegistration 250 } 251 if !rancherEnabled && argoCDEnabled { 252 now := metav1.Now() 253 vmc.Status.ArgoCDRegistration = clustersv1alpha1.ArgoCDRegistration{ 254 Status: clustersv1alpha1.RegistrationPendingRancher, 255 Timestamp: &now, 256 Message: "Skipping Argo CD cluster registration due to Rancher not installed"} 257 } 258 259 if !vzVMCWaitingForClusterID { 260 r.setStatusConditionReady(vmc, "Ready") 261 statusErr := r.updateStatus(ctx, vmc) 262 263 if statusErr != nil { 264 log.Errorf("Failed to update status to ready for VMC %s: %v", vmc.Name, statusErr) 265 } 266 } 267 268 if err := r.syncManagedMetrics(ctx, log, vmc); err != nil { 269 return newRequeueWithDelay(), err 270 } 271 272 log.Debugf("Creating or updating keycloak client for %s", vmc.Name) 273 err = r.createManagedClusterKeycloakClient(vmc) 274 if err != nil { 275 r.handleError(ctx, vmc, "Failed to create or update Keycloak client for managed cluster", err, log) 276 return newRequeueWithDelay(), err 277 } 278 279 return ctrl.Result{Requeue: true, RequeueAfter: constants.ReconcileLoopRequeueInterval}, nil 280 } 281 282 func (r *VerrazzanoManagedClusterReconciler) syncServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 283 // Create or update the service account 284 _, serviceAccount, err := r.createOrUpdateServiceAccount(context.TODO(), vmc) 285 if err != nil { 286 return err 287 } 288 289 if len(serviceAccount.Secrets) == 0 { 290 _, err = r.createServiceAccountTokenSecret(context.TODO(), serviceAccount) 291 if err != nil { 292 return err 293 } 294 } 295 296 // Does the VerrazzanoManagedCluster object contain the service account name? 297 saName := generateManagedResourceName(vmc.Name) 298 if vmc.Spec.ServiceAccount != saName { 299 r.log.Oncef("Updating ServiceAccount from %s to %s", vmc.Spec.ServiceAccount, saName) 300 vmc.Spec.ServiceAccount = saName 301 err = r.Update(context.TODO(), vmc) 302 if err != nil { 303 return err 304 } 305 } 306 307 return nil 308 } 309 310 // Create or update the ServiceAccount for a VerrazzanoManagedCluster 311 func (r *VerrazzanoManagedClusterReconciler) createOrUpdateServiceAccount(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, *corev1.ServiceAccount, error) { 312 var serviceAccount corev1.ServiceAccount 313 serviceAccount.Namespace = vmc.Namespace 314 serviceAccount.Name = generateManagedResourceName(vmc.Name) 315 316 operationResult, err := controllerutil.CreateOrUpdate(ctx, r.Client, &serviceAccount, func() error { 317 r.mutateServiceAccount(vmc, &serviceAccount) 318 // This SetControllerReference call will trigger garbage collection i.e. the serviceAccount 319 // will automatically get deleted when the VerrazzanoManagedCluster is deleted 320 return controllerutil.SetControllerReference(vmc, &serviceAccount, r.Scheme) 321 }) 322 return operationResult, &serviceAccount, err 323 } 324 325 func (r *VerrazzanoManagedClusterReconciler) mutateServiceAccount(vmc *clustersv1alpha1.VerrazzanoManagedCluster, serviceAccount *corev1.ServiceAccount) { 326 serviceAccount.Name = generateManagedResourceName(vmc.Name) 327 } 328 329 func (r *VerrazzanoManagedClusterReconciler) createServiceAccountTokenSecret(ctx context.Context, serviceAccount *corev1.ServiceAccount) (controllerutil.OperationResult, error) { 330 var secret corev1.Secret 331 secret.Name = serviceAccount.Name + "-token" 332 secret.Namespace = serviceAccount.Namespace 333 secret.Type = corev1.SecretTypeServiceAccountToken 334 secret.Annotations = map[string]string{ 335 corev1.ServiceAccountNameKey: serviceAccount.Name, 336 } 337 338 return controllerutil.CreateOrUpdate(ctx, r.Client, &secret, func() error { 339 // This SetControllerReference call will trigger garbage collection i.e. the token secret 340 // will automatically get deleted when the service account is deleted 341 return controllerutil.SetControllerReference(serviceAccount, &secret, r.Scheme) 342 }) 343 } 344 345 // syncManagedRoleBinding syncs the RoleBinding that binds the service account used by the managed cluster 346 // to the role containing the permission 347 func (r *VerrazzanoManagedClusterReconciler) syncManagedRoleBinding(vmc *clustersv1alpha1.VerrazzanoManagedCluster) (controllerutil.OperationResult, error) { 348 var roleBinding rbacv1.RoleBinding 349 roleBinding.Namespace = vmc.Namespace 350 roleBinding.Name = generateManagedResourceName(vmc.Name) 351 352 return controllerutil.CreateOrUpdate(context.TODO(), r.Client, &roleBinding, func() error { 353 mutateBinding(&roleBinding, bindingParams{ 354 vmc: vmc, 355 roleName: constants.MCClusterRole, 356 serviceAccountName: vmc.Spec.ServiceAccount, 357 }) 358 // This SetControllerReference call will trigger garbage collection i.e. the roleBinding 359 // will automatically get deleted when the VerrazzanoManagedCluster is deleted 360 return controllerutil.SetControllerReference(vmc, &roleBinding, r.Scheme) 361 }) 362 } 363 364 // syncMultiClusterCASecret gets the CA secret in the VMC from the managed cluster and populates the CA secret for metrics scraping 365 func (r *VerrazzanoManagedClusterReconciler) syncMultiClusterCASecret(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) (corev1.Secret, error) { 366 var secret corev1.Secret 367 368 // read the configuration secret specified if it exists 369 if len(vmc.Spec.CASecret) > 0 { 370 secretNsn := types.NamespacedName{ 371 Namespace: vmc.Namespace, 372 Name: vmc.Spec.CASecret, 373 } 374 375 // validate secret if it exists 376 if err := r.Get(context.TODO(), secretNsn, &secret); err != nil { 377 return secret, log.ErrorfNewErr("failed to fetch the managed cluster CA secret %s/%s, %v", vmc.Namespace, vmc.Spec.CASecret, err) 378 } 379 } 380 if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, &secret); err != nil { 381 return secret, log.ErrorfNewErr("Failed to sync the managed cluster CA certs for VMC %s: %v", vmc.Name, err) 382 } 383 return secret, nil 384 } 385 386 // mutateManagedClusterCACertsSecret adds and removes managed cluster CA certs to/from the managed cluster CA certs secret 387 func (r *VerrazzanoManagedClusterReconciler) mutateManagedClusterCACertsSecret(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, cacrtSecret *corev1.Secret) error { 388 ns := &corev1.Namespace{} 389 err := r.Client.Get(ctx, types.NamespacedName{Name: constants.VerrazzanoMonitoringNamespace}, ns) 390 if errors.IsNotFound(err) { 391 r.log.Infof("namespace %s does not exist", constants.VerrazzanoMonitoringNamespace) 392 return nil 393 } 394 secret := &corev1.Secret{ 395 ObjectMeta: metav1.ObjectMeta{ 396 Name: constants.PromManagedClusterCACertsSecretName, 397 Namespace: constants.VerrazzanoMonitoringNamespace, 398 }, 399 } 400 401 if _, err := controllerutil.CreateOrUpdate(ctx, r.Client, secret, func() error { 402 if secret.Data == nil { 403 secret.Data = make(map[string][]byte) 404 } 405 if cacrtSecret != nil && cacrtSecret.Data != nil && len(cacrtSecret.Data["cacrt"]) > 0 { 406 secret.Data[getCAKey(vmc)] = cacrtSecret.Data["cacrt"] 407 } else { 408 delete(secret.Data, getCAKey(vmc)) 409 } 410 return nil 411 }); err != nil { 412 return err 413 } 414 415 return nil 416 } 417 418 // syncManagedMetrics syncs the metrics federation for managed clusters 419 // There are currently two ways of federating metrics from managed clusters: 420 // 1. Creating a Scrape config for the managed cluster on the admin cluster Prometheus 421 // 2. Creating a Store in Thanos so that managed cluster metrics can be accessed by the admin cluster Query 422 // These scenarios are mutually exclusive and the Thanos Query method takes precedence 423 // There are two conditions that enable the Thanos query method 424 // 1. Thanos is enabled on the managed cluster 425 // a. This manifests as the ThanosHost field in the VMC being populated 426 // 2. Thanos is enabled on the managed cluster 427 // 428 // If these two conditions are not met, the Prometheus federation will be enabled 429 func (r *VerrazzanoManagedClusterReconciler) syncManagedMetrics(ctx context.Context, log vzlog.VerrazzanoLogger, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 430 // We need to sync the multicluster CA secret for Prometheus and Thanos 431 caSecret, err := r.syncMultiClusterCASecret(ctx, log, vmc) 432 if err != nil { 433 r.handleError(ctx, vmc, "Failed to sync the multicluster CA secret", err, log) 434 } 435 436 thanosEnabled, err := r.isThanosEnabled() 437 if err != nil { 438 r.handleError(ctx, vmc, "Failed to verify if Thanos is enabled", err, log) 439 return err 440 } 441 // If the Thanos multicluster requirements are met, set up the Thanos Query store 442 if vmc.Status.ThanosQueryStore != "" && thanosEnabled { 443 err = r.syncThanosQuery(ctx, vmc) 444 if err != nil { 445 r.handleError(ctx, vmc, "Failed to update Thanos Query endpoint managed cluster", err, log) 446 return err 447 } 448 449 // If we successfully sync the managed cluster Thanos Query store, we should remove the federated Prometheus to avoid duplication 450 r.log.Oncef("Thanos Query synced for VMC %s. Removing the Prometheus scraper", vmc.Name) 451 err = r.deleteClusterPrometheusConfiguration(ctx, vmc) 452 if err != nil { 453 r.handleError(ctx, vmc, "Failed to remove the Prometheus scrape config", err, log) 454 return err 455 } 456 return nil 457 } 458 459 // If Thanos multicluster is disabled, attempt to delete left over resources 460 err = r.syncThanosQueryEndpointDelete(ctx, vmc) 461 if err != nil { 462 r.handleError(ctx, vmc, "Failed to delete Thanos Query endpoint managed cluster", err, log) 463 return err 464 } 465 466 // If the Prometheus host is not populated, skip federation and do nothing 467 if vmc.Status.PrometheusHost == "" { 468 // If reached, the managed cluster metrics are not populated, so we should remove the CA cert from the secret 469 err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil) 470 if err != nil { 471 r.handleError(ctx, vmc, "Failed to delete the managed cluster CA cert from the secret", err, log) 472 return err 473 } 474 log.Oncef("Managed cluster Prometheus Host not found in VMC Status for VMC %s. Waiting for VMC to be registered...", vmc.Name) 475 return nil 476 } 477 478 // Sync the Prometheus Scraper if Thanos multicluster is disabled and the host is populated 479 log.Debugf("Syncing the prometheus scraper for VMC %s", vmc.Name) 480 err = r.syncPrometheusScraper(ctx, vmc, &caSecret) 481 if err != nil { 482 r.handleError(ctx, vmc, "Failed to setup the prometheus scraper for managed cluster", err, log) 483 return err 484 } 485 486 return nil 487 } 488 489 // mutateBinding mutates the RoleBinding to ensure it has the valid params 490 func mutateBinding(binding *rbacv1.RoleBinding, p bindingParams) { 491 binding.Name = generateManagedResourceName(p.vmc.Name) 492 binding.Namespace = p.vmc.Namespace 493 binding.Labels = p.vmc.Labels 494 495 binding.RoleRef = rbacv1.RoleRef{ 496 APIGroup: "rbac.authorization.k8s.io", 497 Kind: "ClusterRole", 498 Name: p.roleName, 499 } 500 binding.Subjects = []rbacv1.Subject{ 501 { 502 Kind: "ServiceAccount", 503 Name: p.serviceAccountName, 504 Namespace: constants.VerrazzanoMultiClusterNamespace, 505 }, 506 } 507 } 508 509 // Generate the common name used by all resources specific to a given managed cluster 510 func generateManagedResourceName(clusterName string) string { 511 return fmt.Sprintf("verrazzano-cluster-%s", clusterName) 512 } 513 514 // SetupWithManager creates a new controller and adds it to the manager 515 func (r *VerrazzanoManagedClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { 516 return ctrl.NewControllerManagedBy(mgr). 517 For(&clustersv1alpha1.VerrazzanoManagedCluster{}). 518 Complete(r) 519 } 520 521 // reconcileManagedClusterDelete performs all necessary cleanup during cluster deletion 522 func (r *VerrazzanoManagedClusterReconciler) reconcileManagedClusterDelete(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 523 if err := r.deleteClusterPrometheusConfiguration(ctx, vmc); err != nil { 524 return err 525 } 526 if err := r.unregisterClusterFromArgoCD(ctx, vmc); err != nil { 527 return err 528 } 529 if err := r.syncThanosQueryEndpointDelete(ctx, vmc); err != nil { 530 return err 531 } 532 if err := r.mutateManagedClusterCACertsSecret(ctx, vmc, nil); err != nil { 533 return err 534 } 535 return r.deleteClusterFromRancher(ctx, vmc) 536 } 537 538 // deleteClusterFromRancher calls the Rancher API to delete the cluster associated with the VMC if the VMC has a cluster id set in the status. 539 func (r *VerrazzanoManagedClusterReconciler) deleteClusterFromRancher(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 540 clusterID := vmc.Status.RancherRegistration.ClusterID 541 if clusterID == "" { 542 r.log.Debugf("VMC %s/%s has no Rancher cluster id, skipping delete", vmc.Namespace, vmc.Name) 543 return nil 544 } 545 546 rc, err := rancherutil.NewAdminRancherConfig(r.Client, r.RancherIngressHost, r.log) 547 if err != nil { 548 msg := "Failed to create Rancher API client" 549 r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg) 550 r.log.Errorf("Unable to connect to Rancher API on admin cluster while attempting delete operation: %v", err) 551 return err 552 } 553 if _, err = DeleteClusterFromRancher(rc, clusterID, r.log); err != nil { 554 msg := "Failed deleting cluster" 555 r.updateRancherStatus(ctx, vmc, clustersv1alpha1.DeleteFailed, clusterID, msg) 556 r.log.Errorf("Unable to delete Rancher cluster %s/%s: %v", vmc.Namespace, vmc.Name, err) 557 return err 558 } 559 560 return nil 561 } 562 563 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManagedCARetrieved(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) { 564 now := metav1.Now() 565 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManagedCARetrieved, Message: msg, LastTransitionTime: &now}, false) 566 } 567 568 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionManifestPushed(vmc *clustersv1alpha1.VerrazzanoManagedCluster, value corev1.ConditionStatus, msg string) { 569 now := metav1.Now() 570 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: value, Type: clustersv1alpha1.ConditionManifestPushed, Message: msg, LastTransitionTime: &now}, true) 571 } 572 573 // setStatusConditionNotReady sets the status condition Ready = false on the VMC in memory - does NOT update the status in the cluster 574 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionNotReady(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) { 575 now := metav1.Now() 576 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionFalse, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false) 577 } 578 579 // setStatusConditionReady sets the status condition Ready = true on the VMC in memory - does NOT update the status in the cluster 580 func (r *VerrazzanoManagedClusterReconciler) setStatusConditionReady(vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string) { 581 now := metav1.Now() 582 r.setStatusCondition(vmc, clustersv1alpha1.Condition{Status: corev1.ConditionTrue, Type: clustersv1alpha1.ConditionReady, Message: msg, LastTransitionTime: &now}, false) 583 } 584 585 func (r *VerrazzanoManagedClusterReconciler) handleError(ctx context.Context, vmc *clustersv1alpha1.VerrazzanoManagedCluster, msg string, err error, log vzlog.VerrazzanoLogger) { 586 fullMsg := fmt.Sprintf("%s: %v", msg, err) 587 log.ErrorfThrottled(fullMsg) 588 r.setStatusConditionNotReady(ctx, vmc, fullMsg) 589 statusErr := r.updateStatus(ctx, vmc) 590 if statusErr != nil { 591 log.ErrorfThrottled("Failed to update status for VMC %s: %v", vmc.Name, statusErr) 592 } 593 } 594 595 // setStatusCondition updates the VMC status conditions based and replaces already created status conditions 596 // the onTime flag updates the status condition if the time has changed 597 func (r *VerrazzanoManagedClusterReconciler) setStatusCondition(vmc *clustersv1alpha1.VerrazzanoManagedCluster, condition clustersv1alpha1.Condition, onTime bool) { 598 r.log.Debugf("Entered setStatusCondition for VMC %s for condition %s = %s, existing conditions = %v", 599 vmc.Name, condition.Type, condition.Status, vmc.Status.Conditions) 600 var matchingCondition *clustersv1alpha1.Condition 601 var conditionExists bool 602 for i, existingCondition := range vmc.Status.Conditions { 603 if condition.Type == existingCondition.Type && 604 condition.Status == existingCondition.Status && 605 condition.Message == existingCondition.Message && 606 (!onTime || condition.LastTransitionTime == existingCondition.LastTransitionTime) { 607 // the exact same condition already exists, don't update 608 conditionExists = true 609 break 610 } 611 if condition.Type == existingCondition.Type { 612 // use the index here since "existingCondition" is a copy and won't point to the object in the slice 613 matchingCondition = &vmc.Status.Conditions[i] 614 break 615 } 616 } 617 if !conditionExists { 618 619 if matchingCondition == nil { 620 vmc.Status.Conditions = append(vmc.Status.Conditions, condition) 621 } else { 622 matchingCondition.Message = condition.Message 623 matchingCondition.Status = condition.Status 624 matchingCondition.LastTransitionTime = condition.LastTransitionTime 625 } 626 } 627 } 628 629 // getVerrazzanoResource gets the installed Verrazzano resource in the cluster (of which only one is expected) 630 func (r *VerrazzanoManagedClusterReconciler) getVerrazzanoResource() (*v1beta1.Verrazzano, error) { 631 // Get the Verrazzano resource 632 verrazzano := v1beta1.VerrazzanoList{} 633 err := r.Client.List(context.TODO(), &verrazzano, &client.ListOptions{}) 634 if err != nil || len(verrazzano.Items) == 0 { 635 return nil, r.log.ErrorfNewErr("Verrazzano must be installed: %v", err) 636 637 } 638 return &verrazzano.Items[0], nil 639 } 640 641 // leveraged to replace method (unit testing) 642 var createClient = func(r *VerrazzanoManagedClusterReconciler, vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 643 const prometheusHostPrefix = "prometheus.vmi.system" 644 promHost := vmc.Status.PrometheusHost 645 // Skip Keycloak client generation if Prometheus isn't present in VMC status 646 // MCAgent on the managed cluster will set this if/when it is ready 647 if len(promHost) == 0 { 648 r.log.Debug("Skipping Prometheus Keycloak client creation: VMC Prometheus not found") 649 return nil 650 } 651 652 // login to keycloak 653 cfg, cli, err := k8sutil.ClientConfig() 654 if err != nil { 655 return err 656 } 657 658 // create a context that can be leveraged by keycloak method 659 ctx, err := spi.NewMinimalContext(r.Client, r.log) 660 if err != nil { 661 return err 662 } 663 664 err = keycloak.LoginKeycloak(ctx, cfg, cli) 665 if err != nil { 666 return err 667 } 668 669 dnsSubdomain := promHost[len(prometheusHostPrefix)+1:] 670 clientID := fmt.Sprintf("verrazzano-%s", vmc.Name) 671 err = keycloak.CreateOrUpdateClient(ctx, cfg, cli, clientID, keycloak.ManagedClusterClientTmpl, keycloak.ManagedClusterClientUrisTemplate, false, &dnsSubdomain) 672 if err != nil { 673 return err 674 } 675 676 return nil 677 } 678 679 // createManagedClusterKeycloakClient creates a Keycloak client for the managed cluster 680 func (r *VerrazzanoManagedClusterReconciler) createManagedClusterKeycloakClient(vmc *clustersv1alpha1.VerrazzanoManagedCluster) error { 681 return createClient(r, vmc) 682 } 683 684 // getClusterClient returns a controller runtime client configured for the workload cluster 685 func (r *VerrazzanoManagedClusterReconciler) getClusterClient(restConfig *rest.Config) (client.Client, error) { 686 scheme := runtime.NewScheme() 687 _ = rbacv1.AddToScheme(scheme) 688 _ = corev1.AddToScheme(scheme) 689 _ = netv1.AddToScheme(scheme) 690 _ = appsv1.AddToScheme(scheme) 691 _ = clustersv1alpha1.AddToScheme(scheme) 692 693 return client.New(restConfig, client.Options{Scheme: scheme}) 694 } 695 696 // getWorkloadClusterKubeconfig returns a kubeconfig for accessing the workload cluster 697 func (r *VerrazzanoManagedClusterReconciler) getWorkloadClusterKubeconfig(cluster *unstructured.Unstructured) ([]byte, error) { 698 // get the cluster kubeconfig 699 kubeconfigSecret := &corev1.Secret{} 700 err := r.Client.Get(context.TODO(), types.NamespacedName{Name: fmt.Sprintf("%s-kubeconfig", cluster.GetName()), Namespace: cluster.GetNamespace()}, kubeconfigSecret) 701 if err != nil { 702 r.log.Progressf("failed to obtain workload cluster kubeconfig resource. Re-queuing...") 703 return nil, err 704 } 705 kubeconfig, ok := kubeconfigSecret.Data["value"] 706 if !ok { 707 r.log.Error(err, "failed to read kubeconfig from resource") 708 return nil, fmt.Errorf("Unable to read kubeconfig from retrieved cluster resource") 709 } 710 711 return kubeconfig, nil 712 } 713 714 func (r *VerrazzanoManagedClusterReconciler) getWorkloadClusterClient(cluster *unstructured.Unstructured) (client.Client, error) { 715 // identify whether the workload cluster is using "untrusted" certs 716 kubeconfig, err := r.getWorkloadClusterKubeconfig(cluster) 717 if err != nil { 718 // requeue since we're waiting for cluster 719 return nil, err 720 } 721 // create a workload cluster client 722 // create workload cluster client 723 restConfig, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig) 724 if err != nil { 725 r.log.Progress("Failed getting rest config from workload kubeconfig") 726 return nil, err 727 } 728 workloadClient, err := r.getClusterClient(restConfig) 729 if err != nil { 730 return nil, err 731 } 732 return workloadClient, nil 733 } 734 735 // Create a new Result that will cause a reconcile requeue after a short delay 736 func newRequeueWithDelay() ctrl.Result { 737 return vzctrl.NewRequeueWithDelay(2, 3, time.Second) 738 } 739 740 func getClusterResourceName(cluster *unstructured.Unstructured, client client.Client) string { 741 // check for existence of a Rancher cluster management resource 742 rancherMgmtCluster := &unstructured.Unstructured{} 743 rancherMgmtCluster.SetGroupVersionKind(common.GetRancherMgmtAPIGVKForKind("Cluster")) 744 err := client.Get(context.TODO(), types.NamespacedName{Name: cluster.GetName(), Namespace: cluster.GetNamespace()}, rancherMgmtCluster) 745 if err != nil { 746 return cluster.GetName() 747 } 748 // return the display Name 749 return rancherMgmtCluster.UnstructuredContent()["spec"].(map[string]interface{})["displayName"].(string) 750 }