github.com/verrazzano/verrazzano@v1.7.0/application-operator/mcagent/mcagent.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package mcagent 5 6 import ( 7 "context" 8 "fmt" 9 "os" 10 11 apierrors "k8s.io/apimachinery/pkg/api/errors" 12 ctrl "sigs.k8s.io/controller-runtime" 13 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 14 "sigs.k8s.io/controller-runtime/pkg/event" 15 "sigs.k8s.io/controller-runtime/pkg/predicate" 16 17 oamv1alpha2 "github.com/crossplane/oam-kubernetes-runtime/apis/core/v1alpha2" 18 clustersv1alpha1 "github.com/verrazzano/verrazzano/application-operator/apis/clusters/v1alpha1" 19 "github.com/verrazzano/verrazzano/application-operator/constants" 20 "github.com/verrazzano/verrazzano/application-operator/controllers/clusters" 21 "github.com/verrazzano/verrazzano/cluster-operator/apis/clusters/v1alpha1" 22 "github.com/verrazzano/verrazzano/pkg/mcconstants" 23 "go.uber.org/zap" 24 corev1 "k8s.io/api/core/v1" 25 "k8s.io/apimachinery/pkg/runtime" 26 "k8s.io/apimachinery/pkg/types" 27 "k8s.io/client-go/tools/clientcmd" 28 "sigs.k8s.io/controller-runtime/pkg/client" 29 ) 30 31 // ENV VAR for registration secret version 32 const ( 33 registrationSecretVersion = "REGISTRATION_SECRET_VERSION" 34 cattleAgentHashData = "cattle-agent-hash" // the data field name for the cattleAgentHash in the agent state configmap 35 requeueDelayMinSeconds = 50 36 requeueDelayMaxSeconds = 70 37 ) 38 39 // Name of config map that stores mc agent state 40 var mcAgentStateConfigMapName = types.NamespacedName{Name: "mc-agent-state", Namespace: constants.VerrazzanoMultiClusterNamespace} 41 42 var getAdminClientFunc = createAdminClient 43 44 var mcAppConfCRDName = fmt.Sprintf("%s.%s", clustersv1alpha1.MultiClusterAppConfigResource, clustersv1alpha1.SchemeGroupVersion.Group) 45 46 // Reconciler reconciles one iteration of the Managed cluster agent 47 type Reconciler struct { 48 client.Client 49 Log *zap.SugaredLogger 50 Scheme *runtime.Scheme 51 AgentChannel chan clusters.StatusUpdateMessage 52 } 53 54 // SetupWithManager registers our controller with the manager 55 func (r *Reconciler) SetupWithManager(mgr ctrl.Manager) error { 56 return ctrl.NewControllerManagedBy(mgr). 57 For(&corev1.Secret{}). 58 WithEventFilter(r.createAgentPredicate()). 59 Complete(r) 60 } 61 62 func (r *Reconciler) createAgentPredicate() predicate.Predicate { 63 return predicate.Funcs{ 64 CreateFunc: func(e event.CreateEvent) bool { 65 return r.isAgentSecret(e.Object) 66 }, 67 DeleteFunc: func(e event.DeleteEvent) bool { 68 return r.isAgentSecret(e.Object) 69 }, 70 UpdateFunc: func(e event.UpdateEvent) bool { 71 return r.isAgentSecret(e.ObjectNew) 72 }, 73 } 74 } 75 76 func (r *Reconciler) isAgentSecret(object client.Object) bool { 77 return object.GetNamespace() == constants.VerrazzanoSystemNamespace && object.GetName() == constants.MCAgentSecret 78 } 79 80 func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 81 // Get the agent secret 82 agentSecret := corev1.Secret{} 83 if err := r.Get(ctx, req.NamespacedName, &agentSecret); err != nil { 84 // there is no admin cluster we are connected to, so nowhere to send any status updates 85 // received - discard them 86 discardStatusMessages(r.AgentChannel) 87 return clusters.IgnoreNotFoundWithLog(err, r.Log) 88 } 89 if agentSecret.DeletionTimestamp != nil { 90 r.Log.Debugf("the secret %v was deleted", req.NamespacedName) 91 // there is no admin cluster we are connected to, so nowhere to send any status updates 92 // received - discard them 93 discardStatusMessages(r.AgentChannel) 94 return clusters.NewRequeueWithRandomDelay(requeueDelayMinSeconds, requeueDelayMaxSeconds), nil 95 } 96 if err := validateAgentSecret(&agentSecret); err != nil { 97 // agent secret is invalid - log and also discard status messages on the channel since there 98 // is no valid admin cluster to send status updates to 99 discardStatusMessages(r.AgentChannel) 100 return clusters.NewRequeueWithRandomDelay(requeueDelayMinSeconds, requeueDelayMaxSeconds), fmt.Errorf("Agent secret validation failed: %v", err) 101 } 102 r.Log.Debug("Reconciling multi-cluster agent") 103 104 // Process one iteration of the agent thread 105 err := r.doReconcile(ctx, agentSecret) 106 if err != nil { 107 r.Log.Errorf("failed processing multi-cluster resources: %v", err) 108 } 109 return clusters.NewRequeueWithRandomDelay(requeueDelayMinSeconds, requeueDelayMaxSeconds), nil 110 } 111 112 // doReconcile - process one iteration of the agent thread 113 func (r *Reconciler) doReconcile(ctx context.Context, agentSecret corev1.Secret) error { 114 managedClusterName := string(agentSecret.Data[constants.ClusterNameData]) 115 116 // Create the discovery client for the managed cluster 117 localDiscoveryClient, err := getDiscoveryClientFunc() 118 if err != nil { 119 return fmt.Errorf("failed to get discovery client for this workload cluster: %v", err) 120 } 121 122 // Initialize the syncer object 123 s := &Syncer{ 124 LocalClient: r.Client, 125 LocalDiscoveryClient: localDiscoveryClient, 126 Log: r.Log, 127 Context: ctx, 128 ProjectNamespaces: []string{}, 129 StatusUpdateChannel: r.AgentChannel, 130 ManagedClusterName: managedClusterName, 131 } 132 133 // Read current agent state from config map 134 mcAgentStateConfigMap := corev1.ConfigMap{Data: map[string]string{}} 135 if err := r.Get(ctx, mcAgentStateConfigMapName, &mcAgentStateConfigMap); client.IgnoreNotFound(err) != nil { 136 return fmt.Errorf("failed to get the agent state config map %v: %v", mcAgentStateConfigMapName, err) 137 } 138 139 // Create the client for accessing the admin cluster 140 adminClient, err := getAdminClientFunc(&agentSecret) 141 // If we are unauthorized to create a client on the admin cluster 142 // the cluster must have been deregistered 143 if apierrors.IsUnauthorized(err) { 144 return s.syncDeregistration() 145 } 146 if err != nil { 147 return fmt.Errorf("failed to get the client for cluster %q with error %v", managedClusterName, err) 148 } 149 s.AdminClient = adminClient 150 151 // Sync cattle-cluster-agent deployment which will set the new cattleAgentHash on the Syncer 152 cattleAgentHashValue, err := s.syncCattleClusterAgent(mcAgentStateConfigMap.Data[cattleAgentHashData], "") 153 if err != nil { 154 // we couldn't sync the cattle-cluster-agent - but we should keep going with the rest of the work 155 r.Log.Errorf("Failed to synchronize cattle-cluster-agent: %v", err) 156 } 157 158 // Update mc-agent-state config map with the managed cluster name or cattle agent hash if needed 159 if err := r.updateMCAgentStateConfigMap(ctx, managedClusterName, cattleAgentHashValue); err != nil { 160 return err 161 } 162 163 // Update all Prometheus monitors relabel configs in all namespaces with new cluster name if needed 164 err = s.updatePrometheusMonitorsClusterName() 165 if err != nil { 166 return fmt.Errorf("failed to update the cluster name to %s on Prometheus monitor resources with error %v", s.ManagedClusterName, err) 167 } 168 169 // Update the status of our VMC on the admin cluster to record the last time we connected 170 // and update other fields of in the VMC status 171 err = s.updateVMCStatus() 172 if err != nil { 173 // we couldn't update status of the VMC - but we should keep going with the rest of the work 174 r.Log.Errorf("Failed to update VMC status on admin cluster: %v", err) 175 } 176 177 // Sync multi-cluster objects 178 s.SyncMultiClusterResources() 179 180 // Delete the managed cluster resources if deregistration occurs 181 err = s.syncDeregistration() 182 if err != nil { 183 // we couldn't delete the managed cluster resources - but we should keep going with the rest of the work 184 r.Log.Errorf("Failed to sync the deregistration process: %v", err) 185 } 186 187 // Check whether the admin or local clusters' CA certs have rolled, and sync as necessary 188 _, err = s.syncClusterCAs() 189 if err != nil { 190 // we couldn't sync the cluster CAs - but we should keep going with the rest of the work 191 r.Log.Errorf("Failed to synchronize cluster CA certificates: %v", err) 192 } 193 194 return nil 195 } 196 197 // updateMCAgentStateConfigMap updates the managed cluster name and cattle agent hash in the 198 // agent state config map if those have changed from what was there before 199 func (r *Reconciler) updateMCAgentStateConfigMap(ctx context.Context, managedClusterName string, cattleAgentHashValue string) error { 200 // create the ConfigMap's namespace if it doesn't already exist 201 mcAgentStateNamespace := corev1.Namespace{} 202 mcAgentStateNamespace.Name = mcAgentStateConfigMapName.Namespace 203 _, err := controllerutil.CreateOrUpdate(ctx, r.Client, &mcAgentStateNamespace, func() error { return nil }) 204 if err != nil { 205 return fmt.Errorf("failed to create namespace %s: %v", mcAgentStateConfigMapName.Namespace, err) 206 } 207 208 mcAgentStateConfigMap := corev1.ConfigMap{} 209 mcAgentStateConfigMap.Name = mcAgentStateConfigMapName.Name 210 mcAgentStateConfigMap.Namespace = mcAgentStateConfigMapName.Namespace 211 _, err = controllerutil.CreateOrUpdate(ctx, r.Client, &mcAgentStateConfigMap, func() error { 212 if mcAgentStateConfigMap.Data == nil { 213 mcAgentStateConfigMap.Data = map[string]string{} 214 } 215 existingClusterName := mcAgentStateConfigMap.Data[constants.ClusterNameData] 216 if existingClusterName != managedClusterName { 217 // Log the cluster name only if it changes 218 r.Log.Infof("Cluster name changed from '%q' to '%q', updating the agent state ConfigMap", existingClusterName, managedClusterName) 219 mcAgentStateConfigMap.Data[constants.ClusterNameData] = managedClusterName 220 } 221 existingCattleAgentHash := mcAgentStateConfigMap.Data[cattleAgentHashData] 222 if existingCattleAgentHash != cattleAgentHashValue { 223 // Log that the cattle agent hash has changed 224 r.Log.Infof("The %s has changed, updating the agent state ConfigMap", cattleAgentHashData) 225 mcAgentStateConfigMap.Data[cattleAgentHashData] = cattleAgentHashValue 226 } 227 return nil 228 }) 229 if err != nil { 230 return fmt.Errorf("failed to update agent state in ConfigMap %v: %v", mcAgentStateConfigMapName, err) 231 } 232 return nil 233 } 234 235 // Validate the agent secret 236 func validateAgentSecret(secret *corev1.Secret) error { 237 // The secret must contain a cluster name 238 _, ok := secret.Data[constants.ClusterNameData] 239 if !ok { 240 return fmt.Errorf("the secret named %s in namespace %s is missing the required field %s", secret.Name, secret.Namespace, constants.ClusterNameData) 241 } 242 243 // The secret must contain a kubeconfig 244 _, ok = secret.Data[mcconstants.KubeconfigKey] 245 if !ok { 246 return fmt.Errorf("the secret named %s in namespace %s is missing the required field %s", secret.Name, secret.Namespace, mcconstants.KubeconfigKey) 247 } 248 249 return nil 250 } 251 252 // Get the clientset for accessing the admin cluster 253 func createAdminClient(secret *corev1.Secret) (client.Client, error) { 254 // Create a temp file that contains the kubeconfig 255 tmpFile, err := os.CreateTemp("", "kubeconfig") 256 if err != nil { 257 return nil, err 258 } 259 260 err = os.WriteFile(tmpFile.Name(), secret.Data[mcconstants.KubeconfigKey], 0600) 261 defer os.Remove(tmpFile.Name()) 262 if err != nil { 263 return nil, err 264 } 265 266 config, err := clientcmd.BuildConfigFromFlags("", tmpFile.Name()) 267 if err != nil { 268 return nil, err 269 } 270 scheme := runtime.NewScheme() 271 _ = clustersv1alpha1.AddToScheme(scheme) 272 _ = v1alpha1.AddToScheme(scheme) 273 _ = oamv1alpha2.SchemeBuilder.AddToScheme(scheme) 274 _ = corev1.SchemeBuilder.AddToScheme(scheme) 275 276 clientset, err := client.New(config, client.Options{Scheme: scheme}) 277 if err != nil { 278 return nil, err 279 } 280 281 return clientset, nil 282 } 283 284 func getEnvValue(containers *[]corev1.Container, envName string) string { 285 for _, container := range *containers { 286 for _, env := range container.Env { 287 if env.Name == envName { 288 return env.Value 289 } 290 } 291 } 292 return "" 293 } 294 295 func updateEnvValue(envs []corev1.EnvVar, envName string, newValue string) []corev1.EnvVar { 296 for i, env := range envs { 297 if env.Name == envName { 298 envs[i].Value = newValue 299 return envs 300 } 301 } 302 return append(envs, corev1.EnvVar{Name: envName, Value: newValue}) 303 } 304 305 // discardStatusMessages discards all messages in the statusUpdateChannel - this will 306 // prevent the channel buffer from filling up in the case of a non-managed cluster 307 func discardStatusMessages(statusUpdateChannel chan clusters.StatusUpdateMessage) { 308 length := len(statusUpdateChannel) 309 for i := 0; i < length; i++ { 310 <-statusUpdateChannel 311 } 312 }