istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/kube/multicluster/secretcontroller.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package multicluster 16 17 import ( 18 "bytes" 19 "crypto/sha256" 20 "fmt" 21 "time" 22 23 "github.com/hashicorp/go-multierror" 24 "go.uber.org/atomic" 25 corev1 "k8s.io/api/core/v1" 26 "k8s.io/apimachinery/pkg/types" 27 "k8s.io/client-go/kubernetes" 28 "k8s.io/client-go/rest" 29 30 "istio.io/istio/pilot/pkg/features" 31 "istio.io/istio/pkg/cluster" 32 "istio.io/istio/pkg/config/mesh" 33 "istio.io/istio/pkg/kube" 34 "istio.io/istio/pkg/kube/controllers" 35 "istio.io/istio/pkg/kube/kclient" 36 "istio.io/istio/pkg/log" 37 "istio.io/istio/pkg/monitoring" 38 ) 39 40 const ( 41 MultiClusterSecretLabel = "istio/multiCluster" 42 ) 43 44 var ( 45 clusterLabel = monitoring.CreateLabel("cluster") 46 timeouts = monitoring.NewSum( 47 "remote_cluster_sync_timeouts_total", 48 "Number of times remote clusters took too long to sync, causing slow startup that excludes remote clusters.", 49 ) 50 51 clusterType = monitoring.CreateLabel("cluster_type") 52 53 clustersCount = monitoring.NewGauge( 54 "istiod_managed_clusters", 55 "Number of clusters managed by istiod", 56 ) 57 58 localClusters = clustersCount.With(clusterType.Value("local")) 59 remoteClusters = clustersCount.With(clusterType.Value("remote")) 60 ) 61 62 type handler interface { 63 clusterAdded(cluster *Cluster) ComponentConstraint 64 clusterUpdated(cluster *Cluster) ComponentConstraint 65 clusterDeleted(clusterID cluster.ID) 66 HasSynced() bool 67 } 68 69 // ClientBuilder builds a new kube.Client from a kubeconfig. Mocked out for testing 70 type ClientBuilder = func(kubeConfig []byte, clusterId cluster.ID, configOverrides ...func(*rest.Config)) (kube.Client, error) 71 72 // Controller is the controller implementation for Secret resources 73 type Controller struct { 74 namespace string 75 configClusterID cluster.ID 76 configCluster *Cluster 77 configClusterSyncers []ComponentConstraint 78 79 ClientBuilder ClientBuilder 80 81 queue controllers.Queue 82 secrets kclient.Client[*corev1.Secret] 83 configOverrides []func(*rest.Config) 84 85 cs *ClusterStore 86 87 meshWatcher mesh.Watcher 88 handlers []handler 89 } 90 91 // NewController returns a new secret controller 92 func NewController(kubeclientset kube.Client, namespace string, clusterID cluster.ID, 93 meshWatcher mesh.Watcher, configOverrides ...func(*rest.Config), 94 ) *Controller { 95 informerClient := kubeclientset 96 97 // When these two are set to true, Istiod will be watching the namespace in which 98 // Istiod is running on the external cluster. Use the inCluster credentials to 99 // create a kubeclientset 100 if features.LocalClusterSecretWatcher && features.ExternalIstiod { 101 config, err := kube.InClusterConfig(configOverrides...) 102 if err != nil { 103 log.Errorf("Could not get istiod incluster configuration: %v", err) 104 return nil 105 } 106 log.Info("Successfully retrieved incluster config.") 107 108 localKubeClient, err := kube.NewClient(kube.NewClientConfigForRestConfig(config), clusterID) 109 if err != nil { 110 log.Errorf("Could not create a client to access local cluster API server: %v", err) 111 return nil 112 } 113 log.Infof("Successfully created in cluster kubeclient at %s", localKubeClient.RESTConfig().Host) 114 informerClient = localKubeClient 115 } 116 117 secrets := kclient.NewFiltered[*corev1.Secret](informerClient, kclient.Filter{ 118 Namespace: namespace, 119 LabelSelector: MultiClusterSecretLabel + "=true", 120 }) 121 122 // init gauges 123 localClusters.Record(1.0) 124 remoteClusters.Record(0.0) 125 126 controller := &Controller{ 127 ClientBuilder: DefaultBuildClientsFromConfig, 128 namespace: namespace, 129 configClusterID: clusterID, 130 configCluster: &Cluster{Client: kubeclientset, ID: clusterID}, 131 cs: newClustersStore(), 132 secrets: secrets, 133 configOverrides: configOverrides, 134 meshWatcher: meshWatcher, 135 } 136 137 // Queue does NOT retry. The only error that can occur is if the kubeconfig is 138 // malformed. This is a static analysis that cannot be resolved by retry. Actual 139 // connectivity issues would result in HasSynced returning false rather than an 140 // error. In this case, things will be retried automatically (via informers or 141 // others), and the time is capped by RemoteClusterTimeout). 142 controller.queue = controllers.NewQueue("multicluster secret", 143 controllers.WithReconciler(controller.processItem)) 144 145 secrets.AddEventHandler(controllers.ObjectHandler(controller.queue.AddObject)) 146 return controller 147 } 148 149 type ComponentBuilder interface { 150 registerHandler(h handler) 151 } 152 153 // BuildMultiClusterComponent constructs a new multicluster component. For each cluster, the constructor will be called. 154 // If the cluster is removed, the T.Close() method will be called. 155 // Constructors MUST not do blocking IO; they will block other operations. 156 // During a cluster update, a new component is constructed before the old one is removed for seamless migration. 157 func BuildMultiClusterComponent[T ComponentConstraint](c ComponentBuilder, constructor func(cluster *Cluster) T) *Component[T] { 158 comp := &Component[T]{ 159 constructor: constructor, 160 clusters: make(map[cluster.ID]T), 161 } 162 c.registerHandler(comp) 163 return comp 164 } 165 166 func (c *Controller) registerHandler(h handler) { 167 // Intentionally no lock. The controller today requires that handlers are registered before execution and not in parallel. 168 c.handlers = append(c.handlers, h) 169 } 170 171 // Run starts the controller until it receives a message over stopCh 172 func (c *Controller) Run(stopCh <-chan struct{}) error { 173 // run handlers for the config cluster; do not store this *Cluster in the ClusterStore or give it a SyncTimeout 174 // this is done outside the goroutine, we should block other Run/startFuncs until this is registered 175 c.configClusterSyncers = c.handleAdd(c.configCluster) 176 go func() { 177 t0 := time.Now() 178 log.Info("Starting multicluster remote secrets controller") 179 // we need to start here when local cluster secret watcher enabled 180 if features.LocalClusterSecretWatcher && features.ExternalIstiod { 181 c.secrets.Start(stopCh) 182 } 183 if !kube.WaitForCacheSync("multicluster remote secrets", stopCh, c.secrets.HasSynced) { 184 return 185 } 186 log.Infof("multicluster remote secrets controller cache synced in %v", time.Since(t0)) 187 c.queue.Run(stopCh) 188 c.handleDelete(c.configClusterID) 189 }() 190 return nil 191 } 192 193 func (c *Controller) HasSynced() bool { 194 if !c.queue.HasSynced() { 195 log.Debug("secret controller did not sync secrets presented at startup") 196 // we haven't finished processing the secrets that were present at startup 197 return false 198 } 199 // Check all config cluster components are synced 200 // c.ConfigClusterHandler.HasSynced does not work; config cluster is handle specially 201 if !kube.AllSynced(c.configClusterSyncers) { 202 return false 203 } 204 // Check all remote clusters are synced (or timed out) 205 return c.cs.HasSynced() 206 } 207 208 func (c *Controller) processItem(key types.NamespacedName) error { 209 log.Infof("processing secret event for secret %s", key) 210 scrt := c.secrets.Get(key.Name, key.Namespace) 211 if scrt != nil { 212 log.Debugf("secret %s exists in informer cache, processing it", key) 213 if err := c.addSecret(key, scrt); err != nil { 214 return fmt.Errorf("error adding secret %s: %v", key, err) 215 } 216 } else { 217 log.Debugf("secret %s does not exist in informer cache, deleting it", key) 218 c.deleteSecret(key.String()) 219 } 220 remoteClusters.Record(float64(c.cs.Len())) 221 222 return nil 223 } 224 225 // DefaultBuildClientsFromConfig creates kube.Clients from the provided kubeconfig. This is overridden for testing only 226 func DefaultBuildClientsFromConfig(kubeConfig []byte, clusterID cluster.ID, configOverrides ...func(*rest.Config)) (kube.Client, error) { 227 restConfig, err := kube.NewUntrustedRestConfig(kubeConfig, configOverrides...) 228 if err != nil { 229 return nil, err 230 } 231 232 clients, err := kube.NewClient(kube.NewClientConfigForRestConfig(restConfig), clusterID) 233 if err != nil { 234 return nil, fmt.Errorf("failed to create kube clients: %v", err) 235 } 236 if features.WorkloadEntryCrossCluster { 237 clients = kube.EnableCrdWatcher(clients) 238 } 239 240 return clients, nil 241 } 242 243 func (c *Controller) createRemoteCluster(kubeConfig []byte, clusterID string) (*Cluster, error) { 244 clients, err := c.ClientBuilder(kubeConfig, cluster.ID(clusterID), c.configOverrides...) 245 if err != nil { 246 return nil, err 247 } 248 return &Cluster{ 249 ID: cluster.ID(clusterID), 250 Client: clients, 251 stop: make(chan struct{}), 252 // for use inside the package, to close on cleanup 253 initialSync: atomic.NewBool(false), 254 initialSyncTimeout: atomic.NewBool(false), 255 kubeConfigSha: sha256.Sum256(kubeConfig), 256 }, nil 257 } 258 259 func (c *Controller) addSecret(name types.NamespacedName, s *corev1.Secret) error { 260 secretKey := name.String() 261 // First delete clusters 262 existingClusters := c.cs.GetExistingClustersFor(secretKey) 263 for _, existingCluster := range existingClusters { 264 if _, ok := s.Data[string(existingCluster.ID)]; !ok { 265 c.deleteCluster(secretKey, existingCluster) 266 } 267 } 268 269 var errs *multierror.Error 270 for clusterID, kubeConfig := range s.Data { 271 logger := log.WithLabels("cluster", clusterID, "secret", secretKey) 272 if cluster.ID(clusterID) == c.configClusterID { 273 logger.Infof("ignoring cluster as it would overwrite the config cluster") 274 continue 275 } 276 277 action := Add 278 if prev := c.cs.Get(secretKey, cluster.ID(clusterID)); prev != nil { 279 action = Update 280 // clusterID must be unique even across multiple secrets 281 kubeConfigSha := sha256.Sum256(kubeConfig) 282 if bytes.Equal(kubeConfigSha[:], prev.kubeConfigSha[:]) { 283 logger.Infof("skipping update (kubeconfig are identical)") 284 continue 285 } 286 // stop previous remote cluster 287 prev.Stop() 288 } else if c.cs.Contains(cluster.ID(clusterID)) { 289 // if the cluster has been registered before by another secret, ignore the new one. 290 logger.Warnf("cluster has already been registered") 291 continue 292 } 293 logger.Infof("%s cluster", action) 294 295 remoteCluster, err := c.createRemoteCluster(kubeConfig, clusterID) 296 if err != nil { 297 logger.Errorf("%s cluster: create remote cluster failed: %v", action, err) 298 errs = multierror.Append(errs, err) 299 continue 300 } 301 // We run cluster async so we do not block, as this requires actually connecting to the cluster and loading configuration. 302 c.cs.Store(secretKey, remoteCluster.ID, remoteCluster) 303 go func() { 304 remoteCluster.Run(c.meshWatcher, c.handlers, action) 305 }() 306 } 307 308 log.Infof("Number of remote clusters: %d", c.cs.Len()) 309 return errs.ErrorOrNil() 310 } 311 312 func (c *Controller) deleteSecret(secretKey string) { 313 for _, cluster := range c.cs.GetExistingClustersFor(secretKey) { 314 if cluster.ID == c.configClusterID { 315 log.Infof("ignoring delete cluster %v from secret %v as it would overwrite the config cluster", c.configClusterID, secretKey) 316 continue 317 } 318 319 c.deleteCluster(secretKey, cluster) 320 } 321 322 log.Infof("Number of remote clusters: %d", c.cs.Len()) 323 } 324 325 func (c *Controller) deleteCluster(secretKey string, cluster *Cluster) { 326 log.Infof("Deleting cluster_id=%v configured by secret=%v", cluster.ID, secretKey) 327 cluster.Stop() 328 c.handleDelete(cluster.ID) 329 c.cs.Delete(secretKey, cluster.ID) 330 331 log.Infof("Number of remote clusters: %d", c.cs.Len()) 332 } 333 334 func (c *Controller) handleAdd(cluster *Cluster) []ComponentConstraint { 335 syncers := make([]ComponentConstraint, 0, len(c.handlers)) 336 for _, handler := range c.handlers { 337 syncers = append(syncers, handler.clusterAdded(cluster)) 338 } 339 return syncers 340 } 341 342 func (c *Controller) handleDelete(key cluster.ID) { 343 for _, handler := range c.handlers { 344 handler.clusterDeleted(key) 345 } 346 } 347 348 // ListRemoteClusters provides debug info about connected remote clusters. 349 func (c *Controller) ListRemoteClusters() []cluster.DebugInfo { 350 // Start with just the config cluster 351 configCluster := "syncing" 352 if kube.AllSynced(c.configClusterSyncers) { 353 configCluster = "synced" 354 } 355 out := []cluster.DebugInfo{{ 356 ID: c.configClusterID, 357 SyncStatus: configCluster, 358 }} 359 // Append each cluster derived from secrets 360 for secretName, clusters := range c.cs.All() { 361 for clusterID, c := range clusters { 362 syncStatus := "syncing" 363 if c.Closed() { 364 syncStatus = "closed" 365 } else if c.SyncDidTimeout() { 366 syncStatus = "timeout" 367 } else if c.HasSynced() { 368 syncStatus = "synced" 369 } 370 out = append(out, cluster.DebugInfo{ 371 ID: clusterID, 372 SecretName: secretName, 373 SyncStatus: syncStatus, 374 }) 375 } 376 } 377 return out 378 } 379 380 func (c *Controller) GetRemoteKubeClient(clusterID cluster.ID) kubernetes.Interface { 381 if remoteCluster := c.cs.GetByID(clusterID); remoteCluster != nil { 382 return remoteCluster.Client.Kube() 383 } 384 return nil 385 }