istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/kube/multicluster/secretcontroller.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package multicluster
    16  
    17  import (
    18  	"bytes"
    19  	"crypto/sha256"
    20  	"fmt"
    21  	"time"
    22  
    23  	"github.com/hashicorp/go-multierror"
    24  	"go.uber.org/atomic"
    25  	corev1 "k8s.io/api/core/v1"
    26  	"k8s.io/apimachinery/pkg/types"
    27  	"k8s.io/client-go/kubernetes"
    28  	"k8s.io/client-go/rest"
    29  
    30  	"istio.io/istio/pilot/pkg/features"
    31  	"istio.io/istio/pkg/cluster"
    32  	"istio.io/istio/pkg/config/mesh"
    33  	"istio.io/istio/pkg/kube"
    34  	"istio.io/istio/pkg/kube/controllers"
    35  	"istio.io/istio/pkg/kube/kclient"
    36  	"istio.io/istio/pkg/log"
    37  	"istio.io/istio/pkg/monitoring"
    38  )
    39  
    40  const (
    41  	MultiClusterSecretLabel = "istio/multiCluster"
    42  )
    43  
    44  var (
    45  	clusterLabel = monitoring.CreateLabel("cluster")
    46  	timeouts     = monitoring.NewSum(
    47  		"remote_cluster_sync_timeouts_total",
    48  		"Number of times remote clusters took too long to sync, causing slow startup that excludes remote clusters.",
    49  	)
    50  
    51  	clusterType = monitoring.CreateLabel("cluster_type")
    52  
    53  	clustersCount = monitoring.NewGauge(
    54  		"istiod_managed_clusters",
    55  		"Number of clusters managed by istiod",
    56  	)
    57  
    58  	localClusters  = clustersCount.With(clusterType.Value("local"))
    59  	remoteClusters = clustersCount.With(clusterType.Value("remote"))
    60  )
    61  
    62  type handler interface {
    63  	clusterAdded(cluster *Cluster) ComponentConstraint
    64  	clusterUpdated(cluster *Cluster) ComponentConstraint
    65  	clusterDeleted(clusterID cluster.ID)
    66  	HasSynced() bool
    67  }
    68  
    69  // ClientBuilder builds a new kube.Client from a kubeconfig. Mocked out for testing
    70  type ClientBuilder = func(kubeConfig []byte, clusterId cluster.ID, configOverrides ...func(*rest.Config)) (kube.Client, error)
    71  
    72  // Controller is the controller implementation for Secret resources
    73  type Controller struct {
    74  	namespace            string
    75  	configClusterID      cluster.ID
    76  	configCluster        *Cluster
    77  	configClusterSyncers []ComponentConstraint
    78  
    79  	ClientBuilder ClientBuilder
    80  
    81  	queue           controllers.Queue
    82  	secrets         kclient.Client[*corev1.Secret]
    83  	configOverrides []func(*rest.Config)
    84  
    85  	cs *ClusterStore
    86  
    87  	meshWatcher mesh.Watcher
    88  	handlers    []handler
    89  }
    90  
    91  // NewController returns a new secret controller
    92  func NewController(kubeclientset kube.Client, namespace string, clusterID cluster.ID,
    93  	meshWatcher mesh.Watcher, configOverrides ...func(*rest.Config),
    94  ) *Controller {
    95  	informerClient := kubeclientset
    96  
    97  	// When these two are set to true, Istiod will be watching the namespace in which
    98  	// Istiod is running on the external cluster. Use the inCluster credentials to
    99  	// create a kubeclientset
   100  	if features.LocalClusterSecretWatcher && features.ExternalIstiod {
   101  		config, err := kube.InClusterConfig(configOverrides...)
   102  		if err != nil {
   103  			log.Errorf("Could not get istiod incluster configuration: %v", err)
   104  			return nil
   105  		}
   106  		log.Info("Successfully retrieved incluster config.")
   107  
   108  		localKubeClient, err := kube.NewClient(kube.NewClientConfigForRestConfig(config), clusterID)
   109  		if err != nil {
   110  			log.Errorf("Could not create a client to access local cluster API server: %v", err)
   111  			return nil
   112  		}
   113  		log.Infof("Successfully created in cluster kubeclient at %s", localKubeClient.RESTConfig().Host)
   114  		informerClient = localKubeClient
   115  	}
   116  
   117  	secrets := kclient.NewFiltered[*corev1.Secret](informerClient, kclient.Filter{
   118  		Namespace:     namespace,
   119  		LabelSelector: MultiClusterSecretLabel + "=true",
   120  	})
   121  
   122  	// init gauges
   123  	localClusters.Record(1.0)
   124  	remoteClusters.Record(0.0)
   125  
   126  	controller := &Controller{
   127  		ClientBuilder:   DefaultBuildClientsFromConfig,
   128  		namespace:       namespace,
   129  		configClusterID: clusterID,
   130  		configCluster:   &Cluster{Client: kubeclientset, ID: clusterID},
   131  		cs:              newClustersStore(),
   132  		secrets:         secrets,
   133  		configOverrides: configOverrides,
   134  		meshWatcher:     meshWatcher,
   135  	}
   136  
   137  	// Queue does NOT retry. The only error that can occur is if the kubeconfig is
   138  	// malformed. This is a static analysis that cannot be resolved by retry. Actual
   139  	// connectivity issues would result in HasSynced returning false rather than an
   140  	// error. In this case, things will be retried automatically (via informers or
   141  	// others), and the time is capped by RemoteClusterTimeout).
   142  	controller.queue = controllers.NewQueue("multicluster secret",
   143  		controllers.WithReconciler(controller.processItem))
   144  
   145  	secrets.AddEventHandler(controllers.ObjectHandler(controller.queue.AddObject))
   146  	return controller
   147  }
   148  
   149  type ComponentBuilder interface {
   150  	registerHandler(h handler)
   151  }
   152  
   153  // BuildMultiClusterComponent constructs a new multicluster component. For each cluster, the constructor will be called.
   154  // If the cluster is removed, the T.Close() method will be called.
   155  // Constructors MUST not do blocking IO; they will block other operations.
   156  // During a cluster update, a new component is constructed before the old one is removed for seamless migration.
   157  func BuildMultiClusterComponent[T ComponentConstraint](c ComponentBuilder, constructor func(cluster *Cluster) T) *Component[T] {
   158  	comp := &Component[T]{
   159  		constructor: constructor,
   160  		clusters:    make(map[cluster.ID]T),
   161  	}
   162  	c.registerHandler(comp)
   163  	return comp
   164  }
   165  
   166  func (c *Controller) registerHandler(h handler) {
   167  	// Intentionally no lock. The controller today requires that handlers are registered before execution and not in parallel.
   168  	c.handlers = append(c.handlers, h)
   169  }
   170  
   171  // Run starts the controller until it receives a message over stopCh
   172  func (c *Controller) Run(stopCh <-chan struct{}) error {
   173  	// run handlers for the config cluster; do not store this *Cluster in the ClusterStore or give it a SyncTimeout
   174  	// this is done outside the goroutine, we should block other Run/startFuncs until this is registered
   175  	c.configClusterSyncers = c.handleAdd(c.configCluster)
   176  	go func() {
   177  		t0 := time.Now()
   178  		log.Info("Starting multicluster remote secrets controller")
   179  		// we need to start here when local cluster secret watcher enabled
   180  		if features.LocalClusterSecretWatcher && features.ExternalIstiod {
   181  			c.secrets.Start(stopCh)
   182  		}
   183  		if !kube.WaitForCacheSync("multicluster remote secrets", stopCh, c.secrets.HasSynced) {
   184  			return
   185  		}
   186  		log.Infof("multicluster remote secrets controller cache synced in %v", time.Since(t0))
   187  		c.queue.Run(stopCh)
   188  		c.handleDelete(c.configClusterID)
   189  	}()
   190  	return nil
   191  }
   192  
   193  func (c *Controller) HasSynced() bool {
   194  	if !c.queue.HasSynced() {
   195  		log.Debug("secret controller did not sync secrets presented at startup")
   196  		// we haven't finished processing the secrets that were present at startup
   197  		return false
   198  	}
   199  	// Check all config cluster components are synced
   200  	// c.ConfigClusterHandler.HasSynced does not work; config cluster is handle specially
   201  	if !kube.AllSynced(c.configClusterSyncers) {
   202  		return false
   203  	}
   204  	// Check all remote clusters are synced (or timed out)
   205  	return c.cs.HasSynced()
   206  }
   207  
   208  func (c *Controller) processItem(key types.NamespacedName) error {
   209  	log.Infof("processing secret event for secret %s", key)
   210  	scrt := c.secrets.Get(key.Name, key.Namespace)
   211  	if scrt != nil {
   212  		log.Debugf("secret %s exists in informer cache, processing it", key)
   213  		if err := c.addSecret(key, scrt); err != nil {
   214  			return fmt.Errorf("error adding secret %s: %v", key, err)
   215  		}
   216  	} else {
   217  		log.Debugf("secret %s does not exist in informer cache, deleting it", key)
   218  		c.deleteSecret(key.String())
   219  	}
   220  	remoteClusters.Record(float64(c.cs.Len()))
   221  
   222  	return nil
   223  }
   224  
   225  // DefaultBuildClientsFromConfig creates kube.Clients from the provided kubeconfig. This is overridden for testing only
   226  func DefaultBuildClientsFromConfig(kubeConfig []byte, clusterID cluster.ID, configOverrides ...func(*rest.Config)) (kube.Client, error) {
   227  	restConfig, err := kube.NewUntrustedRestConfig(kubeConfig, configOverrides...)
   228  	if err != nil {
   229  		return nil, err
   230  	}
   231  
   232  	clients, err := kube.NewClient(kube.NewClientConfigForRestConfig(restConfig), clusterID)
   233  	if err != nil {
   234  		return nil, fmt.Errorf("failed to create kube clients: %v", err)
   235  	}
   236  	if features.WorkloadEntryCrossCluster {
   237  		clients = kube.EnableCrdWatcher(clients)
   238  	}
   239  
   240  	return clients, nil
   241  }
   242  
   243  func (c *Controller) createRemoteCluster(kubeConfig []byte, clusterID string) (*Cluster, error) {
   244  	clients, err := c.ClientBuilder(kubeConfig, cluster.ID(clusterID), c.configOverrides...)
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  	return &Cluster{
   249  		ID:     cluster.ID(clusterID),
   250  		Client: clients,
   251  		stop:   make(chan struct{}),
   252  		// for use inside the package, to close on cleanup
   253  		initialSync:        atomic.NewBool(false),
   254  		initialSyncTimeout: atomic.NewBool(false),
   255  		kubeConfigSha:      sha256.Sum256(kubeConfig),
   256  	}, nil
   257  }
   258  
   259  func (c *Controller) addSecret(name types.NamespacedName, s *corev1.Secret) error {
   260  	secretKey := name.String()
   261  	// First delete clusters
   262  	existingClusters := c.cs.GetExistingClustersFor(secretKey)
   263  	for _, existingCluster := range existingClusters {
   264  		if _, ok := s.Data[string(existingCluster.ID)]; !ok {
   265  			c.deleteCluster(secretKey, existingCluster)
   266  		}
   267  	}
   268  
   269  	var errs *multierror.Error
   270  	for clusterID, kubeConfig := range s.Data {
   271  		logger := log.WithLabels("cluster", clusterID, "secret", secretKey)
   272  		if cluster.ID(clusterID) == c.configClusterID {
   273  			logger.Infof("ignoring cluster as it would overwrite the config cluster")
   274  			continue
   275  		}
   276  
   277  		action := Add
   278  		if prev := c.cs.Get(secretKey, cluster.ID(clusterID)); prev != nil {
   279  			action = Update
   280  			// clusterID must be unique even across multiple secrets
   281  			kubeConfigSha := sha256.Sum256(kubeConfig)
   282  			if bytes.Equal(kubeConfigSha[:], prev.kubeConfigSha[:]) {
   283  				logger.Infof("skipping update (kubeconfig are identical)")
   284  				continue
   285  			}
   286  			// stop previous remote cluster
   287  			prev.Stop()
   288  		} else if c.cs.Contains(cluster.ID(clusterID)) {
   289  			// if the cluster has been registered before by another secret, ignore the new one.
   290  			logger.Warnf("cluster has already been registered")
   291  			continue
   292  		}
   293  		logger.Infof("%s cluster", action)
   294  
   295  		remoteCluster, err := c.createRemoteCluster(kubeConfig, clusterID)
   296  		if err != nil {
   297  			logger.Errorf("%s cluster: create remote cluster failed: %v", action, err)
   298  			errs = multierror.Append(errs, err)
   299  			continue
   300  		}
   301  		// We run cluster async so we do not block, as this requires actually connecting to the cluster and loading configuration.
   302  		c.cs.Store(secretKey, remoteCluster.ID, remoteCluster)
   303  		go func() {
   304  			remoteCluster.Run(c.meshWatcher, c.handlers, action)
   305  		}()
   306  	}
   307  
   308  	log.Infof("Number of remote clusters: %d", c.cs.Len())
   309  	return errs.ErrorOrNil()
   310  }
   311  
   312  func (c *Controller) deleteSecret(secretKey string) {
   313  	for _, cluster := range c.cs.GetExistingClustersFor(secretKey) {
   314  		if cluster.ID == c.configClusterID {
   315  			log.Infof("ignoring delete cluster %v from secret %v as it would overwrite the config cluster", c.configClusterID, secretKey)
   316  			continue
   317  		}
   318  
   319  		c.deleteCluster(secretKey, cluster)
   320  	}
   321  
   322  	log.Infof("Number of remote clusters: %d", c.cs.Len())
   323  }
   324  
   325  func (c *Controller) deleteCluster(secretKey string, cluster *Cluster) {
   326  	log.Infof("Deleting cluster_id=%v configured by secret=%v", cluster.ID, secretKey)
   327  	cluster.Stop()
   328  	c.handleDelete(cluster.ID)
   329  	c.cs.Delete(secretKey, cluster.ID)
   330  
   331  	log.Infof("Number of remote clusters: %d", c.cs.Len())
   332  }
   333  
   334  func (c *Controller) handleAdd(cluster *Cluster) []ComponentConstraint {
   335  	syncers := make([]ComponentConstraint, 0, len(c.handlers))
   336  	for _, handler := range c.handlers {
   337  		syncers = append(syncers, handler.clusterAdded(cluster))
   338  	}
   339  	return syncers
   340  }
   341  
   342  func (c *Controller) handleDelete(key cluster.ID) {
   343  	for _, handler := range c.handlers {
   344  		handler.clusterDeleted(key)
   345  	}
   346  }
   347  
   348  // ListRemoteClusters provides debug info about connected remote clusters.
   349  func (c *Controller) ListRemoteClusters() []cluster.DebugInfo {
   350  	// Start with just the config cluster
   351  	configCluster := "syncing"
   352  	if kube.AllSynced(c.configClusterSyncers) {
   353  		configCluster = "synced"
   354  	}
   355  	out := []cluster.DebugInfo{{
   356  		ID:         c.configClusterID,
   357  		SyncStatus: configCluster,
   358  	}}
   359  	// Append each cluster derived from secrets
   360  	for secretName, clusters := range c.cs.All() {
   361  		for clusterID, c := range clusters {
   362  			syncStatus := "syncing"
   363  			if c.Closed() {
   364  				syncStatus = "closed"
   365  			} else if c.SyncDidTimeout() {
   366  				syncStatus = "timeout"
   367  			} else if c.HasSynced() {
   368  				syncStatus = "synced"
   369  			}
   370  			out = append(out, cluster.DebugInfo{
   371  				ID:         clusterID,
   372  				SecretName: secretName,
   373  				SyncStatus: syncStatus,
   374  			})
   375  		}
   376  	}
   377  	return out
   378  }
   379  
   380  func (c *Controller) GetRemoteKubeClient(clusterID cluster.ID) kubernetes.Interface {
   381  	if remoteCluster := c.cs.GetByID(clusterID); remoteCluster != nil {
   382  		return remoteCluster.Client.Kube()
   383  	}
   384  	return nil
   385  }