istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/serviceregistry/aggregate/controller.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package aggregate
    16  
    17  import (
    18  	"sync"
    19  
    20  	"istio.io/istio/pilot/pkg/features"
    21  	"istio.io/istio/pilot/pkg/model"
    22  	"istio.io/istio/pilot/pkg/serviceregistry"
    23  	"istio.io/istio/pilot/pkg/serviceregistry/provider"
    24  	"istio.io/istio/pkg/cluster"
    25  	"istio.io/istio/pkg/config/host"
    26  	"istio.io/istio/pkg/config/labels"
    27  	"istio.io/istio/pkg/config/mesh"
    28  	"istio.io/istio/pkg/log"
    29  	"istio.io/istio/pkg/maps"
    30  	"istio.io/istio/pkg/slices"
    31  	"istio.io/istio/pkg/util/sets"
    32  )
    33  
    34  // The aggregate controller does not implement serviceregistry.Instance since it may be comprised of various
    35  // providers and clusters.
    36  var (
    37  	_ model.ServiceDiscovery    = &Controller{}
    38  	_ model.AggregateController = &Controller{}
    39  )
    40  
// Controller aggregates data across different registries and monitors for changes.
type Controller struct {
	// meshHolder is the mesh holder supplied through Options.MeshHolder.
	meshHolder mesh.Holder

	// The lock is used to protect the registries and controller's running status.
	// It is also held whenever handlersByCluster is read or modified.
	storeLock  sync.RWMutex
	// registries holds the registered registries; addRegistry keeps Kubernetes
	// registries ahead of the first non-Kubernetes one.
	registries []*registryEntry
	// indicates whether the controller has run.
	// if true, all the registries added later should be run manually.
	running bool

	// handlers are the service handlers attached to every registry added to the controller.
	handlers          model.ControllerHandlers
	// handlersByCluster holds service handlers registered per cluster ID through
	// AppendServiceHandlerForCluster and removed by UnRegisterHandlersForCluster.
	handlersByCluster map[cluster.ID]*model.ControllerHandlers
	// NOTE(review): presumably the embedded handler supplies NotifyGatewayHandlers, which
	// addRegistry subscribes to each registry's network gateway events - confirm in model.
	model.NetworkGatewaysHandler
}
    56  
    57  func (c *Controller) ServicesForWaypoint(key model.WaypointKey) []model.ServiceInfo {
    58  	if !features.EnableAmbient {
    59  		return nil
    60  	}
    61  	var res []model.ServiceInfo
    62  	for _, p := range c.GetRegistries() {
    63  		res = append(res, p.ServicesForWaypoint(key)...)
    64  	}
    65  	return res
    66  }
    67  
    68  func (c *Controller) WorkloadsForWaypoint(key model.WaypointKey) []model.WorkloadInfo {
    69  	if !features.EnableAmbientWaypoints {
    70  		return nil
    71  	}
    72  	var res []model.WorkloadInfo
    73  	for _, p := range c.GetRegistries() {
    74  		res = append(res, p.WorkloadsForWaypoint(key)...)
    75  	}
    76  	return res
    77  }
    78  
    79  func (c *Controller) AdditionalPodSubscriptions(proxy *model.Proxy, addr, cur sets.String) sets.String {
    80  	if !features.EnableAmbient {
    81  		return nil
    82  	}
    83  	res := sets.New[string]()
    84  	for _, p := range c.GetRegistries() {
    85  		res = res.Merge(p.AdditionalPodSubscriptions(proxy, addr, cur))
    86  	}
    87  	return res
    88  }
    89  
    90  func (c *Controller) Policies(requested sets.Set[model.ConfigKey]) []model.WorkloadAuthorization {
    91  	var res []model.WorkloadAuthorization
    92  	if !features.EnableAmbient {
    93  		return res
    94  	}
    95  	for _, p := range c.GetRegistries() {
    96  		res = append(res, p.Policies(requested)...)
    97  	}
    98  	return res
    99  }
   100  
   101  func (c *Controller) AddressInformation(addresses sets.String) ([]model.AddressInfo, sets.String) {
   102  	i := []model.AddressInfo{}
   103  	if !features.EnableAmbient {
   104  		return i, nil
   105  	}
   106  	removed := sets.String{}
   107  	for _, p := range c.GetRegistries() {
   108  		wis, r := p.AddressInformation(addresses)
   109  		i = append(i, wis...)
   110  		removed.Merge(r)
   111  	}
   112  	// We may have 'removed' it in one registry but found it in another
   113  	for _, wl := range i {
   114  		// TODO(@hzxuzhonghu) This is not right for workload, we may search workload by ip, but the resource name is uid.
   115  		if removed.Contains(wl.ResourceName()) {
   116  			removed.Delete(wl.ResourceName())
   117  		}
   118  	}
   119  	return i, removed
   120  }
   121  
// registryEntry pairs a registry with the stop channel it should be run with.
type registryEntry struct {
	// Instance is the wrapped registry; its methods are promoted onto registryEntry.
	serviceregistry.Instance
	// stop if not nil is the per-registry stop chan. If null, the server stop chan should be used to Run the registry.
	stop <-chan struct{}
}
   127  
// Options carries the settings used by NewController to build a Controller.
type Options struct {
	// MeshHolder is stored on the resulting Controller as its mesh holder.
	MeshHolder mesh.Holder
}
   131  
   132  // NewController creates a new Aggregate controller
   133  func NewController(opt Options) *Controller {
   134  	return &Controller{
   135  		registries:        make([]*registryEntry, 0),
   136  		meshHolder:        opt.MeshHolder,
   137  		running:           false,
   138  		handlersByCluster: map[cluster.ID]*model.ControllerHandlers{},
   139  	}
   140  }
   141  
   142  func (c *Controller) addRegistry(registry serviceregistry.Instance, stop <-chan struct{}) {
   143  	added := false
   144  	if registry.Provider() == provider.Kubernetes {
   145  		for i, r := range c.registries {
   146  			if r.Provider() != provider.Kubernetes {
   147  				// insert the registry in the position of the first non kubernetes registry
   148  				c.registries = slices.Insert(c.registries, i, &registryEntry{Instance: registry, stop: stop})
   149  				added = true
   150  				break
   151  			}
   152  		}
   153  	}
   154  	if !added {
   155  		c.registries = append(c.registries, &registryEntry{Instance: registry, stop: stop})
   156  	}
   157  
   158  	// Observe the registry for events.
   159  	registry.AppendNetworkGatewayHandler(c.NotifyGatewayHandlers)
   160  	registry.AppendServiceHandler(c.handlers.NotifyServiceHandlers)
   161  	registry.AppendServiceHandler(func(prev, curr *model.Service, event model.Event) {
   162  		for _, handlers := range c.getClusterHandlers() {
   163  			handlers.NotifyServiceHandlers(prev, curr, event)
   164  		}
   165  	})
   166  }
   167  
   168  func (c *Controller) getClusterHandlers() []*model.ControllerHandlers {
   169  	c.storeLock.Lock()
   170  	defer c.storeLock.Unlock()
   171  	return maps.Values(c.handlersByCluster)
   172  }
   173  
// AddRegistry adds registries into the aggregated controller.
// If the aggregated controller is already Running, the given registry will never be started.
// The registry is stored without a per-registry stop channel, so a later Run starts it with
// the stop channel passed to Run.
func (c *Controller) AddRegistry(registry serviceregistry.Instance) {
	c.storeLock.Lock()
	defer c.storeLock.Unlock()
	c.addRegistry(registry, nil)
}
   181  
   182  // AddRegistryAndRun adds registries into the aggregated controller and makes sure it is Run.
   183  // If the aggregated controller is running, the given registry is Run immediately.
   184  // Otherwise, the given registry is Run when the aggregate controller is Run, using the given stop.
   185  func (c *Controller) AddRegistryAndRun(registry serviceregistry.Instance, stop <-chan struct{}) {
   186  	if stop == nil {
   187  		log.Warnf("nil stop channel passed to AddRegistryAndRun for registry %s/%s", registry.Provider(), registry.Cluster())
   188  	}
   189  	c.storeLock.Lock()
   190  	defer c.storeLock.Unlock()
   191  	c.addRegistry(registry, stop)
   192  	if c.running {
   193  		go registry.Run(stop)
   194  	}
   195  }
   196  
   197  // DeleteRegistry deletes specified registry from the aggregated controller
   198  func (c *Controller) DeleteRegistry(clusterID cluster.ID, providerID provider.ID) {
   199  	c.storeLock.Lock()
   200  	defer c.storeLock.Unlock()
   201  
   202  	if len(c.registries) == 0 {
   203  		log.Warnf("Registry list is empty, nothing to delete")
   204  		return
   205  	}
   206  	index, ok := c.getRegistryIndex(clusterID, providerID)
   207  	if !ok {
   208  		log.Warnf("Registry %s/%s is not found in the registries list, nothing to delete", providerID, clusterID)
   209  		return
   210  	}
   211  	c.registries[index] = nil
   212  	c.registries = append(c.registries[:index], c.registries[index+1:]...)
   213  	log.Infof("%s registry for the cluster %s has been deleted.", providerID, clusterID)
   214  }
   215  
   216  // GetRegistries returns a copy of all registries
   217  func (c *Controller) GetRegistries() []serviceregistry.Instance {
   218  	c.storeLock.RLock()
   219  	defer c.storeLock.RUnlock()
   220  
   221  	// copy registries to prevent race, no need to deep copy here.
   222  	out := make([]serviceregistry.Instance, len(c.registries))
   223  	for i := range c.registries {
   224  		out[i] = c.registries[i]
   225  	}
   226  	return out
   227  }
   228  
   229  func (c *Controller) getRegistryIndex(clusterID cluster.ID, provider provider.ID) (int, bool) {
   230  	for i, r := range c.registries {
   231  		if r.Cluster().Equals(clusterID) && r.Provider() == provider {
   232  			return i, true
   233  		}
   234  	}
   235  	return 0, false
   236  }
   237  
   238  // Services lists services from all platforms
   239  func (c *Controller) Services() []*model.Service {
   240  	// smap is a map of hostname (string) to service index, used to identify services that
   241  	// are installed in multiple clusters.
   242  	smap := make(map[host.Name]int)
   243  	index := 0
   244  	services := make([]*model.Service, 0)
   245  	// Locking Registries list while walking it to prevent inconsistent results
   246  	for _, r := range c.GetRegistries() {
   247  		svcs := r.Services()
   248  		if r.Provider() != provider.Kubernetes {
   249  			index += len(svcs)
   250  			services = append(services, svcs...)
   251  		} else {
   252  			for _, s := range svcs {
   253  				previous, ok := smap[s.Hostname]
   254  				if !ok {
   255  					// First time we see a service. The result will have a single service per hostname
   256  					// The first cluster will be listed first, so the services in the primary cluster
   257  					// will be used for default settings. If a service appears in multiple clusters,
   258  					// the order is less clear.
   259  					smap[s.Hostname] = index
   260  					index++
   261  					services = append(services, s)
   262  				} else {
   263  					// We must deepcopy before merge, and after merging, the ClusterVips length will be >= 2.
   264  					// This is an optimization to prevent deepcopy multi-times
   265  					if services[previous].ClusterVIPs.Len() < 2 {
   266  						// Deep copy before merging, otherwise there is a case
   267  						// a service in remote cluster can be deleted, but the ClusterIP left.
   268  						services[previous] = services[previous].DeepCopy()
   269  					}
   270  					// If it is seen second time, that means it is from a different cluster, update cluster VIPs.
   271  					mergeService(services[previous], s, r)
   272  				}
   273  			}
   274  		}
   275  	}
   276  	return services
   277  }
   278  
   279  // GetService retrieves a service by hostname if exists
   280  func (c *Controller) GetService(hostname host.Name) *model.Service {
   281  	var out *model.Service
   282  	for _, r := range c.GetRegistries() {
   283  		service := r.GetService(hostname)
   284  		if service == nil {
   285  			continue
   286  		}
   287  		if r.Provider() != provider.Kubernetes {
   288  			return service
   289  		}
   290  		if out == nil {
   291  			out = service.DeepCopy()
   292  		} else {
   293  			// If we are seeing the service for the second time, it means it is available in multiple clusters.
   294  			mergeService(out, service, r)
   295  		}
   296  	}
   297  	return out
   298  }
   299  
   300  // mergeService only merges two clusters' k8s services
   301  func mergeService(dst, src *model.Service, srcRegistry serviceregistry.Instance) {
   302  	if !src.Ports.Equals(dst.Ports) {
   303  		log.Debugf("service %s defined from cluster %s is different from others", src.Hostname, srcRegistry.Cluster())
   304  	}
   305  	// Prefer the k8s HostVIPs where possible
   306  	clusterID := srcRegistry.Cluster()
   307  	if len(dst.ClusterVIPs.GetAddressesFor(clusterID)) == 0 {
   308  		newAddresses := src.ClusterVIPs.GetAddressesFor(clusterID)
   309  		dst.ClusterVIPs.SetAddressesFor(clusterID, newAddresses)
   310  	}
   311  }
   312  
   313  // NetworkGateways merges the service-based cross-network gateways from each registry.
   314  func (c *Controller) NetworkGateways() []model.NetworkGateway {
   315  	var gws []model.NetworkGateway
   316  	for _, r := range c.GetRegistries() {
   317  		gws = append(gws, r.NetworkGateways()...)
   318  	}
   319  	return gws
   320  }
   321  
   322  func (c *Controller) MCSServices() []model.MCSServiceInfo {
   323  	var out []model.MCSServiceInfo
   324  	for _, r := range c.GetRegistries() {
   325  		out = append(out, r.MCSServices()...)
   326  	}
   327  	return out
   328  }
   329  
   330  func nodeClusterID(node *model.Proxy) cluster.ID {
   331  	if node.Metadata == nil || node.Metadata.ClusterID == "" {
   332  		return ""
   333  	}
   334  	return node.Metadata.ClusterID
   335  }
   336  
   337  // Skip the service registry when there won't be a match
   338  // because the proxy is in a different cluster.
   339  func skipSearchingRegistryForProxy(nodeClusterID cluster.ID, r serviceregistry.Instance) bool {
   340  	// Always search non-kube (usually serviceentry) registry.
   341  	// Check every registry if cluster ID isn't specified.
   342  	if r.Provider() != provider.Kubernetes || nodeClusterID == "" {
   343  		return false
   344  	}
   345  
   346  	return !r.Cluster().Equals(nodeClusterID)
   347  }
   348  
   349  // GetProxyServiceTargets lists service instances co-located with a given proxy
   350  func (c *Controller) GetProxyServiceTargets(node *model.Proxy) []model.ServiceTarget {
   351  	out := make([]model.ServiceTarget, 0)
   352  	nodeClusterID := nodeClusterID(node)
   353  	for _, r := range c.GetRegistries() {
   354  		if skipSearchingRegistryForProxy(nodeClusterID, r) {
   355  			log.Debugf("GetProxyServiceTargets(): not searching registry %v: proxy %v CLUSTER_ID is %v",
   356  				r.Cluster(), node.ID, nodeClusterID)
   357  			continue
   358  		}
   359  
   360  		instances := r.GetProxyServiceTargets(node)
   361  		if len(instances) > 0 {
   362  			out = append(out, instances...)
   363  		}
   364  	}
   365  
   366  	return out
   367  }
   368  
   369  func (c *Controller) GetProxyWorkloadLabels(proxy *model.Proxy) labels.Instance {
   370  	clusterID := nodeClusterID(proxy)
   371  	for _, r := range c.GetRegistries() {
   372  		// If proxy clusterID unset, we may find incorrect workload label.
   373  		// This can not happen in k8s env.
   374  		if clusterID == "" || clusterID == r.Cluster() {
   375  			lbls := r.GetProxyWorkloadLabels(proxy)
   376  			if lbls != nil {
   377  				return lbls
   378  			}
   379  		}
   380  	}
   381  
   382  	return nil
   383  }
   384  
   385  // Run starts all the controllers
   386  func (c *Controller) Run(stop <-chan struct{}) {
   387  	c.storeLock.Lock()
   388  	for _, r := range c.registries {
   389  		// prefer the per-registry stop channel
   390  		registryStop := stop
   391  		if s := r.stop; s != nil {
   392  			registryStop = s
   393  		}
   394  		go r.Run(registryStop)
   395  	}
   396  	c.running = true
   397  	c.storeLock.Unlock()
   398  
   399  	<-stop
   400  	log.Info("Registry Aggregator terminated")
   401  }
   402  
   403  // HasSynced returns true when all registries have synced
   404  func (c *Controller) HasSynced() bool {
   405  	for _, r := range c.GetRegistries() {
   406  		if !r.HasSynced() {
   407  			log.Debugf("registry %s is syncing", r.Cluster())
   408  			return false
   409  		}
   410  	}
   411  	return true
   412  }
   413  
// AppendServiceHandler adds f to the controller-wide service handlers. addRegistry
// subscribes those handlers' NotifyServiceHandlers to each registry it adds.
func (c *Controller) AppendServiceHandler(f model.ServiceHandler) {
	c.handlers.AppendServiceHandler(f)
}
   417  
// AppendWorkloadHandler is intentionally a no-op: the given handler is ignored.
func (c *Controller) AppendWorkloadHandler(f func(*model.WorkloadInstance, model.Event)) {
	// Currently, it is not used.
	// Note: take care when you want to enable it, it will register the handlers to all registries
	// c.handlers.AppendWorkloadHandler(f)
}
   423  
   424  func (c *Controller) AppendServiceHandlerForCluster(id cluster.ID, f model.ServiceHandler) {
   425  	c.storeLock.Lock()
   426  	defer c.storeLock.Unlock()
   427  	handler, ok := c.handlersByCluster[id]
   428  	if !ok {
   429  		c.handlersByCluster[id] = &model.ControllerHandlers{}
   430  		handler = c.handlersByCluster[id]
   431  	}
   432  	handler.AppendServiceHandler(f)
   433  }
   434  
// UnRegisterHandlersForCluster drops every service handler registered for the given cluster
// ID through AppendServiceHandlerForCluster. It is a no-op for an unknown cluster ID.
func (c *Controller) UnRegisterHandlersForCluster(id cluster.ID) {
	c.storeLock.Lock()
	defer c.storeLock.Unlock()
	delete(c.handlersByCluster, id)
}