istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/serviceregistry/aggregate/controller.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package aggregate 16 17 import ( 18 "sync" 19 20 "istio.io/istio/pilot/pkg/features" 21 "istio.io/istio/pilot/pkg/model" 22 "istio.io/istio/pilot/pkg/serviceregistry" 23 "istio.io/istio/pilot/pkg/serviceregistry/provider" 24 "istio.io/istio/pkg/cluster" 25 "istio.io/istio/pkg/config/host" 26 "istio.io/istio/pkg/config/labels" 27 "istio.io/istio/pkg/config/mesh" 28 "istio.io/istio/pkg/log" 29 "istio.io/istio/pkg/maps" 30 "istio.io/istio/pkg/slices" 31 "istio.io/istio/pkg/util/sets" 32 ) 33 34 // The aggregate controller does not implement serviceregistry.Instance since it may be comprised of various 35 // providers and clusters. 36 var ( 37 _ model.ServiceDiscovery = &Controller{} 38 _ model.AggregateController = &Controller{} 39 ) 40 41 // Controller aggregates data across different registries and monitors for changes 42 type Controller struct { 43 meshHolder mesh.Holder 44 45 // The lock is used to protect the registries and controller's running status. 46 storeLock sync.RWMutex 47 registries []*registryEntry 48 // indicates whether the controller has run. 49 // if true, all the registries added later should be run manually. 50 running bool 51 52 handlers model.ControllerHandlers 53 handlersByCluster map[cluster.ID]*model.ControllerHandlers 54 model.NetworkGatewaysHandler 55 } 56 57 func (c *Controller) ServicesForWaypoint(key model.WaypointKey) []model.ServiceInfo { 58 if !features.EnableAmbient { 59 return nil 60 } 61 var res []model.ServiceInfo 62 for _, p := range c.GetRegistries() { 63 res = append(res, p.ServicesForWaypoint(key)...) 64 } 65 return res 66 } 67 68 func (c *Controller) WorkloadsForWaypoint(key model.WaypointKey) []model.WorkloadInfo { 69 if !features.EnableAmbientWaypoints { 70 return nil 71 } 72 var res []model.WorkloadInfo 73 for _, p := range c.GetRegistries() { 74 res = append(res, p.WorkloadsForWaypoint(key)...) 75 } 76 return res 77 } 78 79 func (c *Controller) AdditionalPodSubscriptions(proxy *model.Proxy, addr, cur sets.String) sets.String { 80 if !features.EnableAmbient { 81 return nil 82 } 83 res := sets.New[string]() 84 for _, p := range c.GetRegistries() { 85 res = res.Merge(p.AdditionalPodSubscriptions(proxy, addr, cur)) 86 } 87 return res 88 } 89 90 func (c *Controller) Policies(requested sets.Set[model.ConfigKey]) []model.WorkloadAuthorization { 91 var res []model.WorkloadAuthorization 92 if !features.EnableAmbient { 93 return res 94 } 95 for _, p := range c.GetRegistries() { 96 res = append(res, p.Policies(requested)...) 97 } 98 return res 99 } 100 101 func (c *Controller) AddressInformation(addresses sets.String) ([]model.AddressInfo, sets.String) { 102 i := []model.AddressInfo{} 103 if !features.EnableAmbient { 104 return i, nil 105 } 106 removed := sets.String{} 107 for _, p := range c.GetRegistries() { 108 wis, r := p.AddressInformation(addresses) 109 i = append(i, wis...) 110 removed.Merge(r) 111 } 112 // We may have 'removed' it in one registry but found it in another 113 for _, wl := range i { 114 // TODO(@hzxuzhonghu) This is not right for workload, we may search workload by ip, but the resource name is uid. 115 if removed.Contains(wl.ResourceName()) { 116 removed.Delete(wl.ResourceName()) 117 } 118 } 119 return i, removed 120 } 121 122 type registryEntry struct { 123 serviceregistry.Instance 124 // stop if not nil is the per-registry stop chan. If null, the server stop chan should be used to Run the registry. 125 stop <-chan struct{} 126 } 127 128 type Options struct { 129 MeshHolder mesh.Holder 130 } 131 132 // NewController creates a new Aggregate controller 133 func NewController(opt Options) *Controller { 134 return &Controller{ 135 registries: make([]*registryEntry, 0), 136 meshHolder: opt.MeshHolder, 137 running: false, 138 handlersByCluster: map[cluster.ID]*model.ControllerHandlers{}, 139 } 140 } 141 142 func (c *Controller) addRegistry(registry serviceregistry.Instance, stop <-chan struct{}) { 143 added := false 144 if registry.Provider() == provider.Kubernetes { 145 for i, r := range c.registries { 146 if r.Provider() != provider.Kubernetes { 147 // insert the registry in the position of the first non kubernetes registry 148 c.registries = slices.Insert(c.registries, i, ®istryEntry{Instance: registry, stop: stop}) 149 added = true 150 break 151 } 152 } 153 } 154 if !added { 155 c.registries = append(c.registries, ®istryEntry{Instance: registry, stop: stop}) 156 } 157 158 // Observe the registry for events. 159 registry.AppendNetworkGatewayHandler(c.NotifyGatewayHandlers) 160 registry.AppendServiceHandler(c.handlers.NotifyServiceHandlers) 161 registry.AppendServiceHandler(func(prev, curr *model.Service, event model.Event) { 162 for _, handlers := range c.getClusterHandlers() { 163 handlers.NotifyServiceHandlers(prev, curr, event) 164 } 165 }) 166 } 167 168 func (c *Controller) getClusterHandlers() []*model.ControllerHandlers { 169 c.storeLock.Lock() 170 defer c.storeLock.Unlock() 171 return maps.Values(c.handlersByCluster) 172 } 173 174 // AddRegistry adds registries into the aggregated controller. 175 // If the aggregated controller is already Running, the given registry will never be started. 176 func (c *Controller) AddRegistry(registry serviceregistry.Instance) { 177 c.storeLock.Lock() 178 defer c.storeLock.Unlock() 179 c.addRegistry(registry, nil) 180 } 181 182 // AddRegistryAndRun adds registries into the aggregated controller and makes sure it is Run. 183 // If the aggregated controller is running, the given registry is Run immediately. 184 // Otherwise, the given registry is Run when the aggregate controller is Run, using the given stop. 185 func (c *Controller) AddRegistryAndRun(registry serviceregistry.Instance, stop <-chan struct{}) { 186 if stop == nil { 187 log.Warnf("nil stop channel passed to AddRegistryAndRun for registry %s/%s", registry.Provider(), registry.Cluster()) 188 } 189 c.storeLock.Lock() 190 defer c.storeLock.Unlock() 191 c.addRegistry(registry, stop) 192 if c.running { 193 go registry.Run(stop) 194 } 195 } 196 197 // DeleteRegistry deletes specified registry from the aggregated controller 198 func (c *Controller) DeleteRegistry(clusterID cluster.ID, providerID provider.ID) { 199 c.storeLock.Lock() 200 defer c.storeLock.Unlock() 201 202 if len(c.registries) == 0 { 203 log.Warnf("Registry list is empty, nothing to delete") 204 return 205 } 206 index, ok := c.getRegistryIndex(clusterID, providerID) 207 if !ok { 208 log.Warnf("Registry %s/%s is not found in the registries list, nothing to delete", providerID, clusterID) 209 return 210 } 211 c.registries[index] = nil 212 c.registries = append(c.registries[:index], c.registries[index+1:]...) 213 log.Infof("%s registry for the cluster %s has been deleted.", providerID, clusterID) 214 } 215 216 // GetRegistries returns a copy of all registries 217 func (c *Controller) GetRegistries() []serviceregistry.Instance { 218 c.storeLock.RLock() 219 defer c.storeLock.RUnlock() 220 221 // copy registries to prevent race, no need to deep copy here. 222 out := make([]serviceregistry.Instance, len(c.registries)) 223 for i := range c.registries { 224 out[i] = c.registries[i] 225 } 226 return out 227 } 228 229 func (c *Controller) getRegistryIndex(clusterID cluster.ID, provider provider.ID) (int, bool) { 230 for i, r := range c.registries { 231 if r.Cluster().Equals(clusterID) && r.Provider() == provider { 232 return i, true 233 } 234 } 235 return 0, false 236 } 237 238 // Services lists services from all platforms 239 func (c *Controller) Services() []*model.Service { 240 // smap is a map of hostname (string) to service index, used to identify services that 241 // are installed in multiple clusters. 242 smap := make(map[host.Name]int) 243 index := 0 244 services := make([]*model.Service, 0) 245 // Locking Registries list while walking it to prevent inconsistent results 246 for _, r := range c.GetRegistries() { 247 svcs := r.Services() 248 if r.Provider() != provider.Kubernetes { 249 index += len(svcs) 250 services = append(services, svcs...) 251 } else { 252 for _, s := range svcs { 253 previous, ok := smap[s.Hostname] 254 if !ok { 255 // First time we see a service. The result will have a single service per hostname 256 // The first cluster will be listed first, so the services in the primary cluster 257 // will be used for default settings. If a service appears in multiple clusters, 258 // the order is less clear. 259 smap[s.Hostname] = index 260 index++ 261 services = append(services, s) 262 } else { 263 // We must deepcopy before merge, and after merging, the ClusterVips length will be >= 2. 264 // This is an optimization to prevent deepcopy multi-times 265 if services[previous].ClusterVIPs.Len() < 2 { 266 // Deep copy before merging, otherwise there is a case 267 // a service in remote cluster can be deleted, but the ClusterIP left. 268 services[previous] = services[previous].DeepCopy() 269 } 270 // If it is seen second time, that means it is from a different cluster, update cluster VIPs. 271 mergeService(services[previous], s, r) 272 } 273 } 274 } 275 } 276 return services 277 } 278 279 // GetService retrieves a service by hostname if exists 280 func (c *Controller) GetService(hostname host.Name) *model.Service { 281 var out *model.Service 282 for _, r := range c.GetRegistries() { 283 service := r.GetService(hostname) 284 if service == nil { 285 continue 286 } 287 if r.Provider() != provider.Kubernetes { 288 return service 289 } 290 if out == nil { 291 out = service.DeepCopy() 292 } else { 293 // If we are seeing the service for the second time, it means it is available in multiple clusters. 294 mergeService(out, service, r) 295 } 296 } 297 return out 298 } 299 300 // mergeService only merges two clusters' k8s services 301 func mergeService(dst, src *model.Service, srcRegistry serviceregistry.Instance) { 302 if !src.Ports.Equals(dst.Ports) { 303 log.Debugf("service %s defined from cluster %s is different from others", src.Hostname, srcRegistry.Cluster()) 304 } 305 // Prefer the k8s HostVIPs where possible 306 clusterID := srcRegistry.Cluster() 307 if len(dst.ClusterVIPs.GetAddressesFor(clusterID)) == 0 { 308 newAddresses := src.ClusterVIPs.GetAddressesFor(clusterID) 309 dst.ClusterVIPs.SetAddressesFor(clusterID, newAddresses) 310 } 311 } 312 313 // NetworkGateways merges the service-based cross-network gateways from each registry. 314 func (c *Controller) NetworkGateways() []model.NetworkGateway { 315 var gws []model.NetworkGateway 316 for _, r := range c.GetRegistries() { 317 gws = append(gws, r.NetworkGateways()...) 318 } 319 return gws 320 } 321 322 func (c *Controller) MCSServices() []model.MCSServiceInfo { 323 var out []model.MCSServiceInfo 324 for _, r := range c.GetRegistries() { 325 out = append(out, r.MCSServices()...) 326 } 327 return out 328 } 329 330 func nodeClusterID(node *model.Proxy) cluster.ID { 331 if node.Metadata == nil || node.Metadata.ClusterID == "" { 332 return "" 333 } 334 return node.Metadata.ClusterID 335 } 336 337 // Skip the service registry when there won't be a match 338 // because the proxy is in a different cluster. 339 func skipSearchingRegistryForProxy(nodeClusterID cluster.ID, r serviceregistry.Instance) bool { 340 // Always search non-kube (usually serviceentry) registry. 341 // Check every registry if cluster ID isn't specified. 342 if r.Provider() != provider.Kubernetes || nodeClusterID == "" { 343 return false 344 } 345 346 return !r.Cluster().Equals(nodeClusterID) 347 } 348 349 // GetProxyServiceTargets lists service instances co-located with a given proxy 350 func (c *Controller) GetProxyServiceTargets(node *model.Proxy) []model.ServiceTarget { 351 out := make([]model.ServiceTarget, 0) 352 nodeClusterID := nodeClusterID(node) 353 for _, r := range c.GetRegistries() { 354 if skipSearchingRegistryForProxy(nodeClusterID, r) { 355 log.Debugf("GetProxyServiceTargets(): not searching registry %v: proxy %v CLUSTER_ID is %v", 356 r.Cluster(), node.ID, nodeClusterID) 357 continue 358 } 359 360 instances := r.GetProxyServiceTargets(node) 361 if len(instances) > 0 { 362 out = append(out, instances...) 363 } 364 } 365 366 return out 367 } 368 369 func (c *Controller) GetProxyWorkloadLabels(proxy *model.Proxy) labels.Instance { 370 clusterID := nodeClusterID(proxy) 371 for _, r := range c.GetRegistries() { 372 // If proxy clusterID unset, we may find incorrect workload label. 373 // This can not happen in k8s env. 374 if clusterID == "" || clusterID == r.Cluster() { 375 lbls := r.GetProxyWorkloadLabels(proxy) 376 if lbls != nil { 377 return lbls 378 } 379 } 380 } 381 382 return nil 383 } 384 385 // Run starts all the controllers 386 func (c *Controller) Run(stop <-chan struct{}) { 387 c.storeLock.Lock() 388 for _, r := range c.registries { 389 // prefer the per-registry stop channel 390 registryStop := stop 391 if s := r.stop; s != nil { 392 registryStop = s 393 } 394 go r.Run(registryStop) 395 } 396 c.running = true 397 c.storeLock.Unlock() 398 399 <-stop 400 log.Info("Registry Aggregator terminated") 401 } 402 403 // HasSynced returns true when all registries have synced 404 func (c *Controller) HasSynced() bool { 405 for _, r := range c.GetRegistries() { 406 if !r.HasSynced() { 407 log.Debugf("registry %s is syncing", r.Cluster()) 408 return false 409 } 410 } 411 return true 412 } 413 414 func (c *Controller) AppendServiceHandler(f model.ServiceHandler) { 415 c.handlers.AppendServiceHandler(f) 416 } 417 418 func (c *Controller) AppendWorkloadHandler(f func(*model.WorkloadInstance, model.Event)) { 419 // Currently, it is not used. 420 // Note: take care when you want to enable it, it will register the handlers to all registries 421 // c.handlers.AppendWorkloadHandler(f) 422 } 423 424 func (c *Controller) AppendServiceHandlerForCluster(id cluster.ID, f model.ServiceHandler) { 425 c.storeLock.Lock() 426 defer c.storeLock.Unlock() 427 handler, ok := c.handlersByCluster[id] 428 if !ok { 429 c.handlersByCluster[id] = &model.ControllerHandlers{} 430 handler = c.handlersByCluster[id] 431 } 432 handler.AppendServiceHandler(f) 433 } 434 435 func (c *Controller) UnRegisterHandlersForCluster(id cluster.ID) { 436 c.storeLock.Lock() 437 defer c.storeLock.Unlock() 438 delete(c.handlersByCluster, id) 439 }