github.com/cilium/cilium@v1.16.2/pkg/bgpv1/manager/reconciler/service.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package reconciler 5 6 import ( 7 "context" 8 "fmt" 9 "net/netip" 10 "slices" 11 12 "github.com/cilium/hive/cell" 13 "golang.org/x/exp/maps" 14 corev1 "k8s.io/api/core/v1" 15 "k8s.io/apimachinery/pkg/util/sets" 16 17 "github.com/cilium/cilium/pkg/bgpv1/manager/instance" 18 "github.com/cilium/cilium/pkg/bgpv1/manager/store" 19 "github.com/cilium/cilium/pkg/bgpv1/types" 20 "github.com/cilium/cilium/pkg/k8s" 21 v2alpha1api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1" 22 "github.com/cilium/cilium/pkg/k8s/resource" 23 slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1" 24 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels" 25 slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" 26 ciliumslices "github.com/cilium/cilium/pkg/slices" 27 ) 28 29 type LBServiceReconcilerOut struct { 30 cell.Out 31 32 Reconciler ConfigReconciler `group:"bgp-config-reconciler"` 33 } 34 35 type ServiceReconciler struct { 36 diffStore store.DiffStore[*slim_corev1.Service] 37 epDiffStore store.DiffStore[*k8s.Endpoints] 38 } 39 40 // LBServiceReconcilerMetadata keeps a map of services to the respective advertised Paths 41 type LBServiceReconcilerMetadata map[resource.Key][]*types.Path 42 43 type localServices map[k8s.ServiceID]struct{} 44 45 func NewServiceReconciler(diffStore store.DiffStore[*slim_corev1.Service], epDiffStore store.DiffStore[*k8s.Endpoints]) LBServiceReconcilerOut { 46 if diffStore == nil { 47 return LBServiceReconcilerOut{} 48 } 49 50 return LBServiceReconcilerOut{ 51 Reconciler: &ServiceReconciler{ 52 diffStore: diffStore, 53 epDiffStore: epDiffStore, 54 }, 55 } 56 } 57 58 func (r *ServiceReconciler) Name() string { 59 return "Service" 60 } 61 62 func (r *ServiceReconciler) Priority() int { 63 return 40 64 } 65 66 func (r *ServiceReconciler) Init(sc *instance.ServerWithConfig) error { 67 if sc == nil { 68 return fmt.Errorf("BUG: service reconciler initialization with nil ServerWithConfig") 69 } 70 r.diffStore.InitDiff(r.diffID(sc.ASN)) 71 r.epDiffStore.InitDiff(r.diffID(sc.ASN)) 72 return nil 73 } 74 75 func (r *ServiceReconciler) Cleanup(sc *instance.ServerWithConfig) { 76 if sc != nil { 77 r.diffStore.CleanupDiff(r.diffID(sc.ASN)) 78 r.epDiffStore.CleanupDiff(r.diffID(sc.ASN)) 79 } 80 } 81 82 func (r *ServiceReconciler) Reconcile(ctx context.Context, p ReconcileParams) error { 83 if p.CiliumNode == nil { 84 return fmt.Errorf("attempted service reconciliation with nil local CiliumNode") 85 } 86 87 ls, err := r.populateLocalServices(p.CiliumNode.Name) 88 if err != nil { 89 return err 90 } 91 92 if r.requiresFullReconciliation(p) { 93 return r.fullReconciliation(ctx, p.CurrentServer, p.DesiredConfig, ls) 94 } 95 return r.svcDiffReconciliation(ctx, p.CurrentServer, p.DesiredConfig, ls) 96 } 97 98 func (r *ServiceReconciler) getMetadata(sc *instance.ServerWithConfig) LBServiceReconcilerMetadata { 99 if _, found := sc.ReconcilerMetadata[r.Name()]; !found { 100 sc.ReconcilerMetadata[r.Name()] = make(LBServiceReconcilerMetadata) 101 } 102 return sc.ReconcilerMetadata[r.Name()].(LBServiceReconcilerMetadata) 103 } 104 105 func (r *ServiceReconciler) resolveSvcFromEndpoints(eps *k8s.Endpoints) (*slim_corev1.Service, bool, error) { 106 k := resource.Key{ 107 Name: eps.ServiceID.Name, 108 Namespace: eps.ServiceID.Namespace, 109 } 110 return r.diffStore.GetByKey(k) 111 } 112 113 // requiresFullReconciliation returns true if the desired config requires full reconciliation 114 // (reconciliation of all services), false if partial (diff) reconciliation is sufficient. 115 func (r *ServiceReconciler) requiresFullReconciliation(p ReconcileParams) bool { 116 var existingSelector *slim_metav1.LabelSelector 117 if p.CurrentServer != nil && p.CurrentServer.Config != nil { 118 existingSelector = p.CurrentServer.Config.ServiceSelector 119 } else { 120 return true // the first reconciliation should be always full 121 } 122 // If the existing selector was updated, went from nil to something or something to nil, we need to perform full 123 // reconciliation and check if every existing announcement's service still matches the selector. 124 return (existingSelector != nil && p.DesiredConfig.ServiceSelector != nil && !p.DesiredConfig.ServiceSelector.DeepEqual(existingSelector)) || 125 ((existingSelector == nil) != (p.DesiredConfig.ServiceSelector == nil)) 126 } 127 128 // Populate locally available services used for externalTrafficPolicy=local handling 129 func (r *ServiceReconciler) populateLocalServices(localNodeName string) (localServices, error) { 130 ls := make(localServices) 131 132 epList, err := r.epDiffStore.List() 133 if err != nil { 134 return nil, fmt.Errorf("failed to list endpoints from diffstore: %w", err) 135 } 136 137 endpointsLoop: 138 for _, eps := range epList { 139 _, exists, err := r.resolveSvcFromEndpoints(eps) 140 if err != nil { 141 // Cannot resolve service from endpoints. We have nothing to do here. 142 continue 143 } 144 145 if !exists { 146 // No service associated with this endpoint. We're not interested in this. 147 continue 148 } 149 150 svcID := eps.ServiceID 151 152 for _, be := range eps.Backends { 153 if !be.Terminating && be.NodeName == localNodeName { 154 // At least one endpoint is available on this node. We 155 // can make unavailable to available. 156 if _, found := ls[svcID]; !found { 157 ls[svcID] = struct{}{} 158 } 159 continue endpointsLoop 160 } 161 } 162 } 163 164 return ls, nil 165 } 166 167 func hasLocalEndpoints(svc *slim_corev1.Service, ls localServices) bool { 168 _, found := ls[k8s.ServiceID{Name: svc.GetName(), Namespace: svc.GetNamespace()}] 169 return found 170 } 171 172 // fullReconciliation reconciles all services, this is a heavy operation due to the potential amount of services and 173 // thus should be avoided if partial reconciliation is an option. 174 func (r *ServiceReconciler) fullReconciliation(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, ls localServices) error { 175 toReconcile, toWithdraw, err := r.fullReconciliationServiceList(sc) 176 if err != nil { 177 return err 178 } 179 for _, svc := range toReconcile { 180 if err := r.reconcileService(ctx, sc, newc, svc, ls); err != nil { 181 return fmt.Errorf("failed to reconcile service %s/%s: %w", svc.Namespace, svc.Name, err) 182 } 183 } 184 for _, svc := range toWithdraw { 185 if err := r.withdrawService(ctx, sc, svc); err != nil { 186 return fmt.Errorf("failed to withdraw service %s/%s: %w", svc.Namespace, svc.Name, err) 187 } 188 } 189 return nil 190 } 191 192 // svcDiffReconciliation performs reconciliation, only on services which have been created, updated or deleted since 193 // the last diff reconciliation. 194 func (r *ServiceReconciler) svcDiffReconciliation(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, ls localServices) error { 195 toReconcile, toWithdraw, err := r.diffReconciliationServiceList(sc) 196 if err != nil { 197 return err 198 } 199 for _, svc := range toReconcile { 200 if err := r.reconcileService(ctx, sc, newc, svc, ls); err != nil { 201 return fmt.Errorf("failed to reconcile service %s/%s: %w", svc.Namespace, svc.Name, err) 202 } 203 } 204 // Loop over the deleted services 205 for _, svcKey := range toWithdraw { 206 if err := r.withdrawService(ctx, sc, svcKey); err != nil { 207 return fmt.Errorf("failed to withdraw service %s: %w", svcKey, err) 208 } 209 } 210 return nil 211 } 212 213 // fullReconciliationServiceList return a list of services to reconcile and to withdraw when performing 214 // full service reconciliation. 215 func (r *ServiceReconciler) fullReconciliationServiceList(sc *instance.ServerWithConfig) (toReconcile []*slim_corev1.Service, toWithdraw []resource.Key, err error) { 216 // Init diff in diffstores, so that it contains only changes since the last full reconciliation. 217 // Despite doing it in Init(), we still need this InitDiff to clean up the old diff when the instance is re-created 218 // by the preflight reconciler. Once Init() is called upon re-create by preflight, we can remove this. 219 r.diffStore.InitDiff(r.diffID(sc.ASN)) 220 r.epDiffStore.InitDiff(r.diffID(sc.ASN)) 221 222 // Loop over all existing announcements, find announcements for services which no longer exist 223 serviceAnnouncements := r.getMetadata(sc) 224 for svcKey := range serviceAnnouncements { 225 _, found, err := r.diffStore.GetByKey(svcKey) 226 if err != nil { 227 return nil, nil, fmt.Errorf("diffStore.GetByKey(): %w", err) 228 } 229 // if the service no longer exists, withdraw it 230 if !found { 231 toWithdraw = append(toWithdraw, svcKey) 232 } 233 } 234 235 // Reconcile all existing services 236 svcList, err := r.diffStore.List() 237 if err != nil { 238 return nil, nil, fmt.Errorf("failed to list services from diffstore: %w", err) 239 } 240 toReconcile = append(toReconcile, svcList...) 241 242 return toReconcile, toWithdraw, nil 243 } 244 245 // diffReconciliationServiceList returns a list of services to reconcile and to withdraw when 246 // performing partial (diff) service reconciliation. 247 func (r *ServiceReconciler) diffReconciliationServiceList(sc *instance.ServerWithConfig) (toReconcile []*slim_corev1.Service, toWithdraw []resource.Key, err error) { 248 upserted, deleted, err := r.diffStore.Diff(r.diffID(sc.ASN)) 249 if err != nil { 250 return nil, nil, fmt.Errorf("svc store diff: %w", err) 251 } 252 253 // For externalTrafficPolicy=local, we need to take care of 254 // the endpoint changes in addition to the service changes. 255 // Take a diff of the endpoints and get affected services. 256 // We don't handle service deletion here since we only see 257 // the key, we cannot resolve associated service, so we have 258 // nothing to do. 259 epsUpserted, _, err := r.epDiffStore.Diff(r.diffID(sc.ASN)) 260 if err != nil { 261 return nil, nil, fmt.Errorf("endpoints store diff: %w", err) 262 } 263 264 for _, eps := range epsUpserted { 265 svc, exists, err := r.resolveSvcFromEndpoints(eps) 266 if err != nil { 267 // Cannot resolve service from endpoints. We have nothing to do here. 268 continue 269 } 270 271 if !exists { 272 // No service associated with this endpoint. We're not interested in this. 273 continue 274 } 275 276 // We only need Endpoints tracking for externalTrafficPolicy=Local or internalTrafficPolicy=Local. 277 if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal || 278 (svc.Spec.InternalTrafficPolicy != nil && *svc.Spec.InternalTrafficPolicy == slim_corev1.ServiceInternalTrafficPolicyLocal) { 279 upserted = append(upserted, svc) 280 } 281 } 282 283 // We may have duplicated services that changes happened for both of 284 // service and associated endpoints. 285 deduped := ciliumslices.UniqueFunc( 286 upserted, 287 func(i int) resource.Key { 288 return resource.Key{ 289 Name: upserted[i].GetName(), 290 Namespace: upserted[i].GetNamespace(), 291 } 292 }, 293 ) 294 295 return deduped, deleted, nil 296 } 297 298 // svcDesiredRoutes determines which, if any routes should be announced for the given service. This determines the 299 // desired state. 300 func (r *ServiceReconciler) svcDesiredRoutes(newc *v2alpha1api.CiliumBGPVirtualRouter, svc *slim_corev1.Service, ls localServices) ([]netip.Prefix, error) { 301 if newc.ServiceSelector == nil { 302 // If the vRouter has no service selector, there are no desired routes. 303 return nil, nil 304 } 305 306 // The vRouter has a service selector, so determine the desired routes. 307 svcSelector, err := slim_metav1.LabelSelectorAsSelector(newc.ServiceSelector) 308 if err != nil { 309 return nil, fmt.Errorf("labelSelectorAsSelector: %w", err) 310 } 311 312 // Ignore non matching services. 313 if !svcSelector.Matches(serviceLabelSet(svc)) { 314 return nil, nil 315 } 316 317 var desiredRoutes []netip.Prefix 318 // Loop over the service advertisements and determine the desired routes. 319 for _, svcAdv := range newc.ServiceAdvertisements { 320 switch svcAdv { 321 case v2alpha1api.BGPLoadBalancerIPAddr: 322 desiredRoutes = append(desiredRoutes, r.lbSvcDesiredRoutes(svc, ls)...) 323 case v2alpha1api.BGPClusterIPAddr: 324 desiredRoutes = append(desiredRoutes, r.clusterIPDesiredRoutes(svc, ls)...) 325 case v2alpha1api.BGPExternalIPAddr: 326 desiredRoutes = append(desiredRoutes, r.externalIPDesiredRoutes(svc, ls)...) 327 } 328 } 329 330 return desiredRoutes, err 331 } 332 333 func (r *ServiceReconciler) externalIPDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix { 334 var desiredRoutes []netip.Prefix 335 // Ignore externalTrafficPolicy == Local && no local endpoints. 336 if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal && 337 !hasLocalEndpoints(svc, ls) { 338 return desiredRoutes 339 } 340 for _, extIP := range svc.Spec.ExternalIPs { 341 if extIP == "" { 342 continue 343 } 344 addr, err := netip.ParseAddr(extIP) 345 if err != nil { 346 continue 347 } 348 desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen())) 349 } 350 return desiredRoutes 351 } 352 353 func (r *ServiceReconciler) clusterIPDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix { 354 var desiredRoutes []netip.Prefix 355 // Ignore internalTrafficPolicy == Local && no local endpoints. 356 if svc.Spec.InternalTrafficPolicy != nil && *svc.Spec.InternalTrafficPolicy == slim_corev1.ServiceInternalTrafficPolicyLocal && 357 !hasLocalEndpoints(svc, ls) { 358 return desiredRoutes 359 } 360 if svc.Spec.ClusterIP == "" || len(svc.Spec.ClusterIPs) == 0 || svc.Spec.ClusterIP == corev1.ClusterIPNone { 361 return desiredRoutes 362 } 363 ips := sets.New[string]() 364 if svc.Spec.ClusterIP != "" { 365 ips.Insert(svc.Spec.ClusterIP) 366 } 367 for _, clusterIP := range svc.Spec.ClusterIPs { 368 if clusterIP == "" || clusterIP == corev1.ClusterIPNone { 369 continue 370 } 371 ips.Insert(clusterIP) 372 } 373 for _, ip := range sets.List(ips) { 374 addr, err := netip.ParseAddr(ip) 375 if err != nil { 376 continue 377 } 378 desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen())) 379 } 380 return desiredRoutes 381 } 382 383 func (r *ServiceReconciler) lbSvcDesiredRoutes(svc *slim_corev1.Service, ls localServices) []netip.Prefix { 384 var desiredRoutes []netip.Prefix 385 if svc.Spec.Type != slim_corev1.ServiceTypeLoadBalancer { 386 return desiredRoutes 387 } 388 // Ignore externalTrafficPolicy == Local && no local endpoints. 389 if svc.Spec.ExternalTrafficPolicy == slim_corev1.ServiceExternalTrafficPolicyLocal && 390 !hasLocalEndpoints(svc, ls) { 391 return desiredRoutes 392 } 393 // Ignore service managed by an unsupported LB class. 394 if svc.Spec.LoadBalancerClass != nil && *svc.Spec.LoadBalancerClass != v2alpha1api.BGPLoadBalancerClass { 395 // The service is managed by a different LB class. 396 return desiredRoutes 397 } 398 for _, ingress := range svc.Status.LoadBalancer.Ingress { 399 if ingress.IP == "" { 400 continue 401 } 402 addr, err := netip.ParseAddr(ingress.IP) 403 if err != nil { 404 continue 405 } 406 desiredRoutes = append(desiredRoutes, netip.PrefixFrom(addr, addr.BitLen())) 407 } 408 return desiredRoutes 409 } 410 411 // reconcileService gets the desired routes of a given service and makes sure that is what is being announced. 412 func (r *ServiceReconciler) reconcileService(ctx context.Context, sc *instance.ServerWithConfig, newc *v2alpha1api.CiliumBGPVirtualRouter, svc *slim_corev1.Service, ls localServices) error { 413 414 desiredRoutes, err := r.svcDesiredRoutes(newc, svc, ls) 415 if err != nil { 416 return fmt.Errorf("failed to retrieve svc desired routes: %w", err) 417 } 418 return r.reconcileServiceRoutes(ctx, sc, svc, desiredRoutes) 419 } 420 421 // reconcileServiceRoutes ensures that desired routes of a given service are announced, 422 // adding missing announcements or withdrawing unwanted ones. 423 func (r *ServiceReconciler) reconcileServiceRoutes(ctx context.Context, sc *instance.ServerWithConfig, svc *slim_corev1.Service, desiredRoutes []netip.Prefix) error { 424 serviceAnnouncements := r.getMetadata(sc) 425 svcKey := resource.NewKey(svc) 426 427 for _, desiredCidr := range desiredRoutes { 428 // If this route has already been announced, don't add it again 429 if slices.IndexFunc(serviceAnnouncements[svcKey], func(existing *types.Path) bool { 430 return desiredCidr.String() == existing.NLRI.String() 431 }) != -1 { 432 continue 433 } 434 435 // Advertise the new cidr 436 advertPathResp, err := sc.Server.AdvertisePath(ctx, types.PathRequest{ 437 Path: types.NewPathForPrefix(desiredCidr), 438 }) 439 if err != nil { 440 return fmt.Errorf("failed to advertise service route %v: %w", desiredCidr, err) 441 } 442 serviceAnnouncements[svcKey] = append(serviceAnnouncements[svcKey], advertPathResp.Path) 443 } 444 445 // Loop over announcements in reverse order so we can delete entries without effecting iteration. 446 for i := len(serviceAnnouncements[svcKey]) - 1; i >= 0; i-- { 447 announcement := serviceAnnouncements[svcKey][i] 448 // If the announcement is within the list of desired routes, don't remove it 449 if slices.IndexFunc(desiredRoutes, func(existing netip.Prefix) bool { 450 return existing.String() == announcement.NLRI.String() 451 }) != -1 { 452 continue 453 } 454 455 if err := sc.Server.WithdrawPath(ctx, types.PathRequest{Path: announcement}); err != nil { 456 return fmt.Errorf("failed to withdraw service route %s: %w", announcement.NLRI, err) 457 } 458 459 // Delete announcement from slice 460 serviceAnnouncements[svcKey] = slices.Delete(serviceAnnouncements[svcKey], i, i+1) 461 } 462 return nil 463 } 464 465 // withdrawService removes all announcements for the given service 466 func (r *ServiceReconciler) withdrawService(ctx context.Context, sc *instance.ServerWithConfig, key resource.Key) error { 467 serviceAnnouncements := r.getMetadata(sc) 468 advertisements := serviceAnnouncements[key] 469 // Loop in reverse order so we can delete without effect to the iteration. 470 for i := len(advertisements) - 1; i >= 0; i-- { 471 advertisement := advertisements[i] 472 if err := sc.Server.WithdrawPath(ctx, types.PathRequest{Path: advertisement}); err != nil { 473 // Persist remaining advertisements 474 serviceAnnouncements[key] = advertisements 475 return fmt.Errorf("failed to withdraw deleted service route: %v: %w", advertisement.NLRI, err) 476 } 477 478 // Delete the advertisement after each withdraw in case we error half way through 479 advertisements = slices.Delete(advertisements, i, i+1) 480 } 481 482 // If all were withdrawn without error, we can delete the whole svc from the map 483 delete(serviceAnnouncements, key) 484 485 return nil 486 } 487 488 func (r *ServiceReconciler) diffID(asn uint32) string { 489 return fmt.Sprintf("%s-%d", r.Name(), asn) 490 } 491 492 func serviceLabelSet(svc *slim_corev1.Service) labels.Labels { 493 svcLabels := maps.Clone(svc.Labels) 494 if svcLabels == nil { 495 svcLabels = make(map[string]string) 496 } 497 svcLabels["io.kubernetes.service.name"] = svc.Name 498 svcLabels["io.kubernetes.service.namespace"] = svc.Namespace 499 return labels.Set(svcLabels) 500 }