github.com/cilium/cilium@v1.16.2/pkg/l2announcer/l2announcer.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package l2announcer

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/netip"
	"regexp"
	"slices"
	"strings"

	"github.com/cilium/hive/cell"
	"github.com/cilium/hive/job"
	"github.com/cilium/statedb"
	"github.com/sirupsen/logrus"
	"golang.org/x/exp/maps"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"

	daemon_k8s "github.com/cilium/cilium/daemon/k8s"
	"github.com/cilium/cilium/pkg/datapath/tables"
	"github.com/cilium/cilium/pkg/k8s"
	v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	cilium_api_v2alpha1 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
	"github.com/cilium/cilium/pkg/k8s/resource"
	slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
	"github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels"
	slim_meta_v1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
	"github.com/cilium/cilium/pkg/k8s/utils"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/time"
)

var Cell = cell.Module(
	"l2-announcer",
	"L2 Announcer",

	cell.Provide(NewL2Announcer),
	cell.Provide(l2AnnouncementPolicyResource),
)

func l2AnnouncementPolicyResource(lc cell.Lifecycle, cs k8sClient.Clientset) (resource.Resource[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy], error) {
	if !cs.IsEnabled() {
		return nil, nil
	}
	lw := utils.ListerWatcherFromTyped(
		cs.CiliumV2alpha1().CiliumL2AnnouncementPolicies(),
	)
	return resource.New[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy](lc, lw, resource.WithMetric("CiliumL2AnnouncementPolicy")), nil
}
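// For orientation, a minimal CiliumL2AnnouncementPolicy that this resource
// would emit events for might look like the following. The example is
// illustrative only; the field names mirror the Spec fields consumed further
// below (NodeSelector, ServiceSelector, Interfaces, LoadBalancerIPs):
//
//	apiVersion: cilium.io/v2alpha1
//	kind: CiliumL2AnnouncementPolicy
//	metadata:
//	  name: example-policy
//	spec:
//	  nodeSelector:
//	    matchLabels:
//	      kubernetes.io/hostname: worker-1
//	  serviceSelector:
//	    matchLabels:
//	      color: blue
//	  interfaces:
//	    - ^eth[0-9]+$
//	  loadBalancerIPs: true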
type l2AnnouncerParams struct {
	cell.In

	Lifecycle cell.Lifecycle
	Logger    logrus.FieldLogger
	Health    cell.Health

	DaemonConfig         *option.DaemonConfig
	Clientset            k8sClient.Clientset
	Services             resource.Resource[*slim_corev1.Service]
	L2AnnouncementPolicy resource.Resource[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]
	LocalNodeResource    daemon_k8s.LocalCiliumNodeResource
	L2AnnounceTable      statedb.RWTable[*tables.L2AnnounceEntry]
	Devices              statedb.Table[*tables.Device]
	StateDB              *statedb.DB
	JobGroup             job.Group
}

// L2Announcer takes all L2 announcement policies and filters them down to those that match the labels of the local
// node. It then searches for all services matching the selectors of those policies. For each selected service the
// agent attempts to take a lease; the lease holder persists all IP and netdev combinations selected by the policy
// to the L2AnnounceTable. Datapath components consume that table and handle traffic for the IP+netdev entries.
type L2Announcer struct {
	params l2AnnouncerParams

	svcStore    resource.Store[*slim_corev1.Service]
	policyStore resource.Store[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]
	localNode   *v2.CiliumNode

	scopedGroup job.ScopedGroup

	leaderChannel     chan leaderElectionEvent
	devicesUpdatedSig chan struct{}

	// selectedPolicies matching the current node.
	selectedPolicies map[resource.Key]*selectedPolicy
	// Services which are selected by one or more policies and for which we thus want to participate in leader
	// election. Indexed by service key.
	selectedServices map[resource.Key]*selectedService
	// A list of devices which can be matched by the policies.
	devices []string
}

func NewL2Announcer(params l2AnnouncerParams) *L2Announcer {
	// These values were picked because they seemed right; change if necessary.
	const leaderElectionBufferSize = 16
	announcer := &L2Announcer{
		params:            params,
		selectedServices:  make(map[resource.Key]*selectedService),
		selectedPolicies:  make(map[resource.Key]*selectedPolicy),
		leaderChannel:     make(chan leaderElectionEvent, leaderElectionBufferSize),
		devicesUpdatedSig: make(chan struct{}, 1),
	}

	// Can't operate or GC if the client set is disabled.
	if !params.Clientset.IsEnabled() {
		return announcer
	}

	announcer.scopedGroup = announcer.params.JobGroup.Scoped("leader-election")

	if !params.DaemonConfig.EnableL2Announcements {
		// If the L2 announcement feature is disabled, garbage collect any leases from previous runs when the
		// feature might have been active. Just once, not on a timer.
		announcer.params.JobGroup.Add(job.OneShot("l2-announcer lease-gc", announcer.leaseGC))
		return announcer
	}

	announcer.params.JobGroup.Add(job.OneShot("l2-announcer run", announcer.run))
	announcer.params.JobGroup.Add(job.Timer("l2-announcer lease-gc", func(ctx context.Context) error {
		return announcer.leaseGC(ctx, nil)
	}, time.Minute))

	return announcer
}
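// The constructor above registers its work through the hive job package: a
// one-shot job runs once after start, a timer job fires on an interval. A
// minimal sketch of the same pattern (the names "boot" and "tick" are
// hypothetical; the signatures match the usage above):
//
//	grp.Add(job.OneShot("boot", func(ctx context.Context, health cell.Health) error {
//		return nil // runs once when the job group starts
//	}))
//	grp.Add(job.Timer("tick", func(ctx context.Context) error {
//		return nil // runs every minute until the group stops
//	}, time.Minute))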
func (l2a *L2Announcer) run(ctx context.Context, health cell.Health) error {
	var err error
	l2a.svcStore, err = l2a.params.Services.Store(ctx)
	if err != nil {
		return fmt.Errorf("get service store: %w", err)
	}

	l2a.policyStore, err = l2a.params.L2AnnouncementPolicy.Store(ctx)
	if err != nil {
		return fmt.Errorf("get policy store: %w", err)
	}

	svcChan := l2a.params.Services.Events(ctx)
	policyChan := l2a.params.L2AnnouncementPolicy.Events(ctx)
	localNodeChan := l2a.params.LocalNodeResource.Events(ctx)

	devices, watchDevices := tables.SelectedDevices(l2a.params.Devices, l2a.params.StateDB.ReadTxn())
	l2a.devices = tables.DeviceNames(devices)

	// We must know the local node before we can process any other events.
	for {
		event, more := <-localNodeChan
		// Resource closed, shutting down.
		if !more {
			return nil
		}

		if err := l2a.processLocalNodeEvent(ctx, event); err != nil {
			l2a.params.Logger.WithError(err).Warn("Error processing local node event")
		}

		if l2a.localNode != nil {
			break
		}
	}

loop:
	for {
		select {
		case <-ctx.Done():
			break loop
		case event, more := <-svcChan:
			// Resource closed, shutting down.
			if !more {
				break loop
			}

			if err := l2a.processSvcEvent(event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing service event")
			}

		case event, more := <-policyChan:
			// Resource closed, shutting down.
			if !more {
				break loop
			}

			if err := l2a.processPolicyEvent(ctx, event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing policy event")
			}

		case event, more := <-localNodeChan:
			// Resource closed, shutting down.
			if !more {
				break loop
			}

			if err := l2a.processLocalNodeEvent(ctx, event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing local node event")
			}

		case event := <-l2a.leaderChannel:
			if err := l2a.processLeaderEvent(event); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing leader event")
			}

		case <-watchDevices:
			devices, watchDevices = tables.SelectedDevices(l2a.params.Devices, l2a.params.StateDB.ReadTxn())
			deviceNames := tables.DeviceNames(devices)

			if slices.Equal(l2a.devices, deviceNames) {
				continue
			}
			l2a.devices = deviceNames
			if err := l2a.processDevicesChanged(ctx); err != nil {
				l2a.params.Logger.WithError(err).Warn("Error processing devices changed signal")
			}
		}
	}

	return nil
}

// leaseGC is called periodically to garbage collect any leases which are no longer held by any agent. This is
// needed since agents do not track leases for services they no longer select.
func (l2a *L2Announcer) leaseGC(ctx context.Context, health cell.Health) error {
	leaseClient := l2a.params.Clientset.CoordinationV1().Leases(l2a.leaseNamespace())
	list, err := leaseClient.List(ctx, metav1.ListOptions{})
	if err != nil {
		var statusErr *apierrors.StatusError
		if errors.As(err, &statusErr) && statusErr.Status().Code == http.StatusForbidden {
			// leaseGC can't check whether L2 announcements were enabled before this run. If the feature was never
			// enabled, the agent's cluster role lacks the lease permissions and listing returns a forbidden error;
			// this is expected, so ignore it.
			return nil
		}

		return fmt.Errorf("leaseClient.List: %w", err)
	}

	for _, lease := range list.Items {
		if !strings.HasPrefix(lease.Name, leasePrefix) {
			continue
		}

		if lease.Spec.HolderIdentity != nil && *lease.Spec.HolderIdentity != "" {
			continue
		}

		err = leaseClient.Delete(ctx, lease.Name, metav1.DeleteOptions{})
		if err != nil {
			return fmt.Errorf("leaseClient.Delete(%s): %w", lease.Name, err)
		}
	}

	return nil
}
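// For illustration, leaseGC above deletes vacated leases like the following
// (values hypothetical; the name format comes from newLeaseLock below):
//
//	apiVersion: coordination.k8s.io/v1
//	kind: Lease
//	metadata:
//	  name: cilium-l2announce-default-my-lb
//	  namespace: kube-system
//	spec:
//	  holderIdentity: ""   # no holder anymore, so the lease is deleted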
func (l2a *L2Announcer) processDevicesChanged(ctx context.Context) error {
	var errs error

	// Upsert every known policy, which will re-evaluate device matching.
	for _, selectedPolicy := range l2a.selectedPolicies {
		if err := l2a.upsertPolicy(ctx, selectedPolicy.policy); err != nil {
			errs = errors.Join(errs, fmt.Errorf("upsert policy: %w", err))
		}
	}

	return errs
}

func (l2a *L2Announcer) processPolicyEvent(ctx context.Context, event resource.Event[*cilium_api_v2alpha1.CiliumL2AnnouncementPolicy]) error {
	var err error
	switch event.Kind {
	case resource.Upsert:
		err = l2a.upsertPolicy(ctx, event.Object)
		if err != nil {
			err = fmt.Errorf("upsert policy: %w", err)
		}

	case resource.Delete:
		err = l2a.delPolicy(event.Key)
		if err != nil {
			err = fmt.Errorf("delete policy: %w", err)
		}

	case resource.Sync:
	}

	// If `err` is not nil, the event will be retried by the resource.
	event.Done(err)
	return err
}
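// upsertSvc below only considers services that actually carry an
// L2-announceable address: either spec.externalIPs or a LoadBalancer ingress
// IP. A hypothetical qualifying service, for illustration:
//
//	apiVersion: v1
//	kind: Service
//	metadata:
//	  name: my-lb
//	  namespace: default
//	  labels:
//	    color: blue
//	spec:
//	  type: LoadBalancer
//	status:
//	  loadBalancer:
//	    ingress:
//	      - ip: 192.0.2.1
//
// A ClusterIP-only service (no external IPs, no ingress IP) is dropped from
// the selected set.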
func (l2a *L2Announcer) upsertSvc(svc *slim_corev1.Service) error {
	key := serviceKey(svc)

	// Ignore services that have no external or LB IP assigned.
	noExternal := svc.Spec.ExternalIPs == nil
	noLB := true
	for _, v := range svc.Status.LoadBalancer.Ingress {
		if v.IP != "" {
			noLB = false
			break
		}
	}
	if noExternal && noLB {
		return l2a.delSvc(key)
	}

	// Ignore services managed by an unsupported load balancer class.
	if svc.Spec.LoadBalancerClass != nil &&
		*svc.Spec.LoadBalancerClass != cilium_api_v2alpha1.L2AnnounceLoadBalancerClass {
		return l2a.delSvc(key)
	}

	ss, found := l2a.selectedServices[key]
	if found {
		// Update the service object, labels or IPs may have changed.
		ss.svc = svc

		// Since labels may have changed, remove all matching policies and re-match against all known policies.
		ss.byPolicies = nil
		for policyKey, selectedPolicy := range l2a.selectedPolicies {
			if selectedPolicy.serviceSelector.Matches(svcAndMetaLabels(svc)) {
				// Policy IP type and service IP type must match.
				if (selectedPolicy.policy.Spec.ExternalIPs && !noExternal) ||
					(selectedPolicy.policy.Spec.LoadBalancerIPs && !noLB) {
					ss.byPolicies = append(ss.byPolicies, policyKey)
				}
			}
		}

		// If no policies match anymore, delete the service.
		if len(ss.byPolicies) == 0 {
			// gcOrphanedService deletes when a service has no policies, which is the case here.
			// It also stops any lease subscription and reconciles the output table.
			l2a.gcOrphanedService(ss)
			return nil
		}

		// Since IPs may have changed, recalculate its entries in the output table, if we are leader.
		err := l2a.recalculateL2EntriesTableEntries(ss)
		if err != nil {
			return fmt.Errorf("recalculateL2EntriesTableEntries: %w", err)
		}

		return nil
	}

	// The service is not yet selected, check if any policies match it.
	var matchingPolicies []resource.Key
	for policyKey, selectedPolicy := range l2a.selectedPolicies {
		if selectedPolicy.serviceSelector.Matches(svcAndMetaLabels(svc)) {
			// Policy IP type and service IP type must match.
			if (selectedPolicy.policy.Spec.ExternalIPs && !noExternal) ||
				(selectedPolicy.policy.Spec.LoadBalancerIPs && !noLB) {
				matchingPolicies = append(matchingPolicies, policyKey)
			}
		}
	}

	// Add the service to the list of selected services if at least one policy matches it.
	if len(matchingPolicies) > 0 {
		l2a.addSelectedService(svc, matchingPolicies)
	}

	return nil
}

func (l2a *L2Announcer) delSvc(key resource.Key) error {
	ss, found := l2a.selectedServices[key]
	if !found {
		return nil
	}

	// gcOrphanedService deletes the service when no policies match it, so remove the policy references and call it.
	// It will remove the service, stop leader election for it and reconcile the output table if we were leader for
	// the service.
	ss.byPolicies = nil
	err := l2a.gcOrphanedService(ss)
	if err != nil {
		return fmt.Errorf("gcOrphanedService: %w", err)
	}

	return nil
}

func (l2a *L2Announcer) processSvcEvent(event resource.Event[*slim_corev1.Service]) error {
	var err error
	switch event.Kind {
	case resource.Upsert:
		err = l2a.upsertSvc(event.Object)
		if err != nil {
			err = fmt.Errorf("upsert service: %w", err)
		}

	case resource.Delete:
		err = l2a.delSvc(event.Key)
		if err != nil {
			err = fmt.Errorf("delete service: %w", err)
		}

	case resource.Sync:
	}

	// If `err` is not nil, this will cause the resource to retry the event.
	event.Done(err)
	return err
}

func policyKey(policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy) resource.Key {
	return resource.Key{Name: policy.Name}
}

func serviceKey(svc *slim_corev1.Service) resource.Key {
	return resource.Key{Namespace: svc.Namespace, Name: svc.Name}
}
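// CiliumL2AnnouncementPolicy is cluster scoped, so a policy key carries only
// a name, while a service key is namespaced. For example (hypothetical names):
//
//	policyKey(policy)  // resource.Key{Name: "example-policy"}
//	serviceKey(svc)    // resource.Key{Namespace: "default", Name: "my-lb"}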
func (l2a *L2Announcer) upsertPolicy(ctx context.Context, policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy) error {
	key := policyKey(policy)

	// Remove all references to the old policy, since the new version might not match the service anymore.
	for _, ss := range l2a.selectedServices {
		idx := slices.Index(ss.byPolicies, key)
		if idx != -1 {
			ss.byPolicies = slices.Delete(ss.byPolicies, idx, idx+1)
		}
	}

	if policy.Spec.NodeSelector != nil {
		nodeselector, err := slim_meta_v1.LabelSelectorAsSelector(policy.Spec.NodeSelector)
		if err != nil {
			if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", err); err2 != nil {
				l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
			}
			return fmt.Errorf("make node selector: %w", err)
		}
		if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", nil); err != nil {
			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
		}

		// The new policy does not match the node selector.
		if !nodeselector.Matches(labels.Set(l2a.localNode.Labels)) {
			err = l2a.delPolicy(key)
			if err != nil {
				return fmt.Errorf("del policy: %w", err)
			}
			return nil
		}
	} else {
		// Clear any error status if it was set before.
		if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-node-selector", nil); err != nil {
			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
		}
	}

	// If no interface regexes are given, all devices match. Otherwise only devices matching the policy
	// will be selected.
	var selectedDevices []string
	if len(policy.Spec.Interfaces) == 0 {
		selectedDevices = l2a.devices
	} else {
		for _, strRegex := range policy.Spec.Interfaces {
			regex, err := regexp.Compile(strRegex)
			if err != nil {
				if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-interface-regex", err); err2 != nil {
					l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
				}
				return fmt.Errorf("policy compile interface regex: %w", err)
			}

			for _, device := range l2a.devices {
				if slices.Contains(selectedDevices, device) {
					continue
				}

				if regex.MatchString(device) {
					selectedDevices = append(selectedDevices, device)
				}
			}
		}
	}

	// Clear any error status if it was set before.
	if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-interface-regex", nil); err != nil {
		l2a.params.Logger.WithError(err).Warn("updating policy status failed")
	}

	// If no selector is specified, all services match.
	serviceSelector := labels.Everything()
	if policy.Spec.ServiceSelector != nil {
		var err error
		serviceSelector, err = slim_meta_v1.LabelSelectorAsSelector(policy.Spec.ServiceSelector)
		if err != nil {
			if err2 := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-service-selector", err); err2 != nil {
				l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
			}
			return fmt.Errorf("make service selector: %w", err)
		}
	}

	// Clear any error status if it exists.
	if err := l2a.updatePolicyStatus(ctx, policy, "io.cilium/bad-service-selector", nil); err != nil {
		l2a.params.Logger.WithError(err).Warn("updating policy status failed")
	}

	l2a.selectedPolicies[key] = &selectedPolicy{
		policy:          policy,
		serviceSelector: serviceSelector,
		selectedDevices: selectedDevices,
	}

	// Check all services; if one matches the policy, record this policy on the already selected service, or add
	// the service to the selected services if it was not tracked yet.
	for _, svc := range l2a.svcStore.List() {
		if !serviceSelector.Matches(svcAndMetaLabels(svc)) {
			continue
		}

		// Ignore services that have no external or LB IP assigned.
		noExternal := svc.Spec.ExternalIPs == nil
		noLB := true
		for _, v := range svc.Status.LoadBalancer.Ingress {
			if v.IP != "" {
				noLB = false
				break
			}
		}
		if noExternal && noLB {
			continue
		}

		if !((policy.Spec.ExternalIPs && !noExternal) ||
			(policy.Spec.LoadBalancerIPs && !noLB)) {
			continue
		}

		ss, found := l2a.selectedServices[serviceKey(svc)]
		if found {
			if slices.Index(ss.byPolicies, key) == -1 {
				ss.byPolicies = append(ss.byPolicies, key)
			}

			// Recalculate in case the policy update causes the L2 announcement entries to be generated differently.
			if err := l2a.recalculateL2EntriesTableEntries(ss); err != nil {
				return fmt.Errorf("recalculateL2EntriesTableEntries: %w", err)
			}

			continue
		}

		l2a.addSelectedService(svc, []resource.Key{key})
	}

	err := l2a.gcOrphanedServices()
	if err != nil {
		return fmt.Errorf("gcOrphanedServices: %w", err)
	}

	return nil
}
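// A worked example of the device selection above (device names hypothetical):
// with l2a.devices = []string{"eth0", "eth1", "wlan0"} and a policy declaring
// Interfaces: []string{"^eth[0-9]+$"}, selectedDevices ends up as
// []string{"eth0", "eth1"}; with no Interfaces at all, every device matches.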
const (
	// The string used in the FieldManager field on update options.
	ciliumFieldManager = "cilium-agent-l2-announcer"
)

// updatePolicyStatus updates the policy status condition of the given type; it is called every time an aspect of
// the policy has been checked. If `err` is nil and no condition of this type exists, no action is taken. If `err`
// contains an actual error, a condition is added or updated; if a condition exists and `err` is nil, the condition
// is marked false.
func (l2a *L2Announcer) updatePolicyStatus(
	ctx context.Context,
	policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy,
	typ string,
	err error,
) error {
	// Find an existing condition of the given type.
	idx := slices.IndexFunc(policy.Status.Conditions, func(c metav1.Condition) bool {
		return c.Type == typ
	})

	var cond *metav1.Condition
	// If no condition of this type exists
	if idx < 0 {
		// If the update call was to clear an error, no action has to happen.
		if err == nil {
			return nil
		}

		policy.Status.Conditions = append(policy.Status.Conditions, metav1.Condition{})
		idx = len(policy.Status.Conditions) - 1
	}
	cond = &policy.Status.Conditions[idx]

	cond.Type = typ
	cond.Status = metav1.ConditionTrue
	if err == nil {
		cond.Status = metav1.ConditionFalse
	}
	cond.LastTransitionTime = metav1.Now()
	cond.ObservedGeneration = policy.GetGeneration()
	if err == nil {
		cond.Message = ""
	} else {
		cond.Message = err.Error()
	}
	cond.Reason = "error"

	policyClient := l2a.params.Clientset.CiliumV2alpha1().CiliumL2AnnouncementPolicies()

	replacePolicyStatus := []k8s.JSONPatch{
		{
			OP:    "replace",
			Path:  "/status",
			Value: policy.Status,
		},
	}

	createStatusPatch, err := json.Marshal(replacePolicyStatus)
	if err != nil {
		return fmt.Errorf("json.Marshal(%v) failed: %w", replacePolicyStatus, err)
	}

	_, err = policyClient.Patch(ctx, policy.Name,
		types.JSONPatchType, createStatusPatch, metav1.PatchOptions{
			FieldManager: ciliumFieldManager,
		}, "status")

	return err
}
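// The marshalled patch sent above is a single-element JSON patch document,
// roughly of this shape (condition contents abbreviated, values hypothetical):
//
//	[{"op":"replace","path":"/status","value":{"conditions":[
//	  {"type":"io.cilium/bad-interface-regex","status":"True","reason":"error",
//	   "message":"error parsing regexp: ..."}]}}]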
func (l2a *L2Announcer) delPolicy(key resource.Key) error {
	for _, ss := range l2a.selectedServices {
		idx := slices.Index(ss.byPolicies, key)
		if idx != -1 {
			ss.byPolicies = slices.Delete(ss.byPolicies, idx, idx+1)
		}
	}

	delete(l2a.selectedPolicies, key)

	err := l2a.gcOrphanedServices()
	if err != nil {
		return fmt.Errorf("gcOrphanedServices: %w", err)
	}

	return nil
}

// The leaderelection library enforces sane timer values; this function verifies that the user input follows those
// rules and falls back to sane defaults when it doesn't.
func (l2a *L2Announcer) leaseTimings() (leaseDuration, renewDeadline, retryPeriod time.Duration) {
	leaseDuration = l2a.params.DaemonConfig.L2AnnouncerLeaseDuration
	renewDeadline = l2a.params.DaemonConfig.L2AnnouncerRenewDeadline
	retryPeriod = l2a.params.DaemonConfig.L2AnnouncerRetryPeriod

	log := l2a.params.Logger

	if leaseDuration < 1*time.Second {
		log.WithFields(logrus.Fields{
			"leaseDuration": leaseDuration,
		}).Warnf(
			"--%s must be at least 1s, defaulting to 1s",
			option.L2AnnouncerLeaseDuration,
		)
		leaseDuration = time.Second
	}

	if renewDeadline < 1 {
		log.WithFields(logrus.Fields{
			"renewDeadline": renewDeadline,
		}).Warnf(
			"--%s must be positive, defaulting to 1s",
			option.L2AnnouncerRenewDeadline,
		)
		renewDeadline = time.Second
	}

	if retryPeriod < 1 {
		log.WithFields(logrus.Fields{
			"retryPeriod": retryPeriod,
		}).Warnf(
			"--%s must be positive, defaulting to 200ms",
			option.L2AnnouncerRetryPeriod,
		)
		retryPeriod = 200 * time.Millisecond
	}

	if leaseDuration <= renewDeadline {
		log.WithFields(logrus.Fields{
			"leaseDuration": leaseDuration,
			"renewDeadline": renewDeadline,
		}).Warnf(
			"--%s must be greater than --%s, defaulting to a 2/1 ratio",
			option.L2AnnouncerLeaseDuration,
			option.L2AnnouncerRenewDeadline,
		)
		renewDeadline = leaseDuration / 2
	}

	if renewDeadline <= time.Duration(leaderelection.JitterFactor*float64(retryPeriod)) {
		log.WithFields(logrus.Fields{
			"renewDeadline": renewDeadline,
			"retryPeriod":   retryPeriod,
		}).Warnf(
			"--%s must be greater than --%s * %.2f, defaulting --%s to --%s / 2",
			option.L2AnnouncerRenewDeadline,
			option.L2AnnouncerRetryPeriod,
			leaderelection.JitterFactor,
			option.L2AnnouncerRetryPeriod,
			option.L2AnnouncerRenewDeadline,
		)
		retryPeriod = renewDeadline / 2
	}

	return leaseDuration, renewDeadline, retryPeriod
}

func (l2a *L2Announcer) addSelectedService(svc *slim_corev1.Service, byPolicies []resource.Key) {
	leaseDuration, renewDeadline, retryPeriod := l2a.leaseTimings()
	ss := &selectedService{
		svc:           svc,
		byPolicies:    byPolicies,
		lock:          l2a.newLeaseLock(svc),
		done:          make(chan struct{}),
		leaderChannel: l2a.leaderChannel,
		leaseDuration: leaseDuration,
		renewDeadline: renewDeadline,
		retryPeriod:   retryPeriod,
	}

	l2a.selectedServices[serviceKey(svc)] = ss

	// Kick off the leader election job.
	l2a.scopedGroup.Add(job.OneShot(
		fmt.Sprintf("leader-election/%s/%s", svc.Namespace, svc.Name),
		ss.serviceLeaderElection),
	)
}
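// A worked example of the checks in leaseTimings (values illustrative, not
// necessarily the configured defaults): leaseDuration=15s, renewDeadline=5s,
// retryPeriod=2s passes every rule, since 15s >= 1s, 15s > 5s, and
// 5s > 1.2 * 2s = 2.4s (leaderelection.JitterFactor is 1.2). A renewDeadline
// of 2s with the same retryPeriod would trip the last check and shrink
// retryPeriod to 1s.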
func (l2a *L2Announcer) leaseNamespace() string {
	ns := l2a.params.DaemonConfig.K8sNamespace
	// If for any reason CILIUM_K8S_NAMESPACE is not set, fall back to the default namespace.
	if ns == "" {
		ns = metav1.NamespaceDefault
	}

	return ns
}

const leasePrefix = "cilium-l2announce"

func (l2a *L2Announcer) newLeaseLock(svc *slim_corev1.Service) *resourcelock.LeaseLock {
	return &resourcelock.LeaseLock{
		LeaseMeta: metav1.ObjectMeta{
			Namespace: l2a.leaseNamespace(),
			Name:      fmt.Sprintf("%s-%s-%s", leasePrefix, svc.Namespace, svc.Name),
		},
		Client: l2a.params.Clientset.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity: l2a.localNode.Name,
		},
	}
}
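// For example (hypothetical names): for the service "my-lb" in namespace
// "default", announced from node "worker-1", newLeaseLock produces a Lease
// named "cilium-l2announce-default-my-lb" in the agent's namespace whose
// holderIdentity becomes "worker-1" once this node wins the election.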
// Check all selected services, delete services which are no longer selected by any of the policies.
func (l2a *L2Announcer) gcOrphanedServices() error {
	for _, ss := range l2a.selectedServices {
		err := l2a.gcOrphanedService(ss)
		if err != nil {
			return fmt.Errorf("gcOrphanedService: %w", err)
		}
	}

	return nil
}

func (l2a *L2Announcer) gcOrphanedService(ss *selectedService) error {
	// Only GC services that have been orphaned (all policies that selected them have gone away).
	if len(ss.byPolicies) > 0 {
		return nil
	}

	// Stop the leader election routine.
	ss.stop()

	// Recalculation will remove all entries since we stopped the leader election.
	if err := l2a.recalculateL2EntriesTableEntries(ss); err != nil {
		return fmt.Errorf("recalculateL2EntriesTableEntries: %w", err)
	}

	// Remove the service from the selected services.
	delete(l2a.selectedServices, serviceKey(ss.svc))
	return nil
}

func (l2a *L2Announcer) processLocalNodeEvent(ctx context.Context, event resource.Event[*v2.CiliumNode]) error {
	var err error
	if event.Kind == resource.Upsert {
		err = l2a.upsertLocalNode(ctx, event.Object)
		if err != nil {
			err = fmt.Errorf("upsert local node: %w", err)
		}
	}

	event.Done(err)
	return err
}

func (l2a *L2Announcer) upsertLocalNode(ctx context.Context, localNode *v2.CiliumNode) error {
	// If the label set did not change, there is nothing to do.
	if l2a.localNode != nil && labels.Equals(l2a.localNode.Labels, labels.Set(localNode.Labels)) {
		return nil
	}

	l2a.localNode = localNode

	// Delete any policies that no longer match the new label set.
	var errs error
	for key, selectedPolicy := range l2a.selectedPolicies {
		var nodeselector labels.Selector
		if selectedPolicy.policy.Spec.NodeSelector == nil {
			nodeselector = labels.Everything()
		} else {
			var err error
			nodeselector, err = slim_meta_v1.LabelSelectorAsSelector(selectedPolicy.policy.Spec.NodeSelector)
			if err != nil {
				if err2 := l2a.updatePolicyStatus(ctx, selectedPolicy.policy, "io.cilium/bad-node-selector", err); err2 != nil {
					l2a.params.Logger.WithError(err2).Warn("updating policy status failed")
				}
				return fmt.Errorf("make node selector: %w", err)
			}
		}
		if err := l2a.updatePolicyStatus(ctx, selectedPolicy.policy, "io.cilium/bad-node-selector", nil); err != nil {
			l2a.params.Logger.WithError(err).Warn("updating policy status failed")
		}

		if nodeselector.Matches(labels.Set(l2a.localNode.Labels)) {
			continue
		}

		err := l2a.delPolicy(key)
		if err != nil {
			errs = errors.Join(errs, fmt.Errorf("delete policy: %w", err))
			continue
		}
	}

	// Upsert all policies; the upsert function checks whether they match the new label set.
	for _, policy := range l2a.policyStore.List() {
		err := l2a.upsertPolicy(ctx, policy)
		if err != nil {
			errs = errors.Join(errs, fmt.Errorf("upsert policy: %w", err))
			continue
		}
	}

	return errs
}

func (l2a *L2Announcer) processLeaderEvent(event leaderElectionEvent) error {
	event.selectedService.currentlyLeader = event.typ == leaderElectionLeading
	err := l2a.recalculateL2EntriesTableEntries(event.selectedService)
	if err != nil {
		return fmt.Errorf("recalculateL2EntriesTableEntries: %w", err)
	}

	return nil
}
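// recalculateL2EntriesTableEntries below treats the Origins list of a table
// entry as a reference count. For example (addresses and device names
// hypothetical): if services "default/svc-a" and "default/svc-b" both desire
// 192.0.2.1 on eth0, the entry 192.0.2.1/eth0 carries both origins; when
// svc-a loses leadership or stops matching, only its origin is removed, and
// the entry itself is deleted once the Origins list becomes empty.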
func (l2a *L2Announcer) recalculateL2EntriesTableEntries(ss *selectedService) error {
	tbl := l2a.params.L2AnnounceTable
	txn := l2a.params.StateDB.WriteTxn(tbl)
	defer txn.Abort()

	svcKey := serviceKey(ss.svc)

	entriesIter := tbl.List(txn, tables.L2AnnounceOriginIndex.Query(svcKey))

	// If we are not the leader, we should not have any announcement entries for the service.
	if !ss.currentlyLeader {
		// Remove our origin from the entries, and delete entries with no origins left.
		err := statedb.ProcessEach(entriesIter, func(e *tables.L2AnnounceEntry, _ uint64) error {
			// Copy, since modifying objects directly is not allowed.
			e = e.DeepCopy()

			idx := slices.Index(e.Origins, svcKey)
			if idx != -1 {
				e.Origins = slices.Delete(e.Origins, idx, idx+1)
			}

			if len(e.Origins) == 0 {
				_, _, err := tbl.Delete(txn, e)
				if err != nil {
					return fmt.Errorf("delete in table: %w", err)
				}
				return nil
			}

			_, _, err := tbl.Insert(txn, e)
			if err != nil {
				return fmt.Errorf("update in table: %w", err)
			}
			return nil
		})
		if err != nil {
			return fmt.Errorf("failed to modify desired state: %w", err)
		}

		txn.Commit()

		return nil
	}

	desiredEntries := l2a.desiredEntries(ss)
	satisfiedEntries := make(map[string]bool)
	for key := range desiredEntries {
		satisfiedEntries[key] = false
	}

	// Loop over the existing entries and delete undesired ones.
	err := statedb.ProcessEach(entriesIter, func(e *tables.L2AnnounceEntry, _ uint64) error {
		key := fmt.Sprintf("%s/%s", e.IP, e.NetworkInterface)

		_, desired := desiredEntries[key]
		if desired {
			// The iterator only contains entries which already have the origin of the current svc,
			// so there is no need to add it in the second step.
			satisfiedEntries[key] = true
			return nil
		}

		// The entry is undesired.

		// Copy, since modifying objects directly is not allowed.
		e = e.DeepCopy()

		idx := slices.Index(e.Origins, svcKey)
		if idx != -1 {
			e.Origins = slices.Delete(e.Origins, idx, idx+1)
		}

		if len(e.Origins) == 0 {
			// Delete, if no services want this IP + netdev anymore.
			if _, _, err := tbl.Delete(txn, e); err != nil {
				return fmt.Errorf("delete in table: %w", err)
			}
			return nil
		}

		_, _, err := tbl.Insert(txn, e)
		if err != nil {
			return fmt.Errorf("update in table: %w", err)
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to modify desired state: %w", err)
	}

	// Loop over the desired states and add any that are missing.
	for key, satisfied := range satisfiedEntries {
		if satisfied {
			continue
		}

		entry := desiredEntries[key]
		existing, _, _ := tbl.Get(txn, tables.L2AnnounceIDIndex.Query(tables.L2AnnounceKey{
			IP:               entry.IP,
			NetworkInterface: entry.NetworkInterface,
		}))

		if existing == nil {
			existing = &tables.L2AnnounceEntry{
				L2AnnounceKey: tables.L2AnnounceKey{
					IP:               entry.IP,
					NetworkInterface: entry.NetworkInterface,
				},
			}
		}

		// Merge our origins with any origins already on the existing entry; if no entry existed,
		// existing.Origins is empty and entry.Origins is unchanged.
		entry.Origins = append(existing.Origins, entry.Origins...)

		// Insert or update.
		if _, _, err := tbl.Insert(txn, entry); err != nil {
			return fmt.Errorf("insert new: %w", err)
		}
	}

	txn.Commit()

	return nil
}
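// Both the reconciliation above and desiredEntries below key entries by the
// "IP/interface" pair. For example (hypothetical values): a service holding
// LB IP 192.0.2.1 under a policy that selected eth0 and eth1 produces the
// desired keys "192.0.2.1/eth0" and "192.0.2.1/eth1", each mapping to one
// L2AnnounceEntry.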
func (l2a *L2Announcer) desiredEntries(ss *selectedService) map[string]*tables.L2AnnounceEntry {
	entries := make(map[string]*tables.L2AnnounceEntry)

	for _, policyKey := range ss.byPolicies {
		selectedPolicy := l2a.selectedPolicies[policyKey]

		var ips []netip.Addr
		if selectedPolicy.policy.Spec.LoadBalancerIPs {
			for _, ingress := range ss.svc.Status.LoadBalancer.Ingress {
				if ingress.IP == "" {
					continue
				}

				if addr, err := netip.ParseAddr(ingress.IP); err == nil {
					ips = append(ips, addr)
				}
			}
		}

		if selectedPolicy.policy.Spec.ExternalIPs {
			for _, externalIP := range ss.svc.Spec.ExternalIPs {
				if addr, err := netip.ParseAddr(externalIP); err == nil {
					ips = append(ips, addr)
				}
			}
		}

		for _, ip := range ips {
			for _, iface := range selectedPolicy.selectedDevices {
				key := fmt.Sprintf("%s/%s", ip.String(), iface)
				entry, found := entries[key]
				if !found {
					entry = &tables.L2AnnounceEntry{
						L2AnnounceKey: tables.L2AnnounceKey{
							IP:               ip,
							NetworkInterface: iface,
						},
						Origins: []resource.Key{serviceKey(ss.svc)},
					}
				}
				entries[key] = entry
			}
		}
	}

	return entries
}

const (
	serviceNamespaceLabel = "io.kubernetes.service.namespace"
	serviceNameLabel      = "io.kubernetes.service.name"
)

func svcAndMetaLabels(svc *slim_corev1.Service) labels.Set {
	lbls := maps.Clone(svc.GetLabels())
	if lbls == nil {
		lbls = make(map[string]string)
	}

	lbls[serviceNamespaceLabel] = svc.Namespace
	lbls[serviceNameLabel] = svc.Name
	return lbls
}
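// For example (hypothetical service): a Service named "my-lb" in namespace
// "default" carrying the label color=blue yields the label set
//
//	color: blue
//	io.kubernetes.service.namespace: default
//	io.kubernetes.service.name: my-lb
//
// which lets policies select services by name and namespace in addition to
// the services' own labels.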
type selectedService struct {
	// The last known version of the service.
	svc *slim_corev1.Service
	// The policies which select this service.
	byPolicies []resource.Key

	// Lease parameters.
	leaseDuration time.Duration
	renewDeadline time.Duration
	retryPeriod   time.Duration

	// The lock object used to perform leader election for this selected service.
	lock            *resourcelock.LeaseLock
	currentlyLeader bool
	leaderChannel   chan leaderElectionEvent

	// Leader election goroutine lifetime management.
	ctx    context.Context
	cancel context.CancelFunc
	done   chan struct{}
}

func (ss *selectedService) serviceLeaderElection(ctx context.Context, health cell.Health) error {
	defer close(ss.done)

	ss.ctx, ss.cancel = context.WithCancel(ctx)

	for {
		select {
		case <-ss.ctx.Done():
			return nil
		default:
			leaderelection.RunOrDie(ss.ctx, leaderelection.LeaderElectionConfig{
				Name:            ss.lock.LeaseMeta.Name,
				Lock:            ss.lock,
				ReleaseOnCancel: true,

				LeaseDuration: ss.leaseDuration,
				RenewDeadline: ss.renewDeadline,
				RetryPeriod:   ss.retryPeriod,

				Callbacks: leaderelection.LeaderCallbacks{
					OnStartedLeading: func(ctx context.Context) {
						ss.leaderChannel <- leaderElectionEvent{
							typ:             leaderElectionLeading,
							selectedService: ss,
						}
					},
					OnStoppedLeading: func() {
						ss.leaderChannel <- leaderElectionEvent{
							typ:             leaderElectionStoppedLeading,
							selectedService: ss,
						}
					},
				},
			})
		}
	}
}

func (ss *selectedService) stop() {
	if ss.cancel != nil {
		ss.cancel()
		<-ss.done
		ss.currentlyLeader = false
	}
}

type leaderElectionEventType int

const (
	leaderElectionLeading leaderElectionEventType = iota
	leaderElectionStoppedLeading
)

type leaderElectionEvent struct {
	typ             leaderElectionEventType
	selectedService *selectedService
}

type selectedPolicy struct {
	policy *cilium_api_v2alpha1.CiliumL2AnnouncementPolicy
	// Pre-compiled service selector.
	serviceSelector labels.Selector
	// A cached list of network devices selected by this policy, based on the regular expressions in the policy
	// and the latest known list of devices.
	selectedDevices []string
}
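// A minimal sketch of how a (hypothetical) consumer could read the desired
// state produced above, using the same statedb calls already seen in
// recalculateL2EntriesTableEntries; db, announceTable and svcKey are assumed
// to be provided by the surrounding code:
//
//	txn := db.ReadTxn()
//	iter := announceTable.List(txn, tables.L2AnnounceOriginIndex.Query(svcKey))
//	statedb.ProcessEach(iter, func(e *tables.L2AnnounceEntry, _ uint64) error {
//		fmt.Println(e.IP, e.NetworkInterface, e.Origins)
//		return nil
//	})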