github.com/cilium/cilium@v1.16.2/pkg/bgpv1/agent/controller.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package agent 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 11 "github.com/cilium/hive/cell" 12 "github.com/cilium/hive/job" 13 "github.com/sirupsen/logrus" 14 "k8s.io/apimachinery/pkg/util/wait" 15 16 daemon_k8s "github.com/cilium/cilium/daemon/k8s" 17 "github.com/cilium/cilium/pkg/bgpv1/agent/mode" 18 "github.com/cilium/cilium/pkg/bgpv1/agent/signaler" 19 "github.com/cilium/cilium/pkg/bgpv1/manager/store" 20 "github.com/cilium/cilium/pkg/hive" 21 v2_api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 22 v2alpha1api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1" 23 "github.com/cilium/cilium/pkg/k8s/resource" 24 slimlabels "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels" 25 slimmetav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" 26 "github.com/cilium/cilium/pkg/logging" 27 "github.com/cilium/cilium/pkg/logging/logfields" 28 "github.com/cilium/cilium/pkg/option" 29 "github.com/cilium/cilium/pkg/time" 30 ) 31 32 var ( 33 log = logging.DefaultLogger.WithField(logfields.LogSubsys, "bgp-control-plane") 34 ) 35 36 var ( 37 // ErrMultiplePolicies is a static error typed when the controller encounters 38 // multiple policies which apply to its host. 39 ErrMultiplePolicies = fmt.Errorf("more then one CiliumBGPPeeringPolicy applies to this node, please ensure only a single Policy matches this node's labels") 40 41 // ErrBGPControlPlaneDisabled is set when the BGP control plane is disabled 42 ErrBGPControlPlaneDisabled = fmt.Errorf("BGP control plane is disabled") 43 ) 44 45 type policyLister interface { 46 List() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) 47 } 48 49 type policyListerFunc func() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) 50 51 func (plf policyListerFunc) List() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) { 52 return plf() 53 } 54 55 // Controller is the agent side BGP Control Plane controller. 56 // 57 // Controller listens for events and drives BGP related sub-systems 58 // to maintain a desired state. 59 type Controller struct { 60 // CiliumNodeResource provides a stream of events for changes to the local CiliumNode resource. 61 CiliumNodeResource daemon_k8s.LocalCiliumNodeResource 62 // LocalCiliumNode is the CiliumNode object for the local node. 63 LocalCiliumNode *v2_api.CiliumNode 64 // PolicyResource provides a store of cached policies and allows us to observe changes to the objects in its 65 // store. 66 PolicyResource resource.Resource[*v2alpha1api.CiliumBGPPeeringPolicy] 67 // PolicyLister is an interface which allows for the listing of all known policies 68 PolicyLister policyLister 69 70 // BGP v2 node store 71 BGPNodeConfigStore store.BGPCPResourceStore[*v2alpha1api.CiliumBGPNodeConfig] 72 73 // Sig informs the Controller that a Kubernetes 74 // event of interest has occurred. 75 // 76 // The signal itself provides no other information, 77 // when it occurs the Controller will query each 78 // informer for the latest API information required 79 // to drive it's control loop. 80 Sig *signaler.BGPCPSignaler 81 // BGPMgr is an implementation of the BGPRouterManager interface 82 // and provides a declarative API for configuring BGP peers. 83 BGPMgr BGPRouterManager 84 85 // current configuration state 86 ConfigMode *mode.ConfigMode 87 } 88 89 // ControllerParams contains all parameters needed to construct a Controller 90 type ControllerParams struct { 91 cell.In 92 93 Lifecycle cell.Lifecycle 94 Health cell.Health 95 JobGroup job.Group 96 Shutdowner hive.Shutdowner 97 Sig *signaler.BGPCPSignaler 98 ConfigMode *mode.ConfigMode 99 RouteMgr BGPRouterManager 100 PolicyResource resource.Resource[*v2alpha1api.CiliumBGPPeeringPolicy] 101 BGPNodeConfigStore store.BGPCPResourceStore[*v2alpha1api.CiliumBGPNodeConfig] 102 DaemonConfig *option.DaemonConfig 103 LocalCiliumNodeResource daemon_k8s.LocalCiliumNodeResource 104 } 105 106 // NewController constructs a new BGP Control Plane Controller. 107 // 108 // When the constructor returns the Controller will be actively watching for 109 // events and configuring BGP related sub-systems. 110 // 111 // The constructor requires an implementation of BGPRouterManager to be provided. 112 // This implementation defines which BGP backend will be used (GoBGP, FRR, Bird, etc...) 113 // NOTE: only GoBGP currently implemented. 114 func NewController(params ControllerParams) (*Controller, error) { 115 // If the BGP control plane is disabled, just return nil. This way the hive dependency graph is always static 116 // regardless of config. The lifecycle has not been appended so no work will be done. 117 if !params.DaemonConfig.BGPControlPlaneEnabled() { 118 return nil, nil 119 } 120 121 c := &Controller{ 122 Sig: params.Sig, 123 ConfigMode: params.ConfigMode, 124 BGPMgr: params.RouteMgr, 125 PolicyResource: params.PolicyResource, 126 BGPNodeConfigStore: params.BGPNodeConfigStore, 127 CiliumNodeResource: params.LocalCiliumNodeResource, 128 } 129 130 params.JobGroup.Add( 131 job.OneShot("bgp-policy-observer", func(ctx context.Context, health cell.Health) (err error) { 132 for ev := range c.PolicyResource.Events(ctx) { 133 switch ev.Kind { 134 case resource.Upsert, resource.Delete: 135 // Signal the reconciliation logic. 136 c.Sig.Event(struct{}{}) 137 } 138 ev.Done(nil) 139 } 140 return nil 141 }), 142 143 job.OneShot("bgp-controller", 144 func(ctx context.Context, health cell.Health) (err error) { 145 // initialize PolicyLister used in the controller 146 policyStore, err := c.PolicyResource.Store(ctx) 147 if err != nil { 148 return fmt.Errorf("error creating CiliumBGPPeeringPolicy resource store: %w", err) 149 } 150 c.PolicyLister = policyListerFunc(func() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) { 151 return policyStore.List(), nil 152 }) 153 154 // run the controller 155 c.Run(ctx) 156 return nil 157 }, 158 job.WithRetry(3, &job.ExponentialBackoff{Min: 100 * time.Millisecond, Max: time.Second}), 159 job.WithShutdown()), 160 ) 161 162 return c, nil 163 } 164 165 // Run places the Controller into its control loop. 166 // 167 // When new events trigger a signal the control loop will be evaluated. 168 // 169 // A cancel of the provided ctx will kill the control loop along with the running 170 // informers. 171 func (c *Controller) Run(ctx context.Context) { 172 var ( 173 l = log.WithFields(logrus.Fields{ 174 "component": "Controller.Run", 175 }) 176 ) 177 178 l.Info("Cilium BGP Control Plane Controller now running...") 179 ciliumNodeCh := c.CiliumNodeResource.Events(ctx) 180 for { 181 select { 182 case ev, ok := <-ciliumNodeCh: 183 if !ok { 184 l.Info("LocalCiliumNode resource channel closed, Cilium BGP Control Plane Controller shut down") 185 return 186 } 187 switch ev.Kind { 188 case resource.Upsert: 189 // Set the local CiliumNode. 190 c.LocalCiliumNode = ev.Object 191 // Signal the reconciliation logic. 192 c.Sig.Event(struct{}{}) 193 } 194 ev.Done(nil) 195 case <-ctx.Done(): 196 l.Info("Cilium BGP Control Plane Controller shut down") 197 return 198 case <-c.Sig.Sig: 199 if c.LocalCiliumNode == nil { 200 l.Debug("localCiliumNode has not been set yet") 201 } else if err := c.reconcileWithRetry(ctx); err != nil { 202 l.WithError(err).Error("Reconciliation with retries failed") 203 } else { 204 l.Debug("Successfully completed reconciliation") 205 } 206 } 207 } 208 } 209 210 // reconcileWithRetry runs Reconcile and retries if it fails until the iterations count defined in backoff is reached. 211 func (c *Controller) reconcileWithRetry(ctx context.Context) error { 212 // reconciliation will repeat for ~15 seconds 213 backoff := wait.Backoff{ 214 Duration: 500 * time.Millisecond, 215 Factor: 2, 216 Jitter: 0.5, 217 Steps: 5, 218 } 219 220 var err error 221 retryFn := func(ctx context.Context) (bool, error) { 222 err = c.Reconcile(ctx) 223 if err != nil { 224 log.WithError(err).Debug("Reconciliation failed") 225 return false, nil 226 } 227 return true, nil 228 } 229 230 if retryErr := wait.ExponentialBackoffWithContext(ctx, backoff, retryFn); retryErr != nil { 231 if wait.Interrupted(retryErr) && err != nil { 232 return err // return the actual reconciliation error 233 } 234 return retryErr 235 } 236 return nil 237 } 238 239 // Reconcile is the main reconciliation loop for the BGP Control Plane Controller. 240 // It is responsible for determining the current mode of BGP control plane, which can be disabled, bgpv1 or bgpv2. 241 // Based on presence of BGP peering policy and BGP node config, it will apply the appropriate configuration. 242 // Following is the state transition table for the controller: 243 // Initial state | BGPPP exists | BGPNC exists | Action | Next state 244 // ----------------------|--------------|--------------|----------------------------|----------- 245 // disabled | true | don't care | Apply BGPv1 | bgpv1 246 // disabled | false | true | Apply BGPv2 | bgpv2 247 // disabled | false | false | Do nothing | disabled 248 // bgpv1 | true | don't care | Apply BGPv1 | bgpv1 249 // bgpv1 | false | true | Delete BGPv1, Apply BGPv2 | bgpv2 250 // bgpv1 | false | false | Delete BGPv1 | disabled 251 // bgpv2 | true | don't care | Delete BGPv2, Apply BGPv1 | bgpv1 252 // bgpv2 | false | true | Apply BGPv2 | bgpv2 253 // bgpv2 | false | false | Delete BGPv2 | disabled 254 func (c *Controller) Reconcile(ctx context.Context) error { 255 bgpp, err := c.bgppSelection() 256 if err != nil { 257 log.WithError(err).Error("bgp peering policy selection failed") 258 return err 259 } 260 bgppExists := bgpp != nil 261 262 bgpnc, bgpncExists, err := c.BGPNodeConfigStore.GetByKey(resource.Key{ 263 Name: c.LocalCiliumNode.Name, 264 }) 265 if err != nil { 266 if errors.Is(err, store.ErrStoreUninitialized) { 267 log.Debug("BGPNodeConfig store not yet initialized") 268 return nil // skip the reconciliation - once the store is initialized, it will trigger new reconcile event 269 } 270 log.WithError(err).Error("failed to get BGPNodeConfig") 271 return err 272 } 273 274 switch c.ConfigMode.Get() { 275 case mode.Disabled: 276 if bgppExists { 277 err = c.reconcileBGPP(ctx, bgpp) 278 } else if bgpncExists { 279 err = c.reconcileBGPNC(ctx, bgpnc) 280 } 281 282 case mode.BGPv1: 283 if bgppExists { 284 err = c.reconcileBGPP(ctx, bgpp) 285 } else { 286 c.cleanupBGPP(ctx) 287 288 // check if we need to reconcile bgpv2 289 if bgpncExists { 290 err = c.reconcileBGPNC(ctx, bgpnc) 291 } 292 } 293 294 case mode.BGPv2: 295 if bgppExists { 296 // delete bgpv2 and apply bgpv1 297 c.cleanupBGPNC(ctx) 298 err = c.reconcileBGPP(ctx, bgpp) 299 } else if bgpncExists { 300 err = c.reconcileBGPNC(ctx, bgpnc) 301 } else { 302 c.cleanupBGPNC(ctx) 303 } 304 } 305 return err 306 } 307 308 func (c *Controller) reconcileBGPP(ctx context.Context, policy *v2alpha1api.CiliumBGPPeeringPolicy) error { 309 // apply policy defaults to have consistent default config across sub-systems 310 policy = policy.DeepCopy() // deepcopy to not modify the policy object in store 311 policy.SetDefaults() 312 313 err := c.validatePolicy(policy) 314 if err != nil { 315 return fmt.Errorf("invalid BGP peering policy %s: %w", policy.Name, err) 316 } 317 318 // call bgp sub-systems required to apply this policy's BGP topology. 319 if err := c.BGPMgr.ConfigurePeers(ctx, policy, c.LocalCiliumNode); err != nil { 320 return fmt.Errorf("failed to configure BGP peers, cannot apply BGP peering policy: %w", err) 321 } 322 323 c.ConfigMode.Set(mode.BGPv1) 324 return nil 325 } 326 327 func (c *Controller) cleanupBGPP(ctx context.Context) { 328 err := c.BGPMgr.ConfigurePeers(ctx, nil, nil) 329 if err != nil { 330 // log cleanup error 331 log.WithError(err).Error("failed to cleanup BGP peering policy peers") 332 } 333 334 c.ConfigMode.Set(mode.Disabled) 335 } 336 337 func (c *Controller) reconcileBGPNC(ctx context.Context, bgpnc *v2alpha1api.CiliumBGPNodeConfig) error { 338 err := c.BGPMgr.ReconcileInstances(ctx, bgpnc, c.LocalCiliumNode) 339 if err != nil { 340 return fmt.Errorf("failed to reconcile BGPNodeConfig: %w", err) 341 } 342 343 c.ConfigMode.Set(mode.BGPv2) 344 return nil 345 } 346 347 func (c *Controller) cleanupBGPNC(ctx context.Context) { 348 err := c.BGPMgr.ReconcileInstances(ctx, nil, c.LocalCiliumNode) 349 if err != nil { 350 log.WithError(err).Error("failed to cleanup BGPNodeConfig") 351 } 352 353 c.ConfigMode.Set(mode.Disabled) 354 } 355 356 func (c *Controller) bgppSelection() (*v2alpha1api.CiliumBGPPeeringPolicy, error) { 357 // retrieve all CiliumBGPPeeringPolicies 358 policies, err := c.PolicyLister.List() 359 if err != nil { 360 return nil, fmt.Errorf("failed to list CiliumBGPPeeringPolicies") 361 } 362 // perform policy selection based on node. 363 labels := c.LocalCiliumNode.Labels 364 365 return PolicySelection(labels, policies) 366 } 367 368 // PolicySelection returns a CiliumBGPPeeringPolicy which applies to the provided 369 // *corev1.Node, enforced by a set of policy selection rules. 370 // 371 // Policy selection follows the following rules: 372 // - A policy matches a node if said policy's "nodeSelector" field matches 373 // the node's labels. If "nodeSelector" is omitted, it is unconditionally 374 // selected. 375 // - If (N > 1) policies match the provided *corev1.Node an error is returned. 376 // only a single policy may apply to a node to avoid ambiguity at this stage 377 // of development. 378 func PolicySelection(labels map[string]string, policies []*v2alpha1api.CiliumBGPPeeringPolicy) (*v2alpha1api.CiliumBGPPeeringPolicy, error) { 379 var ( 380 l = log.WithFields(logrus.Fields{ 381 "component": "PolicySelection", 382 }) 383 384 // determine which policies match our node's labels. 385 selectedPolicy *v2alpha1api.CiliumBGPPeeringPolicy 386 slimLabels = slimlabels.Set(labels) 387 ) 388 389 // range over policies and see if any match this node's labels. 390 // 391 // for now, only a single BGP policy can be applied to a node. if more than 392 // one policy applies to a node, we disconnect from all BGP peers and log 393 // an error. 394 for _, policy := range policies { 395 var selected bool 396 397 l.WithFields(logrus.Fields{ 398 "policyName": policy.Name, 399 "nodeLabels": slimLabels, 400 "policyNodeSelector": policy.Spec.NodeSelector.String(), 401 }).Debug("Comparing BGP policy node selector with node's labels") 402 403 if policy.Spec.NodeSelector == nil { 404 selected = true 405 } else { 406 nodeSelector, err := slimmetav1.LabelSelectorAsSelector(policy.Spec.NodeSelector) 407 if err != nil { 408 l.WithError(err).Error("Failed to convert CiliumBGPPeeringPolicy's NodeSelector to a label.Selector interface") 409 continue 410 } 411 if nodeSelector.Matches(slimLabels) { 412 selected = true 413 } 414 } 415 416 if selected { 417 if selectedPolicy != nil { 418 return nil, ErrMultiplePolicies 419 } 420 selectedPolicy = policy 421 } 422 } 423 424 return selectedPolicy, nil 425 } 426 427 // validatePolicy validates the CiliumBGPPeeringPolicy. 428 // The validation is normally done by kube-apiserver (based on CRD validation markers), 429 // this validates only those constraints that cannot be enforced by them. 430 func (c *Controller) validatePolicy(policy *v2alpha1api.CiliumBGPPeeringPolicy) error { 431 for _, r := range policy.Spec.VirtualRouters { 432 for _, n := range r.Neighbors { 433 if err := n.Validate(); err != nil { 434 return err 435 } 436 } 437 } 438 return nil 439 }