
     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     4  package agent
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    11  	""
    12  	""
    13  	""
    14  	""
    16  	daemon_k8s ""
    17  	""
    18  	""
    19  	""
    20  	""
    21  	v2_api ""
    22  	v2alpha1api ""
    23  	""
    24  	slimlabels ""
    25  	slimmetav1 ""
    26  	""
    27  	""
    28  	""
    29  	""
    30  )
    32  var (
    33  	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "bgp-control-plane")
    34  )
    36  var (
    37  	// ErrMultiplePolicies is a static error typed when the controller encounters
    38  	// multiple policies which apply to its host.
    39  	ErrMultiplePolicies = fmt.Errorf("more then one CiliumBGPPeeringPolicy applies to this node, please ensure only a single Policy matches this node's labels")
    41  	// ErrBGPControlPlaneDisabled is set when the BGP control plane is disabled
    42  	ErrBGPControlPlaneDisabled = fmt.Errorf("BGP control plane is disabled")
    43  )
    45  type policyLister interface {
    46  	List() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error)
    47  }
    49  type policyListerFunc func() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error)
    51  func (plf policyListerFunc) List() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) {
    52  	return plf()
    53  }
// Controller is the agent side BGP Control Plane controller.
//
// Controller listens for events and drives BGP related sub-systems
// to maintain a desired state.
//
// NewController does not populate LocalCiliumNode or PolicyLister:
// Run stores the local CiliumNode when it receives an Upsert event, and the
// "bgp-controller" job installs PolicyLister right before it calls Run.
type Controller struct {
	// CiliumNodeResource provides a stream of events for changes to the local CiliumNode resource.
	CiliumNodeResource daemon_k8s.LocalCiliumNodeResource
	// LocalCiliumNode is the CiliumNode object for the local node.
	// It stays nil until Run has seen the first Upsert event; Run skips
	// reconciliation while it is nil.
	LocalCiliumNode *v2_api.CiliumNode
	// PolicyResource provides a store of cached policies and allows us to observe changes to the objects in its
	// store.
	PolicyResource resource.Resource[*v2alpha1api.CiliumBGPPeeringPolicy]
	// PolicyLister is an interface which allows for the listing of all known policies
	PolicyLister policyLister
	// BGPNodeConfigStore is the BGP v2 node store; Reconcile looks up the
	// CiliumBGPNodeConfig named after the local CiliumNode in it.
	BGPNodeConfigStore store.BGPCPResourceStore[*v2alpha1api.CiliumBGPNodeConfig]
	// Sig informs the Controller that a Kubernetes
	// event of interest has occurred.
	//
	// The signal itself provides no other information,
	// when it occurs the Controller will query each
	// informer for the latest API information required
	// to drive its control loop.
	Sig *signaler.BGPCPSignaler
	// BGPMgr is an implementation of the BGPRouterManager interface
	// and provides a declarative API for configuring BGP peers.
	BGPMgr BGPRouterManager
	// ConfigMode holds the current configuration state (disabled, bgpv1 or
	// bgpv2); Reconcile reads it and the reconcile/cleanup helpers update it.
	ConfigMode *mode.ConfigMode
}
// ControllerParams contains all parameters needed to construct a Controller.
//
// NewController consults DaemonConfig to decide whether a Controller is built
// at all, registers its jobs on JobGroup, and copies Sig, ConfigMode, RouteMgr,
// PolicyResource, BGPNodeConfigStore and LocalCiliumNodeResource into the new
// Controller. Lifecycle, Health and Shutdowner are not read by NewController
// in this file.
type ControllerParams struct {
	// NOTE(review): embedding cell.In presumably marks this struct as a hive
	// dependency-injection input; confirm against the hive cell documentation.
	cell.In

	Lifecycle               cell.Lifecycle
	Health                  cell.Health
	JobGroup                job.Group
	Shutdowner              hive.Shutdowner
	Sig                     *signaler.BGPCPSignaler
	ConfigMode              *mode.ConfigMode
	RouteMgr                BGPRouterManager
	PolicyResource          resource.Resource[*v2alpha1api.CiliumBGPPeeringPolicy]
	BGPNodeConfigStore      store.BGPCPResourceStore[*v2alpha1api.CiliumBGPNodeConfig]
	DaemonConfig            *option.DaemonConfig
	LocalCiliumNodeResource daemon_k8s.LocalCiliumNodeResource
}
   106  // NewController constructs a new BGP Control Plane Controller.
   107  //
   108  // When the constructor returns the Controller will be actively watching for
   109  // events and configuring BGP related sub-systems.
   110  //
   111  // The constructor requires an implementation of BGPRouterManager to be provided.
   112  // This implementation defines which BGP backend will be used (GoBGP, FRR, Bird, etc...)
   113  // NOTE: only GoBGP currently implemented.
   114  func NewController(params ControllerParams) (*Controller, error) {
   115  	// If the BGP control plane is disabled, just return nil. This way the hive dependency graph is always static
   116  	// regardless of config. The lifecycle has not been appended so no work will be done.
   117  	if !params.DaemonConfig.BGPControlPlaneEnabled() {
   118  		return nil, nil
   119  	}
   121  	c := &Controller{
   122  		Sig:                params.Sig,
   123  		ConfigMode:         params.ConfigMode,
   124  		BGPMgr:             params.RouteMgr,
   125  		PolicyResource:     params.PolicyResource,
   126  		BGPNodeConfigStore: params.BGPNodeConfigStore,
   127  		CiliumNodeResource: params.LocalCiliumNodeResource,
   128  	}
   130  	params.JobGroup.Add(
   131  		job.OneShot("bgp-policy-observer", func(ctx context.Context, health cell.Health) (err error) {
   132  			for ev := range c.PolicyResource.Events(ctx) {
   133  				switch ev.Kind {
   134  				case resource.Upsert, resource.Delete:
   135  					// Signal the reconciliation logic.
   136  					c.Sig.Event(struct{}{})
   137  				}
   138  				ev.Done(nil)
   139  			}
   140  			return nil
   141  		}),
   143  		job.OneShot("bgp-controller",
   144  			func(ctx context.Context, health cell.Health) (err error) {
   145  				// initialize PolicyLister used in the controller
   146  				policyStore, err := c.PolicyResource.Store(ctx)
   147  				if err != nil {
   148  					return fmt.Errorf("error creating CiliumBGPPeeringPolicy resource store: %w", err)
   149  				}
   150  				c.PolicyLister = policyListerFunc(func() ([]*v2alpha1api.CiliumBGPPeeringPolicy, error) {
   151  					return policyStore.List(), nil
   152  				})
   154  				// run the controller
   155  				c.Run(ctx)
   156  				return nil
   157  			},
   158  			job.WithRetry(3, &job.ExponentialBackoff{Min: 100 * time.Millisecond, Max: time.Second}),
   159  			job.WithShutdown()),
   160  	)
   162  	return c, nil
   163  }
   165  // Run places the Controller into its control loop.
   166  //
   167  // When new events trigger a signal the control loop will be evaluated.
   168  //
   169  // A cancel of the provided ctx will kill the control loop along with the running
   170  // informers.
   171  func (c *Controller) Run(ctx context.Context) {
   172  	var (
   173  		l = log.WithFields(logrus.Fields{
   174  			"component": "Controller.Run",
   175  		})
   176  	)
   178  	l.Info("Cilium BGP Control Plane Controller now running...")
   179  	ciliumNodeCh := c.CiliumNodeResource.Events(ctx)
   180  	for {
   181  		select {
   182  		case ev, ok := <-ciliumNodeCh:
   183  			if !ok {
   184  				l.Info("LocalCiliumNode resource channel closed, Cilium BGP Control Plane Controller shut down")
   185  				return
   186  			}
   187  			switch ev.Kind {
   188  			case resource.Upsert:
   189  				// Set the local CiliumNode.
   190  				c.LocalCiliumNode = ev.Object
   191  				// Signal the reconciliation logic.
   192  				c.Sig.Event(struct{}{})
   193  			}
   194  			ev.Done(nil)
   195  		case <-ctx.Done():
   196  			l.Info("Cilium BGP Control Plane Controller shut down")
   197  			return
   198  		case <-c.Sig.Sig:
   199  			if c.LocalCiliumNode == nil {
   200  				l.Debug("localCiliumNode has not been set yet")
   201  			} else if err := c.reconcileWithRetry(ctx); err != nil {
   202  				l.WithError(err).Error("Reconciliation with retries failed")
   203  			} else {
   204  				l.Debug("Successfully completed reconciliation")
   205  			}
   206  		}
   207  	}
   208  }
   210  // reconcileWithRetry runs Reconcile and retries if it fails until the iterations count defined in backoff is reached.
   211  func (c *Controller) reconcileWithRetry(ctx context.Context) error {
   212  	// reconciliation will repeat for ~15 seconds
   213  	backoff := wait.Backoff{
   214  		Duration: 500 * time.Millisecond,
   215  		Factor:   2,
   216  		Jitter:   0.5,
   217  		Steps:    5,
   218  	}
   220  	var err error
   221  	retryFn := func(ctx context.Context) (bool, error) {
   222  		err = c.Reconcile(ctx)
   223  		if err != nil {
   224  			log.WithError(err).Debug("Reconciliation failed")
   225  			return false, nil
   226  		}
   227  		return true, nil
   228  	}
   230  	if retryErr := wait.ExponentialBackoffWithContext(ctx, backoff, retryFn); retryErr != nil {
   231  		if wait.Interrupted(retryErr) && err != nil {
   232  			return err // return the actual reconciliation error
   233  		}
   234  		return retryErr
   235  	}
   236  	return nil
   237  }
   239  // Reconcile is the main reconciliation loop for the BGP Control Plane Controller.
   240  // It is responsible for determining the current mode of BGP control plane, which can be disabled, bgpv1 or bgpv2.
   241  // Based on presence of BGP peering policy and BGP node config, it will apply the appropriate configuration.
   242  // Following is the state transition table for the controller:
   243  // Initial state         | BGPPP exists | BGPNC exists | Action	                    | Next state
   244  // ----------------------|--------------|--------------|----------------------------|-----------
   245  // disabled              | true         | don't care   | Apply BGPv1                | bgpv1
   246  // disabled              | false        | true         | Apply BGPv2                | bgpv2
   247  // disabled              | false        | false        | Do nothing                 | disabled
   248  // bgpv1                 | true         | don't care   | Apply BGPv1                | bgpv1
   249  // bgpv1                 | false        | true         | Delete BGPv1, Apply BGPv2  | bgpv2
   250  // bgpv1                 | false        | false        | Delete BGPv1               | disabled
   251  // bgpv2                 | true         | don't care   | Delete BGPv2, Apply BGPv1  | bgpv1
   252  // bgpv2                 | false        | true         | Apply BGPv2                | bgpv2
   253  // bgpv2                 | false        | false        | Delete BGPv2               | disabled
   254  func (c *Controller) Reconcile(ctx context.Context) error {
   255  	bgpp, err := c.bgppSelection()
   256  	if err != nil {
   257  		log.WithError(err).Error("bgp peering policy selection failed")
   258  		return err
   259  	}
   260  	bgppExists := bgpp != nil
   262  	bgpnc, bgpncExists, err := c.BGPNodeConfigStore.GetByKey(resource.Key{
   263  		Name: c.LocalCiliumNode.Name,
   264  	})
   265  	if err != nil {
   266  		if errors.Is(err, store.ErrStoreUninitialized) {
   267  			log.Debug("BGPNodeConfig store not yet initialized")
   268  			return nil // skip the reconciliation - once the store is initialized, it will trigger new reconcile event
   269  		}
   270  		log.WithError(err).Error("failed to get BGPNodeConfig")
   271  		return err
   272  	}
   274  	switch c.ConfigMode.Get() {
   275  	case mode.Disabled:
   276  		if bgppExists {
   277  			err = c.reconcileBGPP(ctx, bgpp)
   278  		} else if bgpncExists {
   279  			err = c.reconcileBGPNC(ctx, bgpnc)
   280  		}
   282  	case mode.BGPv1:
   283  		if bgppExists {
   284  			err = c.reconcileBGPP(ctx, bgpp)
   285  		} else {
   286  			c.cleanupBGPP(ctx)
   288  			// check if we need to reconcile bgpv2
   289  			if bgpncExists {
   290  				err = c.reconcileBGPNC(ctx, bgpnc)
   291  			}
   292  		}
   294  	case mode.BGPv2:
   295  		if bgppExists {
   296  			// delete bgpv2 and apply bgpv1
   297  			c.cleanupBGPNC(ctx)
   298  			err = c.reconcileBGPP(ctx, bgpp)
   299  		} else if bgpncExists {
   300  			err = c.reconcileBGPNC(ctx, bgpnc)
   301  		} else {
   302  			c.cleanupBGPNC(ctx)
   303  		}
   304  	}
   305  	return err
   306  }
   308  func (c *Controller) reconcileBGPP(ctx context.Context, policy *v2alpha1api.CiliumBGPPeeringPolicy) error {
   309  	// apply policy defaults to have consistent default config across sub-systems
   310  	policy = policy.DeepCopy() // deepcopy to not modify the policy object in store
   311  	policy.SetDefaults()
   313  	err := c.validatePolicy(policy)
   314  	if err != nil {
   315  		return fmt.Errorf("invalid BGP peering policy %s: %w", policy.Name, err)
   316  	}
   318  	// call bgp sub-systems required to apply this policy's BGP topology.
   319  	if err := c.BGPMgr.ConfigurePeers(ctx, policy, c.LocalCiliumNode); err != nil {
   320  		return fmt.Errorf("failed to configure BGP peers, cannot apply BGP peering policy: %w", err)
   321  	}
   323  	c.ConfigMode.Set(mode.BGPv1)
   324  	return nil
   325  }
   327  func (c *Controller) cleanupBGPP(ctx context.Context) {
   328  	err := c.BGPMgr.ConfigurePeers(ctx, nil, nil)
   329  	if err != nil {
   330  		// log cleanup error
   331  		log.WithError(err).Error("failed to cleanup BGP peering policy peers")
   332  	}
   334  	c.ConfigMode.Set(mode.Disabled)
   335  }
   337  func (c *Controller) reconcileBGPNC(ctx context.Context, bgpnc *v2alpha1api.CiliumBGPNodeConfig) error {
   338  	err := c.BGPMgr.ReconcileInstances(ctx, bgpnc, c.LocalCiliumNode)
   339  	if err != nil {
   340  		return fmt.Errorf("failed to reconcile BGPNodeConfig: %w", err)
   341  	}
   343  	c.ConfigMode.Set(mode.BGPv2)
   344  	return nil
   345  }
   347  func (c *Controller) cleanupBGPNC(ctx context.Context) {
   348  	err := c.BGPMgr.ReconcileInstances(ctx, nil, c.LocalCiliumNode)
   349  	if err != nil {
   350  		log.WithError(err).Error("failed to cleanup BGPNodeConfig")
   351  	}
   353  	c.ConfigMode.Set(mode.Disabled)
   354  }
   356  func (c *Controller) bgppSelection() (*v2alpha1api.CiliumBGPPeeringPolicy, error) {
   357  	// retrieve all CiliumBGPPeeringPolicies
   358  	policies, err := c.PolicyLister.List()
   359  	if err != nil {
   360  		return nil, fmt.Errorf("failed to list CiliumBGPPeeringPolicies")
   361  	}
   362  	// perform policy selection based on node.
   363  	labels := c.LocalCiliumNode.Labels
   365  	return PolicySelection(labels, policies)
   366  }
   368  // PolicySelection returns a CiliumBGPPeeringPolicy which applies to the provided
   369  // *corev1.Node, enforced by a set of policy selection rules.
   370  //
   371  // Policy selection follows the following rules:
   372  //   - A policy matches a node if said policy's "nodeSelector" field matches
   373  //     the node's labels. If "nodeSelector" is omitted, it is unconditionally
   374  //     selected.
   375  //   - If (N > 1) policies match the provided *corev1.Node an error is returned.
   376  //     only a single policy may apply to a node to avoid ambiguity at this stage
   377  //     of development.
   378  func PolicySelection(labels map[string]string, policies []*v2alpha1api.CiliumBGPPeeringPolicy) (*v2alpha1api.CiliumBGPPeeringPolicy, error) {
   379  	var (
   380  		l = log.WithFields(logrus.Fields{
   381  			"component": "PolicySelection",
   382  		})
   384  		// determine which policies match our node's labels.
   385  		selectedPolicy *v2alpha1api.CiliumBGPPeeringPolicy
   386  		slimLabels     = slimlabels.Set(labels)
   387  	)
   389  	// range over policies and see if any match this node's labels.
   390  	//
   391  	// for now, only a single BGP policy can be applied to a node. if more than
   392  	// one policy applies to a node, we disconnect from all BGP peers and log
   393  	// an error.
   394  	for _, policy := range policies {
   395  		var selected bool
   397  		l.WithFields(logrus.Fields{
   398  			"policyName":         policy.Name,
   399  			"nodeLabels":         slimLabels,
   400  			"policyNodeSelector": policy.Spec.NodeSelector.String(),
   401  		}).Debug("Comparing BGP policy node selector with node's labels")
   403  		if policy.Spec.NodeSelector == nil {
   404  			selected = true
   405  		} else {
   406  			nodeSelector, err := slimmetav1.LabelSelectorAsSelector(policy.Spec.NodeSelector)
   407  			if err != nil {
   408  				l.WithError(err).Error("Failed to convert CiliumBGPPeeringPolicy's NodeSelector to a label.Selector interface")
   409  				continue
   410  			}
   411  			if nodeSelector.Matches(slimLabels) {
   412  				selected = true
   413  			}
   414  		}
   416  		if selected {
   417  			if selectedPolicy != nil {
   418  				return nil, ErrMultiplePolicies
   419  			}
   420  			selectedPolicy = policy
   421  		}
   422  	}
   424  	return selectedPolicy, nil
   425  }
   427  // validatePolicy validates the CiliumBGPPeeringPolicy.
   428  // The validation is normally done by kube-apiserver (based on CRD validation markers),
   429  // this validates only those constraints that cannot be enforced by them.
   430  func (c *Controller) validatePolicy(policy *v2alpha1api.CiliumBGPPeeringPolicy) error {
   431  	for _, r := range policy.Spec.VirtualRouters {
   432  		for _, n := range r.Neighbors {
   433  			if err := n.Validate(); err != nil {
   434  				return err
   435  			}
   436  		}
   437  	}
   438  	return nil
   439  }