github.com/cilium/cilium@v1.16.2/pkg/bgpv1/manager/manager.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package manager
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  
    11  	"github.com/cilium/hive/cell"
    12  	"github.com/cilium/hive/job"
    13  	"github.com/sirupsen/logrus"
    14  	"k8s.io/apimachinery/pkg/util/sets"
    15  	"k8s.io/apimachinery/pkg/util/wait"
    16  
    17  	"github.com/cilium/cilium/api/v1/models"
    18  	restapi "github.com/cilium/cilium/api/v1/server/restapi/bgp"
    19  	"github.com/cilium/cilium/pkg/bgpv1/agent"
    20  	"github.com/cilium/cilium/pkg/bgpv1/agent/mode"
    21  	"github.com/cilium/cilium/pkg/bgpv1/api"
    22  	"github.com/cilium/cilium/pkg/bgpv1/manager/instance"
    23  	"github.com/cilium/cilium/pkg/bgpv1/manager/reconciler"
    24  	"github.com/cilium/cilium/pkg/bgpv1/manager/reconcilerv2"
    25  	"github.com/cilium/cilium/pkg/bgpv1/types"
    26  	v2api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    27  	v2alpha1api "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
    28  	"github.com/cilium/cilium/pkg/lock"
    29  	"github.com/cilium/cilium/pkg/logging"
    30  	"github.com/cilium/cilium/pkg/logging/logfields"
    31  	"github.com/cilium/cilium/pkg/time"
    32  )
    33  
var (
	// ATTENTION:
	// All logs generated from this package will have the k/v
	// `subsys=bgp-control-plane`.
	//
	// Each log message will additionally contain the k/v
	// 'component=manager.{Struct}.{Method}' or 'component=manager.{Function}' to
	// provide further granularity on where the log is originating from.
	//
	// log is the package-level logger, derived from the default logger with
	// the subsystem field already attached.
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "bgp-control-plane")
)
    44  
// LocalASNMap maps local ASNs to their associated BgpServers and server
// configuration info. The key is the local ASN of the virtual router the
// server was created for.
type LocalASNMap map[int64]*instance.ServerWithConfig
    48  
// LocalInstanceMap maps instance names to their associated BgpInstances and
// configuration info. It is the BGPv2 counterpart of LocalASNMap.
type LocalInstanceMap map[string]*instance.BGPInstance
    52  
// bgpRouterManagerParams bundles the dependencies consumed by
// NewBGPRouterManager. It embeds cell.In, and the three reconciler slices are
// populated from the value groups named in their struct tags.
//
// Reconcilers feed the BGPv1 path, ReconcilersV2 the BGPv2 configuration path
// and StateReconcilers the BGPv2 state reconciliation path.
type bgpRouterManagerParams struct {
	cell.In
	Logger           logrus.FieldLogger
	JobGroup         job.Group
	ConfigMode       *mode.ConfigMode
	Reconcilers      []reconciler.ConfigReconciler   `group:"bgp-config-reconciler"`
	ReconcilersV2    []reconcilerv2.ConfigReconciler `group:"bgp-config-reconciler-v2"`
	StateReconcilers []reconcilerv2.StateReconciler  `group:"bgp-state-reconciler-v2"`
}
    62  
// State groups the data the manager uses to track state changes of BGP
// instances: the state reconcilers to invoke, the per-instance notification
// channels, the set of instances awaiting state reconciliation and the
// channels used to signal the bgp-state-observer job.
type State struct {
	// reconcilers is the list of state reconcilers which will be called when instance state changes.
	reconcilers []reconcilerv2.StateReconciler

	// notifications is a map of instance name to the channel which will be used to get notification
	// from underlying BGP instance. This map is used for bookkeeping and closing of channel when
	// instance is deleted.
	notifications map[string]types.StateNotificationCh

	// pendingInstancesMutex is used to protect the pendingInstances set.
	//
	// pendingInstancesMutex in BGPRouterManager is introduced as we can have high number of
	// state notifications. We do not want to hold the BGPRouterManager.Lock for each
	// state update.
	//
	// Order of locking: pendingInstancesMutex -> BGPRouterManager.Lock
	// DO NOT take BGPRouterManager.Lock and then State.pendingInstancesMutex.
	pendingInstancesMutex lock.Mutex

	// pendingInstances set contains the instances which need to be reconciled for state change.
	pendingInstances sets.Set[string]

	// reconcileSignal is used to signal bgp-state-observer to reconcile the state based on
	// pendingInstances set.
	reconcileSignal chan struct{}

	// instanceDeletionSignal is used to signal bgp-state-observer to reconcile the cleanup of
	// instance. Instance name is signaled on this channel.
	instanceDeletionSignal chan string
}
    93  
// BGPRouterManager implements the pkg.bgpv1.agent.BGPRouterManager interface.
//
// Logically, this manager views each CiliumBGPVirtualRouter within a
// CiliumBGPPeeringPolicy as a BGP router instantiated on its host.
//
// BGP routers are grouped and accessed by their local ASNs, thus this backend
// mandates that each CiliumBGPVirtualRouter have a unique local ASN and
// precludes a single host instantiating two routers with the same local ASN.
//
// This manager employs two main data structures to implement its high level
// business logic.
//
// A reconcileDiff is used to establish which BgpServers must be created,
// and removed from the Manager along with which servers must have their
// configurations reconciled.
//
// A set of ConfigReconciler(s), whose usage is wrapped by the
// reconcileBGPConfig method, reconcile individual features of a
// CiliumBGPVirtualRouter.
//
// Together, the high-level flow the manager takes is:
//   - Instantiate a reconcileDiff to compute which BgpServers to create, remove,
//     and reconcile
//   - Create any BgpServers necessary, run the ConfigReconciler(s) on each
//   - Run each ConfigReconciler, by way of reconcileBGPConfig,
//     on any BgpServers marked for reconcile
//
// BgpServers are abstracted by the ServerWithConfig structure which provides a
// method set for low-level BGP operations.
//
// As part of BGPv2 development, this manager has been extended to support BGPv2
// fields - BGPInstance and ReconcilersV2.
type BGPRouterManager struct {
	// The embedded lock is taken by the exported methods (for example
	// ConfigurePeers, GetPeers and Stop) while they access the manager.
	lock.RWMutex

	Logger logrus.FieldLogger

	// Helper to determine the mode of the agent
	ConfigMode *mode.ConfigMode

	// BGPv1 servers and reconcilers
	Servers     LocalASNMap
	Reconcilers []reconciler.ConfigReconciler

	// BGPv2 instances and reconcilers
	BGPInstances      LocalInstanceMap
	ConfigReconcilers []reconcilerv2.ConfigReconciler

	// running is set when the manager is running, and unset when it is stopped.
	running bool

	// state management
	state State
}
   148  
   149  // NewBGPRouterManager constructs a GoBGP-backed BGPRouterManager.
   150  //
   151  // See BGPRouterManager for details.
   152  func NewBGPRouterManager(params bgpRouterManagerParams) agent.BGPRouterManager {
   153  	activeReconcilers := reconciler.GetActiveReconcilers(params.Reconcilers)
   154  	activeReconcilersV2 := reconcilerv2.GetActiveReconcilers(params.Logger, params.ReconcilersV2)
   155  
   156  	m := &BGPRouterManager{
   157  		Logger:      params.Logger,
   158  		ConfigMode:  params.ConfigMode,
   159  		Servers:     make(LocalASNMap),
   160  		Reconcilers: activeReconcilers,
   161  		running:     true, // start with running state set
   162  
   163  		// BGPv2
   164  		BGPInstances:      make(LocalInstanceMap),
   165  		ConfigReconcilers: activeReconcilersV2,
   166  
   167  		// state
   168  		state: State{
   169  			reconcilers:            reconcilerv2.GetActiveStateReconcilers(params.Logger, params.StateReconcilers),
   170  			notifications:          make(map[string]types.StateNotificationCh),
   171  			pendingInstances:       sets.New[string](),
   172  			reconcileSignal:        make(chan struct{}, 1),
   173  			instanceDeletionSignal: make(chan string),
   174  		},
   175  	}
   176  
   177  	params.JobGroup.Add(
   178  		job.OneShot("bgp-state-observer", func(ctx context.Context, health cell.Health) (err error) {
   179  			for {
   180  				select {
   181  				case <-ctx.Done():
   182  					return nil
   183  				case <-m.state.reconcileSignal:
   184  					err := m.reconcileStateWithRetry(ctx)
   185  					if err != nil {
   186  						m.Logger.WithError(err).Error("failed to reconcile state")
   187  					}
   188  				case instanceName := <-m.state.instanceDeletionSignal:
   189  					m.reconcileInstanceDeletion(ctx, instanceName)
   190  				}
   191  			}
   192  		}),
   193  	)
   194  
   195  	return m
   196  }
   197  
   198  func (m *BGPRouterManager) reconcileStateWithRetry(ctx context.Context) error {
   199  	bo := wait.Backoff{
   200  		Duration: 100 * time.Millisecond,
   201  		Factor:   1.2,
   202  		Steps:    10,
   203  	}
   204  
   205  	retryFn := func(ctx context.Context) (bool, error) {
   206  		err := m.reconcileState(ctx)
   207  		if err != nil {
   208  			m.Logger.WithError(err).Error("failed to reconcile state")
   209  			return false, nil
   210  		}
   211  		return true, nil
   212  	}
   213  
   214  	return wait.ExponentialBackoffWithContext(ctx, bo, retryFn)
   215  }
   216  
   217  // ConfigurePeers is a declarative API for configuring the BGP peering topology
   218  // given a desired CiliumBGPPeeringPolicy.
   219  //
   220  // ConfigurePeers will evaluate BGPRouterManager's current state and the desired
   221  // CiliumBGPPeeringPolicy policy then take the necessary actions to apply the
   222  // provided policy. For more details see BGPRouterManager's comments.
   223  //
   224  // ConfigurePeers should return only once a subsequent invocation is safe.
   225  // This method is not thread safe and does not intend to be called concurrently.
   226  func (m *BGPRouterManager) ConfigurePeers(ctx context.Context,
   227  	policy *v2alpha1api.CiliumBGPPeeringPolicy,
   228  	ciliumNode *v2api.CiliumNode) error {
   229  	m.Lock()
   230  	defer m.Unlock()
   231  
   232  	if !m.running {
   233  		return fmt.Errorf("bgp router manager is not running")
   234  	}
   235  
   236  	l := log.WithFields(
   237  		logrus.Fields{
   238  			"component": "manager.ConfigurePeers",
   239  		},
   240  	)
   241  
   242  	// use a reconcileDiff to compute which BgpServers must be created, removed
   243  	// and reconciled.
   244  	rd := newReconcileDiff(ciliumNode)
   245  
   246  	if policy == nil {
   247  		return m.withdrawAll(ctx, rd)
   248  	}
   249  
   250  	rd.diff(m.Servers, policy)
   251  
   252  	if rd.empty() {
   253  		l.Debug("GoBGP peering topology up-to-date with CiliumBGPPeeringPolicy for this node.")
   254  		return nil
   255  	}
   256  	l.WithField("diff", rd.String()).Debug("Reconciling new CiliumBGPPeeringPolicy")
   257  
   258  	if len(rd.register) > 0 {
   259  		if err := m.register(ctx, rd); err != nil {
   260  			return fmt.Errorf("encountered error adding new BGP Servers: %w", err)
   261  		}
   262  	}
   263  	if len(rd.withdraw) > 0 {
   264  		if err := m.withdraw(ctx, rd); err != nil {
   265  			return fmt.Errorf("encountered error removing existing BGP Servers: %w", err)
   266  		}
   267  	}
   268  	if len(rd.reconcile) > 0 {
   269  		if err := m.reconcile(ctx, rd); err != nil {
   270  			return fmt.Errorf("encountered error reconciling existing BGP Servers: %w", err)
   271  		}
   272  	}
   273  	return nil
   274  }
   275  
   276  // register instantiates and configures BgpServer(s) as instructed by the provided
   277  // work diff.
   278  func (m *BGPRouterManager) register(ctx context.Context, rd *reconcileDiff) error {
   279  	l := log.WithFields(
   280  		logrus.Fields{
   281  			"component": "manager.add",
   282  		},
   283  	)
   284  	for _, asn := range rd.register {
   285  		var config *v2alpha1api.CiliumBGPVirtualRouter
   286  		var ok bool
   287  		if config, ok = rd.seen[asn]; !ok {
   288  			l.Errorf("Work diff (add) contains unseen ASN %v, skipping", asn)
   289  			continue
   290  		}
   291  		if err := m.registerBGPServer(ctx, config, rd.ciliumNode); err != nil {
   292  			// we'll just log the error and attempt to register the next BgpServer.
   293  			l.WithError(err).Errorf("Error while registering new BGP server for local ASN %v.", config.LocalASN)
   294  		}
   295  	}
   296  	return nil
   297  }
   298  
   299  // registerBGPServer encapsulates the logic for instantiating a
   300  // BgpServer, configuring it based on a CiliumBGPVirtualRouter, and
   301  // registering it with the Manager.
   302  //
   303  // If this registration process fails the server will be stopped (if it was started)
   304  // and deleted from our manager (if it was added).
   305  func (m *BGPRouterManager) registerBGPServer(ctx context.Context,
   306  	c *v2alpha1api.CiliumBGPVirtualRouter,
   307  	ciliumNode *v2api.CiliumNode) error {
   308  	l := log.WithFields(
   309  		logrus.Fields{
   310  			"component": "manager.registerBGPServer",
   311  		},
   312  	)
   313  
   314  	l.Infof("Registering BGP servers for policy with local ASN %v", c.LocalASN)
   315  
   316  	annoMap, err := agent.NewAnnotationMap(ciliumNode.Annotations)
   317  	if err != nil {
   318  		return fmt.Errorf("unable to parse local node's annotations: %w", err)
   319  	}
   320  
   321  	// resolve local port from kubernetes annotations
   322  	var localPort int32
   323  	localPort = -1
   324  	if attrs, ok := annoMap[c.LocalASN]; ok {
   325  		if attrs.LocalPort != 0 {
   326  			localPort = int32(attrs.LocalPort)
   327  		}
   328  	}
   329  
   330  	routerID, err := annoMap.ResolveRouterID(c.LocalASN)
   331  	if err != nil {
   332  		if nodeIP := ciliumNode.GetIP(false); nodeIP == nil {
   333  			return fmt.Errorf("failed to get ciliumnode IP %v: %w", nodeIP, err)
   334  		} else {
   335  			routerID = nodeIP.String()
   336  		}
   337  	}
   338  
   339  	globalConfig := types.ServerParameters{
   340  		Global: types.BGPGlobal{
   341  			ASN:        uint32(c.LocalASN),
   342  			RouterID:   routerID,
   343  			ListenPort: localPort,
   344  			RouteSelectionOptions: &types.RouteSelectionOptions{
   345  				AdvertiseInactiveRoutes: true,
   346  			},
   347  		},
   348  	}
   349  
   350  	s, err := instance.NewServerWithConfig(ctx, log, globalConfig)
   351  	if err != nil {
   352  		return fmt.Errorf("failed to start BGP server for config with local ASN %v: %w", c.LocalASN, err)
   353  	}
   354  
   355  	// We can commit the register the server here. Even if the following
   356  	// reconciliation fails, we can return error and it triggers retry. The
   357  	// next retry will be handled by reconcile(). We don't need to retry
   358  	// the server creation which already succeeded.
   359  	m.Servers[c.LocalASN] = s
   360  
   361  	// initialize the reconcilers for this instance
   362  	for _, r := range m.Reconcilers {
   363  		err = r.Init(s)
   364  		if err != nil {
   365  			return fmt.Errorf("%s reconciler initialization failed: %w", r.Name(), err)
   366  		}
   367  	}
   368  
   369  	if err = m.reconcileBGPConfig(ctx, s, c, ciliumNode); err != nil {
   370  		return fmt.Errorf("failed initial reconciliation for peer config with local ASN %v: %w", c.LocalASN, err)
   371  	}
   372  
   373  	l.Infof("Successfully registered GoBGP servers for policy with local ASN %v", c.LocalASN)
   374  
   375  	return err
   376  }
   377  
   378  // withdraw disconnects and removes BgpServer(s) as instructed by the provided
   379  // work diff.
   380  func (m *BGPRouterManager) withdraw(ctx context.Context, rd *reconcileDiff) error {
   381  	l := log.WithFields(
   382  		logrus.Fields{
   383  			"component": "manager.remove",
   384  		},
   385  	)
   386  	for _, asn := range rd.withdraw {
   387  		var (
   388  			s  *instance.ServerWithConfig
   389  			ok bool
   390  		)
   391  		if s, ok = m.Servers[asn]; !ok {
   392  			l.Warnf("Server with local ASN %v marked for deletion but does not exist", asn)
   393  			continue
   394  		}
   395  		for _, r := range m.Reconcilers {
   396  			r.Cleanup(s)
   397  		}
   398  		s.Server.Stop()
   399  		delete(m.Servers, asn)
   400  		l.Infof("Removed BGP server with local ASN %v", asn)
   401  	}
   402  	return nil
   403  }
   404  
   405  // withdrawAll will disconnect and remove all currently registered BgpServer(s).
   406  //
   407  // `rd` must be a newly created reconcileDiff which has not had its `Diff` method
   408  // called.
   409  func (m *BGPRouterManager) withdrawAll(ctx context.Context, rd *reconcileDiff) error {
   410  	if len(m.Servers) == 0 {
   411  		return nil
   412  	}
   413  	for asn := range m.Servers {
   414  		rd.withdraw = append(rd.withdraw, asn)
   415  	}
   416  	return m.withdraw(ctx, rd)
   417  }
   418  
   419  // reconcile evaluates existing BgpServer(s), making changes if necessary, as
   420  // instructed by the provided reoncileDiff.
   421  func (m *BGPRouterManager) reconcile(ctx context.Context, rd *reconcileDiff) error {
   422  	l := log.WithFields(
   423  		logrus.Fields{
   424  			"component": "manager.reconcile",
   425  		},
   426  	)
   427  	for _, asn := range rd.reconcile {
   428  		var (
   429  			sc   = m.Servers[asn]
   430  			newc = rd.seen[asn]
   431  		)
   432  		if sc == nil {
   433  			l.Errorf("Virtual router with local ASN %v marked for reconciliation but missing from Manager", newc.LocalASN) // really shouldn't happen
   434  			continue
   435  		}
   436  		if newc == nil {
   437  			l.Errorf("Virtual router with local ASN %v marked for reconciliation but missing from incoming configurations", sc.Config.LocalASN) // also really shouldn't happen
   438  			continue
   439  		}
   440  		if err := m.reconcileBGPConfig(ctx, sc, newc, rd.ciliumNode); err != nil {
   441  			l.WithError(err).Errorf("Encountered error reconciling virtual router with local ASN %v", newc.LocalASN)
   442  		}
   443  	}
   444  	return nil
   445  }
   446  
   447  // reconcileBGPConfig will utilize the current set of ConfigReconciler(s)
   448  // to push a BgpServer to its desired configuration.
   449  //
   450  // If any ConfigReconciler fails so will ReconcileBGPConfig and the caller
   451  // is left to decide how to handle the possible inconsistent state of the
   452  // BgpServer left over.
   453  //
   454  // Providing a ServerWithConfig that has a nil `Config` field indicates that
   455  // this is the first time this BgpServer is being configured, each
   456  // ConfigReconciler must be prepared to handle this.
   457  //
   458  // The two CiliumBGPVirtualRouter(s) being compared must have the same local
   459  // ASN, unless `sc.Config` is nil, or else an error is returned.
   460  //
   461  // On success the provided `newc` will be written to `sc.Config`. The caller
   462  // should then store `sc` until next reconciliation.
   463  func (m *BGPRouterManager) reconcileBGPConfig(ctx context.Context,
   464  	sc *instance.ServerWithConfig,
   465  	newc *v2alpha1api.CiliumBGPVirtualRouter,
   466  	ciliumNode *v2api.CiliumNode) error {
   467  	if sc.Config != nil {
   468  		if sc.Config.LocalASN != newc.LocalASN {
   469  			return fmt.Errorf("cannot reconcile two BgpServers with different local ASNs")
   470  		}
   471  	}
   472  	for _, r := range m.Reconcilers {
   473  		if err := r.Reconcile(ctx, reconciler.ReconcileParams{
   474  			CurrentServer: sc,
   475  			DesiredConfig: newc,
   476  			CiliumNode:    ciliumNode,
   477  		}); err != nil {
   478  			return fmt.Errorf("reconciliation of virtual router with local ASN %v failed: %w", newc.LocalASN, err)
   479  		}
   480  	}
   481  	// all reconcilers succeeded so update Server's config with new peering config.
   482  	sc.Config = newc
   483  	return nil
   484  }
   485  
   486  // GetPeers gets peering state from previously initialized bgp instances.
   487  func (m *BGPRouterManager) GetPeers(ctx context.Context) ([]*models.BgpPeer, error) {
   488  	m.RLock()
   489  	defer m.RUnlock()
   490  
   491  	if !m.running {
   492  		return nil, fmt.Errorf("bgp router manager is not running")
   493  	}
   494  
   495  	var res []*models.BgpPeer
   496  	switch m.ConfigMode.Get() {
   497  	case mode.BGPv1:
   498  		for _, s := range m.Servers {
   499  			getPeerResp, err := s.Server.GetPeerState(ctx)
   500  			if err != nil {
   501  				return nil, err
   502  			}
   503  			res = append(res, getPeerResp.Peers...)
   504  		}
   505  
   506  	case mode.BGPv2:
   507  		for _, i := range m.BGPInstances {
   508  			getPeerResp, err := i.Router.GetPeerState(ctx)
   509  			if err != nil {
   510  				return nil, err
   511  			}
   512  			res = append(res, getPeerResp.Peers...)
   513  		}
   514  	}
   515  	return res, nil
   516  }
   517  
   518  // GetRoutes retrieves routes from the RIB of underlying router
   519  func (m *BGPRouterManager) GetRoutes(ctx context.Context, params restapi.GetBgpRoutesParams) ([]*models.BgpRoute, error) {
   520  	m.RLock()
   521  	defer m.RUnlock()
   522  
   523  	if !m.running {
   524  		return nil, fmt.Errorf("bgp router manager is not running")
   525  	}
   526  
   527  	switch m.ConfigMode.Get() {
   528  	case mode.BGPv1:
   529  		return m.getRoutesV1(ctx, params)
   530  	case mode.BGPv2:
   531  		return m.getRoutesV2(ctx, params)
   532  	default:
   533  		return nil, nil
   534  	}
   535  }
   536  
   537  func (m *BGPRouterManager) getRoutesV1(ctx context.Context, params restapi.GetBgpRoutesParams) ([]*models.BgpRoute, error) {
   538  	// validate router ASN
   539  	if params.RouterAsn != nil {
   540  		if _, found := m.Servers[*params.RouterAsn]; !found {
   541  			return nil, fmt.Errorf("virtual router with ASN %d does not exist", *params.RouterAsn)
   542  		}
   543  	}
   544  
   545  	// validate that router ASN is set for the neighbor if there are multiple servers
   546  	if params.Neighbor != nil && len(m.Servers) > 1 && params.RouterAsn == nil {
   547  		return nil, fmt.Errorf("multiple virtual routers configured, router ASN must be specified")
   548  	}
   549  
   550  	// determine if we need to retrieve the routes for each peer (in case of adj-rib but no peer specified)
   551  	tt := types.ParseTableType(params.TableType)
   552  	allPeers := (tt == types.TableTypeAdjRIBIn || tt == types.TableTypeAdjRIBOut) && (params.Neighbor == nil || *params.Neighbor == "")
   553  
   554  	var res []*models.BgpRoute
   555  	for _, s := range m.Servers {
   556  		if params.RouterAsn != nil && *params.RouterAsn != s.Config.LocalASN {
   557  			continue // return routes matching provided router ASN only
   558  		}
   559  		if allPeers {
   560  			// get routes for each peer of the server
   561  			getPeerResp, err := s.Server.GetPeerState(ctx)
   562  			if err != nil {
   563  				return nil, err
   564  			}
   565  			for _, peer := range getPeerResp.Peers {
   566  				params.Neighbor = &peer.PeerAddress
   567  				routes, err := m.getRoutesFromServer(ctx, s, params)
   568  				if err != nil {
   569  					return nil, err
   570  				}
   571  				res = append(res, routes...)
   572  			}
   573  		} else {
   574  			// get routes with provided params
   575  			routes, err := m.getRoutesFromServer(ctx, s, params)
   576  			if err != nil {
   577  				return nil, err
   578  			}
   579  			res = append(res, routes...)
   580  		}
   581  	}
   582  
   583  	return res, nil
   584  }
   585  
   586  // getRoutesFromServer retrieves routes from the RIB of the specified server
   587  func (m *BGPRouterManager) getRoutesFromServer(ctx context.Context, sc *instance.ServerWithConfig, params restapi.GetBgpRoutesParams) ([]*models.BgpRoute, error) {
   588  	req, err := api.ToAgentGetRoutesRequest(params)
   589  	if err != nil {
   590  		return nil, err
   591  	}
   592  	rs, err := sc.Server.GetRoutes(ctx, req)
   593  	if err != nil {
   594  		return nil, err
   595  	}
   596  	neighbor := ""
   597  	if params.Neighbor != nil {
   598  		neighbor = *params.Neighbor
   599  	}
   600  	return api.ToAPIRoutes(rs.Routes, sc.Config.LocalASN, neighbor)
   601  }
   602  
   603  func (m *BGPRouterManager) getRoutesV2(ctx context.Context, params restapi.GetBgpRoutesParams) ([]*models.BgpRoute, error) {
   604  	// validate router ASN
   605  	if params.RouterAsn != nil {
   606  		if !m.asnExistsInInstances(*params.RouterAsn) {
   607  			return nil, fmt.Errorf("virtual router with ASN %d does not exist", *params.RouterAsn)
   608  		}
   609  	}
   610  
   611  	// validate that router ASN is set for the neighbor if there are multiple servers
   612  	if params.Neighbor != nil && len(m.BGPInstances) > 1 && params.RouterAsn == nil {
   613  		return nil, fmt.Errorf("multiple virtual routers configured, router ASN must be specified")
   614  	}
   615  
   616  	// determine if we need to retrieve the routes for each peer (in case of adj-rib but no peer specified)
   617  	tt := types.ParseTableType(params.TableType)
   618  	allPeers := (tt == types.TableTypeAdjRIBIn || tt == types.TableTypeAdjRIBOut) && (params.Neighbor == nil || *params.Neighbor == "")
   619  
   620  	var res []*models.BgpRoute
   621  	for _, i := range m.BGPInstances {
   622  		if params.RouterAsn != nil && i.Config.LocalASN != nil && *params.RouterAsn != *i.Config.LocalASN {
   623  			continue // return routes matching provided router ASN only
   624  		}
   625  		if allPeers {
   626  			// get routes for each peer of the server
   627  			getPeerResp, err := i.Router.GetPeerState(ctx)
   628  			if err != nil {
   629  				return nil, err
   630  			}
   631  			for _, peer := range getPeerResp.Peers {
   632  				params.Neighbor = &peer.PeerAddress
   633  				routes, err := m.getRoutesFromInstance(ctx, i, params)
   634  				if err != nil {
   635  					return nil, err
   636  				}
   637  				res = append(res, routes...)
   638  			}
   639  		} else {
   640  			// get routes with provided params
   641  			routes, err := m.getRoutesFromInstance(ctx, i, params)
   642  			if err != nil {
   643  				return nil, err
   644  			}
   645  			res = append(res, routes...)
   646  		}
   647  	}
   648  	return res, nil
   649  }
   650  
   651  // getRoutesFromInstance retrieves routes from the RIB of the specified BGP instance
   652  func (m *BGPRouterManager) getRoutesFromInstance(ctx context.Context, i *instance.BGPInstance, params restapi.GetBgpRoutesParams) ([]*models.BgpRoute, error) {
   653  	if i.Config.LocalASN == nil {
   654  		return nil, fmt.Errorf("local ASN not set for instance")
   655  	}
   656  
   657  	req, err := api.ToAgentGetRoutesRequest(params)
   658  	if err != nil {
   659  		return nil, err
   660  	}
   661  
   662  	rs, err := i.Router.GetRoutes(ctx, req)
   663  	if err != nil {
   664  		return nil, err
   665  	}
   666  
   667  	neighbor := ""
   668  	if params.Neighbor != nil {
   669  		neighbor = *params.Neighbor
   670  	}
   671  	return api.ToAPIRoutes(rs.Routes, *i.Config.LocalASN, neighbor)
   672  }
   673  
   674  func (m *BGPRouterManager) asnExistsInInstances(asn int64) bool {
   675  	for _, instance := range m.BGPInstances {
   676  		if instance.Config.LocalASN != nil && *instance.Config.LocalASN == asn {
   677  			return true
   678  		}
   679  	}
   680  	return false
   681  }
   682  
   683  // GetRoutePolicies fetches BGP routing policies from underlying routing daemon.
   684  func (m *BGPRouterManager) GetRoutePolicies(ctx context.Context, params restapi.GetBgpRoutePoliciesParams) ([]*models.BgpRoutePolicy, error) {
   685  	m.RLock()
   686  	defer m.RUnlock()
   687  
   688  	if !m.running {
   689  		return nil, fmt.Errorf("bgp router manager is not running")
   690  	}
   691  
   692  	switch m.ConfigMode.Get() {
   693  	case mode.BGPv1:
   694  		return m.getRoutePoliciesV1(ctx, params)
   695  	case mode.BGPv2:
   696  		return m.getRoutePoliciesV2(ctx, params)
   697  	default:
   698  		return nil, nil
   699  	}
   700  }
   701  
   702  func (m *BGPRouterManager) getRoutePoliciesV1(ctx context.Context, params restapi.GetBgpRoutePoliciesParams) ([]*models.BgpRoutePolicy, error) {
   703  	// validate router ASN
   704  	if params.RouterAsn != nil {
   705  		if _, found := m.Servers[*params.RouterAsn]; !found {
   706  			return nil, fmt.Errorf("virtual router with ASN %d does not exist", *params.RouterAsn)
   707  		}
   708  	}
   709  
   710  	var res []*models.BgpRoutePolicy
   711  	for _, s := range m.Servers {
   712  		if params.RouterAsn != nil && *params.RouterAsn != s.Config.LocalASN {
   713  			continue // return policies matching provided router ASN only
   714  		}
   715  		rs, err := s.Server.GetRoutePolicies(ctx)
   716  		if err != nil {
   717  			return nil, err
   718  		}
   719  		res = append(res, api.ToAPIRoutePolicies(rs.Policies, s.Config.LocalASN)...)
   720  	}
   721  	return res, nil
   722  }
   723  
   724  func (m *BGPRouterManager) getRoutePoliciesV2(ctx context.Context, params restapi.GetBgpRoutePoliciesParams) ([]*models.BgpRoutePolicy, error) {
   725  	// validate router ASN
   726  	if params.RouterAsn != nil {
   727  		if !m.asnExistsInInstances(*params.RouterAsn) {
   728  			return nil, fmt.Errorf("virtual router with ASN %d does not exist", *params.RouterAsn)
   729  		}
   730  	}
   731  
   732  	var res []*models.BgpRoutePolicy
   733  	for _, i := range m.BGPInstances {
   734  		if params.RouterAsn != nil && i.Config.LocalASN != nil && *params.RouterAsn != *i.Config.LocalASN {
   735  			continue // return policies matching provided router ASN only
   736  		}
   737  		rs, err := i.Router.GetRoutePolicies(ctx)
   738  		if err != nil {
   739  			return nil, err
   740  		}
   741  		res = append(res, api.ToAPIRoutePolicies(rs.Policies, *i.Config.LocalASN)...)
   742  	}
   743  	return res, nil
   744  }
   745  
   746  // Stop cleans up all servers, should be called at shutdown
   747  func (m *BGPRouterManager) Stop() {
   748  	m.Lock()
   749  	defer m.Unlock()
   750  
   751  	for _, s := range m.Servers {
   752  		s.Server.Stop()
   753  	}
   754  
   755  	for name, i := range m.BGPInstances {
   756  		i.CancelCtx()
   757  		i.Router.Stop()
   758  		notifCh, exists := m.state.notifications[name]
   759  		if exists {
   760  			close(notifCh)
   761  		}
   762  	}
   763  
   764  	m.Servers = make(LocalASNMap)
   765  	m.BGPInstances = make(LocalInstanceMap)
   766  	m.state.notifications = make(map[string]types.StateNotificationCh)
   767  	m.running = false
   768  }
   769  
   770  // ReconcileInstances is a API for configuring the BGP Instances from the
   771  // desired CiliumBGPNodeConfig resource.
   772  //
   773  // ReconcileInstances will evaluate BGP instances to be created, removed and
   774  // reconciled.
   775  func (m *BGPRouterManager) ReconcileInstances(ctx context.Context,
   776  	nodeObj *v2alpha1api.CiliumBGPNodeConfig,
   777  	ciliumNode *v2api.CiliumNode) error {
   778  	m.Lock()
   779  	defer m.Unlock()
   780  
   781  	// use a reconcileDiff to compute which BgpServers must be created, removed
   782  	// and reconciled.
   783  	rd := newReconcileDiffV2(ciliumNode)
   784  
   785  	if nodeObj == nil {
   786  		m.withdrawAllV2(ctx, rd)
   787  		return nil
   788  	}
   789  
   790  	l := m.Logger.WithFields(logrus.Fields{
   791  		types.BGPNodeConfigLogField: nodeObj.Name,
   792  	})
   793  
   794  	err := rd.diff(m.BGPInstances, nodeObj)
   795  	if err != nil {
   796  		return err
   797  	}
   798  
   799  	if rd.empty() {
   800  		l.Debug("BGP instance up-to-date with CiliumBGPNodeConfig")
   801  		return nil
   802  	}
   803  	l.WithField("diff", rd.String()).Debug("Reconciling BGP instances")
   804  
   805  	if len(rd.register) > 0 {
   806  		if err := m.registerV2(ctx, rd); err != nil {
   807  			return err
   808  		}
   809  	}
   810  	if len(rd.withdraw) > 0 {
   811  		m.withdrawV2(ctx, rd)
   812  	}
   813  	if len(rd.reconcile) > 0 {
   814  		if err := m.reconcileV2(ctx, rd); err != nil {
   815  			return err
   816  		}
   817  	}
   818  
   819  	return nil
   820  }
   821  
   822  // registerV2 instantiates and configures BGP Instance(s) as instructed by the provided
   823  // work diff.
   824  func (m *BGPRouterManager) registerV2(ctx context.Context, rd *reconcileDiffV2) error {
   825  	var (
   826  		instancesWithError []string
   827  		lastErr            error
   828  	)
   829  	for _, name := range rd.register {
   830  		var config *v2alpha1api.CiliumBGPNodeInstance
   831  		var ok bool
   832  		if config, ok = rd.seen[name]; !ok {
   833  			m.Logger.WithField(types.InstanceLogField, name).Debug("Work diff (add) contains unseen instance, skipping")
   834  			instancesWithError = append(instancesWithError, name)
   835  			lastErr = errors.New("unseen instance")
   836  			continue
   837  		}
   838  		if rErr := m.registerBGPInstance(ctx, config, rd.ciliumNode); rErr != nil {
   839  			// we'll log the error and attempt to register the next instance.
   840  			m.Logger.WithField(types.InstanceLogField, name).WithError(rErr).Debug("Error registering new BGP instance")
   841  			instancesWithError = append(instancesWithError, name)
   842  			lastErr = rErr
   843  		}
   844  	}
   845  	if len(instancesWithError) > 0 {
   846  		return fmt.Errorf("error registering new BGP instances: %v (last error: %w)", instancesWithError, lastErr)
   847  	}
   848  	return nil
   849  }
   850  
   851  // registerBGPServer encapsulates the logic for instantiating a
   852  // BgpInstance
   853  func (m *BGPRouterManager) registerBGPInstance(ctx context.Context,
   854  	c *v2alpha1api.CiliumBGPNodeInstance,
   855  	ciliumNode *v2api.CiliumNode) error {
   856  
   857  	l := m.Logger.WithFields(logrus.Fields{
   858  		types.InstanceLogField: c.Name,
   859  	})
   860  
   861  	l.Info("Registering BGP instance")
   862  
   863  	var localASN int64
   864  	if c.LocalASN != nil {
   865  		localASN = *c.LocalASN
   866  	} else {
   867  		// TODO for now we require a local ASN to be specified
   868  		// remove this check once we support auto-ASN assignment.
   869  		return fmt.Errorf("local ASN must be specified")
   870  	}
   871  
   872  	annoMap, err := agent.NewAnnotationMap(ciliumNode.Annotations)
   873  	if err != nil {
   874  		return fmt.Errorf("unable to parse local node annotations: %w", err)
   875  	}
   876  
   877  	// resolve local port from kubernetes annotations
   878  	var localPort int32
   879  	localPort = -1
   880  	if attrs, ok := annoMap[localASN]; ok {
   881  		if attrs.LocalPort != 0 {
   882  			localPort = int32(attrs.LocalPort)
   883  		}
   884  	}
   885  
   886  	routerID, err := annoMap.ResolveRouterID(localASN)
   887  	if err != nil {
   888  		if nodeIP := ciliumNode.GetIP(false); nodeIP == nil {
   889  			return fmt.Errorf("failed to get cilium node IP %v: %w", nodeIP, err)
   890  		} else {
   891  			routerID = nodeIP.String()
   892  		}
   893  	}
   894  
   895  	// override configuration via CiliumBGPNodeConfigOverride, it will take precedence over
   896  	// the annotations.
   897  	if c.LocalPort != nil {
   898  		localPort = *c.LocalPort
   899  	}
   900  
   901  	if c.RouterID != nil {
   902  		routerID = *c.RouterID
   903  	}
   904  
   905  	globalConfig := types.ServerParameters{
   906  		Global: types.BGPGlobal{
   907  			ASN:        uint32(localASN),
   908  			RouterID:   routerID,
   909  			ListenPort: localPort,
   910  			RouteSelectionOptions: &types.RouteSelectionOptions{
   911  				AdvertiseInactiveRoutes: true,
   912  			},
   913  		},
   914  		StateNotification: make(types.StateNotificationCh, 1),
   915  	}
   916  
   917  	i, err := instance.NewBGPInstance(ctx, m.Logger.WithField(types.InstanceLogField, c.Name), globalConfig)
   918  	if err != nil {
   919  		return fmt.Errorf("failed to start BGP instance: %w", err)
   920  	}
   921  
   922  	// register with manager
   923  	m.BGPInstances[c.Name] = i
   924  	m.state.notifications[c.Name] = globalConfig.StateNotification
   925  
   926  	// start consuming state notifications
   927  	go m.trackInstanceStateChange(c.Name, globalConfig.StateNotification)
   928  
   929  	// initialize the reconcilers for this instance
   930  	for _, r := range m.ConfigReconcilers {
   931  		err = r.Init(i)
   932  		if err != nil {
   933  			return fmt.Errorf("%s reconciler initialization failed: %w", r.Name(), err)
   934  		}
   935  	}
   936  
   937  	if err = m.reconcileBGPConfigV2(ctx, i, c, ciliumNode); err != nil {
   938  		return fmt.Errorf("failed initial reconciliation of BGP instance: %w", err)
   939  	}
   940  
   941  	l.Info("Successfully registered BGP instance")
   942  
   943  	return err
   944  }
   945  
   946  // reconcileBGPConfigV2 will utilize the current set of ConfigReconcilerV2
   947  // to push a BGP Instance to its desired configuration.
   948  //
   949  // Each reconcilier is responsible for getting the desired configuration from
   950  // resource store and applying it to the BGP Instance.
   951  func (m *BGPRouterManager) reconcileBGPConfigV2(ctx context.Context,
   952  	i *instance.BGPInstance,
   953  	newc *v2alpha1api.CiliumBGPNodeInstance,
   954  	ciliumNode *v2api.CiliumNode) error {
   955  
   956  	for _, r := range m.ConfigReconcilers {
   957  		if err := r.Reconcile(ctx, reconcilerv2.ReconcileParams{
   958  			BGPInstance:   i,
   959  			DesiredConfig: newc,
   960  			CiliumNode:    ciliumNode,
   961  		}); err != nil {
   962  			return err
   963  		}
   964  	}
   965  	i.Config = newc
   966  	return nil
   967  }
   968  
   969  // withdraw disconnects and removes BGP Instance(s) as instructed by the provided
   970  // work diff.
   971  func (m *BGPRouterManager) withdrawV2(ctx context.Context, rd *reconcileDiffV2) {
   972  	for _, name := range rd.withdraw {
   973  		var (
   974  			i  *instance.BGPInstance
   975  			ok bool
   976  		)
   977  		if i, ok = m.BGPInstances[name]; !ok {
   978  			m.Logger.WithField(types.InstanceLogField, name).Warn("BGP instance marked for deletion but does not exist")
   979  			continue
   980  		}
   981  		for _, r := range m.ConfigReconcilers {
   982  			r.Cleanup(i)
   983  		}
   984  
   985  		i.CancelCtx()
   986  		i.Router.Stop()
   987  		notifCh, exists := m.state.notifications[name]
   988  		if exists {
   989  			close(notifCh)
   990  		}
   991  		delete(m.BGPInstances, name)
   992  		delete(m.state.notifications, name)
   993  		m.Logger.WithField(types.InstanceLogField, name).Info("Removed BGP instance")
   994  	}
   995  }
   996  
   997  // withdrawAll will disconnect and remove all currently registered BGP Instance(s).
   998  //
   999  // `rd` must be a newly created reconcileDiff which has not had its `Diff` method
  1000  // called.
  1001  func (m *BGPRouterManager) withdrawAllV2(ctx context.Context, rd *reconcileDiffV2) {
  1002  	if len(m.BGPInstances) == 0 {
  1003  		return
  1004  	}
  1005  	for name := range m.BGPInstances {
  1006  		rd.withdraw = append(rd.withdraw, name)
  1007  	}
  1008  	m.withdrawV2(ctx, rd)
  1009  }
  1010  
  1011  // reconcile evaluates existing BGP Instance(s).
  1012  func (m *BGPRouterManager) reconcileV2(ctx context.Context, rd *reconcileDiffV2) error {
  1013  	var (
  1014  		instancesWithError []string
  1015  		lastErr            error
  1016  	)
  1017  	for _, name := range rd.reconcile {
  1018  		var (
  1019  			i    = m.BGPInstances[name]
  1020  			newc = rd.seen[name]
  1021  		)
  1022  		if i == nil {
  1023  			m.Logger.WithField(types.InstanceLogField, name).Error("BUG: BGP instance marked for reconciliation but missing from Manager") // really shouldn't happen, tagging as bug
  1024  			instancesWithError = append(instancesWithError, name)
  1025  			continue
  1026  		}
  1027  		if newc == nil {
  1028  			m.Logger.WithField(types.InstanceLogField, name).Error("BUG: BGP instance marked for reconciliation but missing from incoming configurations") // also really shouldn't happen
  1029  			instancesWithError = append(instancesWithError, name)
  1030  			continue
  1031  		}
  1032  
  1033  		if err := m.reconcileBGPConfigV2(ctx, i, newc, rd.ciliumNode); err != nil {
  1034  			m.Logger.WithField(types.InstanceLogField, name).WithError(err).Debug("Error reconciling BGP instance")
  1035  			instancesWithError = append(instancesWithError, name)
  1036  			lastErr = err
  1037  		}
  1038  	}
  1039  
  1040  	if len(instancesWithError) > 0 {
  1041  		return fmt.Errorf("error reconciling BGP instances: %v (last error: %w)", instancesWithError, lastErr)
  1042  	}
  1043  	return nil
  1044  }