github.com/cilium/cilium@v1.16.2/operator/pkg/bgpv2/manager.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package bgpv2
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  
    10  	"github.com/cilium/hive/cell"
    11  	"github.com/cilium/hive/job"
    12  	"github.com/sirupsen/logrus"
    13  	"k8s.io/apimachinery/pkg/util/wait"
    14  
    15  	cilium_api_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    16  	cilium_api_v2alpha1 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
    17  	k8s_client "github.com/cilium/cilium/pkg/k8s/client"
    18  	cilium_client_v2alpha1 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2alpha1"
    19  	"github.com/cilium/cilium/pkg/k8s/resource"
    20  	"github.com/cilium/cilium/pkg/option"
    21  	"github.com/cilium/cilium/pkg/time"
    22  )
    23  
    24  var (
    25  	// retry options used in reconcileWithRetry method.
    26  	// steps will repeat for ~8.5 minutes.
    27  	bo = wait.Backoff{
    28  		Duration: 1 * time.Second,
    29  		Factor:   2,
    30  		Jitter:   0,
    31  		Steps:    10,
    32  		Cap:      0,
    33  	}
    34  
    35  	// maxErrorLen is the maximum length of error message to be logged.
    36  	maxErrorLen = 140
    37  )
    38  
// BGPParams bundles the inputs of registerBGPResourceManager. It embeds
// cell.In; every field is copied into (or consulted while building) the
// BGPResourceManager.
type BGPParams struct {
	cell.In

	Logger       logrus.FieldLogger
	LC           cell.Lifecycle
	Clientset    k8s_client.Clientset
	DaemonConfig *option.DaemonConfig
	JobGroup     job.Group
	Health       cell.Health

	// resource tracking: the manager consumes events from and/or opens
	// stores on these resources.
	ClusterConfigResource      resource.Resource[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	NodeConfigOverrideResource resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	NodeConfigResource         resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	NodeResource               resource.Resource[*cilium_api_v2.CiliumNode]
}
    55  
// BGPResourceManager is the operator-side BGPv2 manager. It reacts to
// changes of the tracked resources by running a reconciliation of the BGP
// cluster configs.
type BGPResourceManager struct {
	logger    logrus.FieldLogger
	clientset k8s_client.Clientset
	lc        cell.Lifecycle
	jobs      job.Group
	health    cell.Health

	// For BGP Cluster Config
	//
	// The resource.Resource fields are injected at construction time; the
	// matching *Store fields are filled in later by initializeStores.
	clusterConfig           resource.Resource[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	nodeConfigOverride      resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	nodeConfig              resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	ciliumNode              resource.Resource[*cilium_api_v2.CiliumNode]
	clusterConfigStore      resource.Store[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	nodeConfigOverrideStore resource.Store[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	nodeConfigStore         resource.Store[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	ciliumNodeStore         resource.Store[*cilium_api_v2.CiliumNode]
	// nodeConfigClient is the CiliumBGPNodeConfig client obtained from clientset.
	nodeConfigClient cilium_client_v2alpha1.CiliumBGPNodeConfigInterface

	// internal state
	//
	// reconcileCh carries reconcile requests: triggerReconcile does a
	// non-blocking send on it and Run receives from it.
	reconcileCh chan struct{}
	// bgpClusterSyncCh is signalled by the cluster config tracker when it
	// sees a resource.Sync event; Run waits on it before reconciling.
	bgpClusterSyncCh chan struct{}
}
    78  
    79  // registerBGPResourceManager creates a new BGPResourceManager operator instance.
    80  func registerBGPResourceManager(p BGPParams) *BGPResourceManager {
    81  	// if BGPResourceManager Control Plane is not enabled or BGPv2 API is not enabled, return nil
    82  	if !p.DaemonConfig.BGPControlPlaneEnabled() {
    83  		return nil
    84  	}
    85  
    86  	b := &BGPResourceManager{
    87  		logger:    p.Logger,
    88  		clientset: p.Clientset,
    89  		jobs:      p.JobGroup,
    90  		lc:        p.LC,
    91  		health:    p.Health,
    92  
    93  		reconcileCh:        make(chan struct{}, 1),
    94  		bgpClusterSyncCh:   make(chan struct{}, 1),
    95  		clusterConfig:      p.ClusterConfigResource,
    96  		nodeConfigOverride: p.NodeConfigOverrideResource,
    97  		nodeConfig:         p.NodeConfigResource,
    98  		ciliumNode:         p.NodeResource,
    99  	}
   100  
   101  	b.nodeConfigClient = b.clientset.CiliumV2alpha1().CiliumBGPNodeConfigs()
   102  
   103  	// initialize jobs and register them with lifecycle
   104  	b.initializeJobs()
   105  
   106  	return b
   107  }
   108  
   109  func (b *BGPResourceManager) initializeJobs() {
   110  	b.jobs.Add(
   111  		job.OneShot("bgpv2-operator-main", func(ctx context.Context, health cell.Health) error {
   112  			// initialize resource stores
   113  			err := b.initializeStores(ctx)
   114  			if err != nil {
   115  				return err
   116  			}
   117  
   118  			b.logger.Info("BGPv2 control plane operator started")
   119  
   120  			return b.Run(ctx)
   121  		}),
   122  
   123  		job.OneShot("bgpv2-operator-cluster-config-tracker", func(ctx context.Context, health cell.Health) error {
   124  			for e := range b.clusterConfig.Events(ctx) {
   125  				if e.Kind == resource.Sync {
   126  					select {
   127  					case b.bgpClusterSyncCh <- struct{}{}:
   128  					default:
   129  					}
   130  				}
   131  
   132  				b.triggerReconcile()
   133  				e.Done(nil)
   134  			}
   135  			return nil
   136  		}),
   137  
   138  		job.OneShot("bgpv2-operator-node-config-override-tracker", func(ctx context.Context, health cell.Health) error {
   139  			for e := range b.nodeConfigOverride.Events(ctx) {
   140  				b.triggerReconcile()
   141  				e.Done(nil)
   142  			}
   143  			return nil
   144  		}),
   145  
   146  		job.OneShot("bgpv2-operator-node-tracker", func(ctx context.Context, health cell.Health) error {
   147  			for e := range b.ciliumNode.Events(ctx) {
   148  				b.triggerReconcile()
   149  				e.Done(nil)
   150  			}
   151  			return nil
   152  		}),
   153  	)
   154  }
   155  
   156  func (b *BGPResourceManager) initializeStores(ctx context.Context) (err error) {
   157  	b.clusterConfigStore, err = b.clusterConfig.Store(ctx)
   158  	if err != nil {
   159  		return
   160  	}
   161  
   162  	b.nodeConfigOverrideStore, err = b.nodeConfigOverride.Store(ctx)
   163  	if err != nil {
   164  		return
   165  	}
   166  
   167  	b.nodeConfigStore, err = b.nodeConfig.Store(ctx)
   168  	if err != nil {
   169  		return
   170  	}
   171  
   172  	b.ciliumNodeStore, err = b.ciliumNode.Store(ctx)
   173  	if err != nil {
   174  		return
   175  	}
   176  
   177  	return nil
   178  }
   179  
   180  // triggerReconcile initiates level triggered reconciliation.
   181  func (b *BGPResourceManager) triggerReconcile() {
   182  	select {
   183  	case b.reconcileCh <- struct{}{}:
   184  		b.logger.Debug("BGP reconciliation triggered")
   185  	default:
   186  	}
   187  }
   188  
   189  // Run starts the BGPResourceManager operator.
   190  func (b *BGPResourceManager) Run(ctx context.Context) (err error) {
   191  	// make sure cluster config is synced before starting the reconciliation
   192  	<-b.bgpClusterSyncCh
   193  
   194  	// trigger reconciliation for first time.
   195  	b.triggerReconcile()
   196  
   197  	for {
   198  		select {
   199  		case <-ctx.Done():
   200  			return
   201  
   202  		case _, open := <-b.reconcileCh:
   203  			if !open {
   204  				return
   205  			}
   206  
   207  			err := b.reconcileWithRetry(ctx)
   208  			if err != nil {
   209  				b.logger.WithError(err).Error("BGP reconciliation failed")
   210  			} else {
   211  				b.logger.Debug("BGP reconciliation successful")
   212  			}
   213  		}
   214  	}
   215  }
   216  
   217  // reconcileWithRetry retries reconcile with exponential backoff.
   218  func (b *BGPResourceManager) reconcileWithRetry(ctx context.Context) error {
   219  	retryFn := func(ctx context.Context) (bool, error) {
   220  		err := b.reconcile(ctx)
   221  
   222  		switch {
   223  		case err != nil:
   224  			// log error, continue retry
   225  			b.logger.WithError(TrimError(err, maxErrorLen)).Warn("BGP reconciliation error")
   226  			return false, nil
   227  		default:
   228  			// no error, stop retry
   229  			return true, nil
   230  		}
   231  	}
   232  
   233  	return wait.ExponentialBackoffWithContext(ctx, bo, retryFn)
   234  }
   235  
// reconcile is called when any interesting resource change event is triggered.
// It currently consists of a single step, reconcileBGPClusterConfigs, whose
// error is returned unchanged.
func (b *BGPResourceManager) reconcile(ctx context.Context) error {
	return b.reconcileBGPClusterConfigs(ctx)
}
   240  
   241  // TrimError trims error message to maxLen.
   242  func TrimError(err error, maxLen int) error {
   243  	if err == nil {
   244  		return nil
   245  	}
   246  
   247  	if len(err.Error()) > maxLen {
   248  		return fmt.Errorf("%s... ", err.Error()[:maxLen])
   249  	}
   250  	return err
   251  }