// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

// Source listing: github.com/cilium/cilium@v1.16.2/operator/pkg/bgpv2/manager.go

package bgpv2

import (
	"context"
	"fmt"

	"github.com/cilium/hive/cell"
	"github.com/cilium/hive/job"
	"github.com/sirupsen/logrus"
	"k8s.io/apimachinery/pkg/util/wait"

	cilium_api_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	cilium_api_v2alpha1 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
	k8s_client "github.com/cilium/cilium/pkg/k8s/client"
	cilium_client_v2alpha1 "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned/typed/cilium.io/v2alpha1"
	"github.com/cilium/cilium/pkg/k8s/resource"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/time"
)

var (
	// retry options used in reconcileWithRetry method.
	// steps will repeat for ~8.5 minutes.
	//
	// The delay starts at one second and is multiplied by Factor (2) after
	// each step, for at most Steps (10) steps. Jitter is zero, so no
	// randomness is added to the delays.
	// NOTE(review): Cap is left at 0, presumably meaning "no upper bound on a
	// single delay" - confirm against the wait.Backoff documentation.
	bo = wait.Backoff{
		Duration: 1 * time.Second,
		Factor:   2,
		Jitter:   0,
		Steps:    10,
		Cap:      0,
	}

	// maxErrorLen is the maximum length of error message to be logged.
	// reconcileWithRetry passes it to TrimError, which compares it against the
	// byte length of the error message.
	maxErrorLen = 140
)

// BGPParams lists the dependencies that registerBGPResourceManager needs to
// build a BGPResourceManager. It embeds cell.In and is taken by
// registerBGPResourceManager as its only argument.
type BGPParams struct {
	cell.In

	Logger       logrus.FieldLogger
	LC           cell.Lifecycle
	Clientset    k8s_client.Clientset
	DaemonConfig *option.DaemonConfig
	JobGroup     job.Group
	Health       cell.Health

	// resource tracking
	ClusterConfigResource      resource.Resource[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	NodeConfigOverrideResource resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	NodeConfigResource         resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	NodeResource               resource.Resource[*cilium_api_v2.CiliumNode]
}

// BGPResourceManager is the operator-side manager for BGPv2 resources. It is
// created by registerBGPResourceManager, watches the cluster config, node
// config override and CiliumNode resources, and runs a reconciliation each
// time one of them emits an event.
type BGPResourceManager struct {
	logger    logrus.FieldLogger
	clientset k8s_client.Clientset
	lc        cell.Lifecycle
	jobs      job.Group
	health    cell.Health

	// For BGP Cluster Config
	//
	// The resource.Resource fields are copied from BGPParams. The matching
	// resource.Store fields are filled in by initializeStores, and
	// nodeConfigClient is derived from clientset in
	// registerBGPResourceManager.
	clusterConfig           resource.Resource[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	nodeConfigOverride      resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	nodeConfig              resource.Resource[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	ciliumNode              resource.Resource[*cilium_api_v2.CiliumNode]
	clusterConfigStore      resource.Store[*cilium_api_v2alpha1.CiliumBGPClusterConfig]
	nodeConfigOverrideStore resource.Store[*cilium_api_v2alpha1.CiliumBGPNodeConfigOverride]
	nodeConfigStore         resource.Store[*cilium_api_v2alpha1.CiliumBGPNodeConfig]
	ciliumNodeStore         resource.Store[*cilium_api_v2.CiliumNode]
	nodeConfigClient        cilium_client_v2alpha1.CiliumBGPNodeConfigInterface

	// internal state
	//
	// reconcileCh carries "please reconcile" signals: triggerReconcile does a
	// non-blocking send on it and Run receives from it.
	// bgpClusterSyncCh is signalled when the cluster config resource emits its
	// Sync event; Run waits on it before the first reconciliation.
	// Both channels are created with a buffer of one element.
	reconcileCh      chan struct{}
	bgpClusterSyncCh chan struct{}
}

// registerBGPResourceManager creates a new BGPResourceManager operator instance.
80 func registerBGPResourceManager(p BGPParams) *BGPResourceManager { 81 // if BGPResourceManager Control Plane is not enabled or BGPv2 API is not enabled, return nil 82 if !p.DaemonConfig.BGPControlPlaneEnabled() { 83 return nil 84 } 85 86 b := &BGPResourceManager{ 87 logger: p.Logger, 88 clientset: p.Clientset, 89 jobs: p.JobGroup, 90 lc: p.LC, 91 health: p.Health, 92 93 reconcileCh: make(chan struct{}, 1), 94 bgpClusterSyncCh: make(chan struct{}, 1), 95 clusterConfig: p.ClusterConfigResource, 96 nodeConfigOverride: p.NodeConfigOverrideResource, 97 nodeConfig: p.NodeConfigResource, 98 ciliumNode: p.NodeResource, 99 } 100 101 b.nodeConfigClient = b.clientset.CiliumV2alpha1().CiliumBGPNodeConfigs() 102 103 // initialize jobs and register them with lifecycle 104 b.initializeJobs() 105 106 return b 107 } 108 109 func (b *BGPResourceManager) initializeJobs() { 110 b.jobs.Add( 111 job.OneShot("bgpv2-operator-main", func(ctx context.Context, health cell.Health) error { 112 // initialize resource stores 113 err := b.initializeStores(ctx) 114 if err != nil { 115 return err 116 } 117 118 b.logger.Info("BGPv2 control plane operator started") 119 120 return b.Run(ctx) 121 }), 122 123 job.OneShot("bgpv2-operator-cluster-config-tracker", func(ctx context.Context, health cell.Health) error { 124 for e := range b.clusterConfig.Events(ctx) { 125 if e.Kind == resource.Sync { 126 select { 127 case b.bgpClusterSyncCh <- struct{}{}: 128 default: 129 } 130 } 131 132 b.triggerReconcile() 133 e.Done(nil) 134 } 135 return nil 136 }), 137 138 job.OneShot("bgpv2-operator-node-config-override-tracker", func(ctx context.Context, health cell.Health) error { 139 for e := range b.nodeConfigOverride.Events(ctx) { 140 b.triggerReconcile() 141 e.Done(nil) 142 } 143 return nil 144 }), 145 146 job.OneShot("bgpv2-operator-node-tracker", func(ctx context.Context, health cell.Health) error { 147 for e := range b.ciliumNode.Events(ctx) { 148 b.triggerReconcile() 149 e.Done(nil) 150 } 151 return nil 152 
}), 153 ) 154 } 155 156 func (b *BGPResourceManager) initializeStores(ctx context.Context) (err error) { 157 b.clusterConfigStore, err = b.clusterConfig.Store(ctx) 158 if err != nil { 159 return 160 } 161 162 b.nodeConfigOverrideStore, err = b.nodeConfigOverride.Store(ctx) 163 if err != nil { 164 return 165 } 166 167 b.nodeConfigStore, err = b.nodeConfig.Store(ctx) 168 if err != nil { 169 return 170 } 171 172 b.ciliumNodeStore, err = b.ciliumNode.Store(ctx) 173 if err != nil { 174 return 175 } 176 177 return nil 178 } 179 180 // triggerReconcile initiates level triggered reconciliation. 181 func (b *BGPResourceManager) triggerReconcile() { 182 select { 183 case b.reconcileCh <- struct{}{}: 184 b.logger.Debug("BGP reconciliation triggered") 185 default: 186 } 187 } 188 189 // Run starts the BGPResourceManager operator. 190 func (b *BGPResourceManager) Run(ctx context.Context) (err error) { 191 // make sure cluster config is synced before starting the reconciliation 192 <-b.bgpClusterSyncCh 193 194 // trigger reconciliation for first time. 195 b.triggerReconcile() 196 197 for { 198 select { 199 case <-ctx.Done(): 200 return 201 202 case _, open := <-b.reconcileCh: 203 if !open { 204 return 205 } 206 207 err := b.reconcileWithRetry(ctx) 208 if err != nil { 209 b.logger.WithError(err).Error("BGP reconciliation failed") 210 } else { 211 b.logger.Debug("BGP reconciliation successful") 212 } 213 } 214 } 215 } 216 217 // reconcileWithRetry retries reconcile with exponential backoff. 
218 func (b *BGPResourceManager) reconcileWithRetry(ctx context.Context) error { 219 retryFn := func(ctx context.Context) (bool, error) { 220 err := b.reconcile(ctx) 221 222 switch { 223 case err != nil: 224 // log error, continue retry 225 b.logger.WithError(TrimError(err, maxErrorLen)).Warn("BGP reconciliation error") 226 return false, nil 227 default: 228 // no error, stop retry 229 return true, nil 230 } 231 } 232 233 return wait.ExponentialBackoffWithContext(ctx, bo, retryFn) 234 } 235 236 // reconcile is called when any interesting resource change event is triggered. 237 func (b *BGPResourceManager) reconcile(ctx context.Context) error { 238 return b.reconcileBGPClusterConfigs(ctx) 239 } 240 241 // TrimError trims error message to maxLen. 242 func TrimError(err error, maxLen int) error { 243 if err == nil { 244 return nil 245 } 246 247 if len(err.Error()) > maxLen { 248 return fmt.Errorf("%s... ", err.Error()[:maxLen]) 249 } 250 return err 251 }