github.com/cilium/cilium@v1.16.2/pkg/ciliumenvoyconfig/cec_reconciler.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package ciliumenvoyconfig 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "maps" 11 "sync/atomic" 12 13 "github.com/sirupsen/logrus" 14 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 16 ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 17 "github.com/cilium/cilium/pkg/k8s/resource" 18 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels" 19 slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1" 20 "github.com/cilium/cilium/pkg/k8s/synced" 21 "github.com/cilium/cilium/pkg/lock" 22 "github.com/cilium/cilium/pkg/logging/logfields" 23 "github.com/cilium/cilium/pkg/node" 24 ) 25 26 const ( 27 k8sAPIGroupCiliumEnvoyConfigV2 = "cilium/v2::CiliumEnvoyConfig" 28 k8sAPIGroupCiliumClusterwideEnvoyConfigV2 = "cilium/v2::CiliumClusterwideEnvoyConfig" 29 ) 30 31 type ciliumEnvoyConfigReconciler struct { 32 logger logrus.FieldLogger 33 34 k8sResourceSynced *synced.Resources 35 k8sAPIGroups *synced.APIGroups 36 37 cecSynced atomic.Bool 38 ccecSynced atomic.Bool 39 40 manager ciliumEnvoyConfigManager 41 42 mutex lock.Mutex 43 configs map[resource.Key]*config 44 localNodeLabels map[string]string 45 } 46 47 type config struct { 48 meta metav1.ObjectMeta 49 spec *ciliumv2.CiliumEnvoyConfigSpec 50 // Keeping the state whether the config matched as dedicated field. 51 // This is only used when checking whether an existing config selected 52 // the local node. (instead of re-evaluating using the node selector) 53 selectsLocalNode bool 54 } 55 56 func newCiliumEnvoyConfigReconciler(params reconcilerParams) *ciliumEnvoyConfigReconciler { 57 return &ciliumEnvoyConfigReconciler{ 58 logger: params.Logger, 59 k8sResourceSynced: params.K8sResourceSynced, 60 k8sAPIGroups: params.K8sAPIGroups, 61 manager: params.Manager, 62 configs: map[resource.Key]*config{}, 63 } 64 } 65 66 func (r *ciliumEnvoyConfigReconciler) registerResourceWithSyncFn(ctx context.Context, resource string, syncFn func() bool) { 67 if r.k8sResourceSynced != nil && r.k8sAPIGroups != nil { 68 r.k8sResourceSynced.BlockWaitGroupToSyncResources(ctx.Done(), nil, syncFn, resource) 69 r.k8sAPIGroups.AddAPI(resource) 70 } 71 } 72 73 func (r *ciliumEnvoyConfigReconciler) handleCECEvent(ctx context.Context, event resource.Event[*ciliumv2.CiliumEnvoyConfig]) error { 74 scopedLogger := r.logger. 75 WithField(logfields.K8sNamespace, event.Key.Namespace). 76 WithField(logfields.CiliumEnvoyConfigName, event.Key.Name) 77 78 var err error 79 80 switch event.Kind { 81 case resource.Sync: 82 scopedLogger.Debug("Received CiliumEnvoyConfig sync event") 83 r.cecSynced.Store(true) 84 case resource.Upsert: 85 scopedLogger.Debug("Received CiliumEnvoyConfig upsert event") 86 err = r.configUpserted(ctx, event.Key, &config{meta: event.Object.ObjectMeta, spec: &event.Object.Spec}) 87 if err != nil { 88 scopedLogger.WithError(err).Info("Failed to handle CEC upsert, Hive will retry") 89 err = fmt.Errorf("failed to handle CEC upsert: %w", err) 90 } 91 case resource.Delete: 92 scopedLogger.Debug("Received CiliumEnvoyConfig delete event") 93 err = r.configDeleted(ctx, event.Key) 94 if err != nil { 95 scopedLogger.WithError(err).Info("Failed to handle CEC delete, Hive will retry") 96 err = fmt.Errorf("failed to handle CEC delete: %w", err) 97 } 98 } 99 100 event.Done(err) 101 102 return err 103 } 104 105 func (r *ciliumEnvoyConfigReconciler) handleCCECEvent(ctx context.Context, event resource.Event[*ciliumv2.CiliumClusterwideEnvoyConfig]) error { 106 scopedLogger := r.logger. 107 WithField(logfields.K8sNamespace, event.Key.Namespace). 108 WithField(logfields.CiliumClusterwideEnvoyConfigName, event.Key.Name) 109 110 var err error 111 112 switch event.Kind { 113 case resource.Sync: 114 scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig sync event") 115 r.ccecSynced.Store(true) 116 case resource.Upsert: 117 scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig upsert event") 118 err = r.configUpserted(ctx, event.Key, &config{meta: event.Object.ObjectMeta, spec: &event.Object.Spec}) 119 if err != nil { 120 scopedLogger.WithError(err).Info("Failed to handle CCEC upsert, Hive will retry") 121 err = fmt.Errorf("failed to handle CCEC upsert: %w", err) 122 } 123 case resource.Delete: 124 scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig delete event") 125 err = r.configDeleted(ctx, event.Key) 126 if err != nil { 127 scopedLogger.WithError(err).Info("Failed to handle CEC delete, Hive will retry") 128 err = fmt.Errorf("failed to handle CCEC delete: %w", err) 129 } 130 } 131 132 event.Done(err) 133 134 return err 135 } 136 137 func (r *ciliumEnvoyConfigReconciler) handleLocalNodeEvent(ctx context.Context, localNode node.LocalNode) error { 138 r.logger.Debug("Received LocalNode changed event") 139 140 if err := r.handleLocalNodeLabels(ctx, localNode); err != nil { 141 r.logger.WithError(err).Error("failed to handle LocalNode changed event") 142 return fmt.Errorf("failed to handle LocalNode changed event: %w", err) 143 } 144 145 return nil 146 } 147 148 func (r *ciliumEnvoyConfigReconciler) handleLocalNodeLabels(ctx context.Context, localNode node.LocalNode) error { 149 r.mutex.Lock() 150 defer r.mutex.Unlock() 151 152 if maps.Equal(r.localNodeLabels, localNode.Labels) { 153 r.logger.Debug("Labels of local Node didn't change") 154 return nil 155 } 156 157 r.localNodeLabels = localNode.Labels 158 r.logger.Debug("Labels of local Node changed - updated local store") 159 160 // Best effort attempt to reconcile existing configs as fast as possible. 161 // 162 // Errors are only logged and not reported. Otherwise the healthmanager state will be degraded 163 // until the next label change on the node. 164 // It's the responsibility of the corresponding TimerJob to perform a periodic reconciliation. 165 if err := r.reconcileExistingConfigsLocked(ctx); err != nil { 166 r.logger.WithError(err).Error("failed to reconcile existing configs due to changed node labels") 167 } 168 169 return nil 170 } 171 172 func (r *ciliumEnvoyConfigReconciler) reconcileExistingConfigs(ctx context.Context) error { 173 r.mutex.Lock() 174 defer r.mutex.Unlock() 175 176 return r.reconcileExistingConfigsLocked(ctx) 177 } 178 179 func (r *ciliumEnvoyConfigReconciler) reconcileExistingConfigsLocked(ctx context.Context) error { 180 r.logger.Debug("Checking whether existing configs need to be applied or filtered") 181 182 // Error containing all potential errors during reconciliation of the configs. 183 // On error, only the reconciliation of the faulty config is skipped. All other 184 // configs should be reconciled. 185 var reconcileErr error 186 187 for key, cfg := range r.configs { 188 scopedLogger := r.logger.WithField("key", key) 189 190 err := r.configUpsertedInternal(ctx, key, cfg, false /* spec didn't change */) 191 if err != nil { 192 scopedLogger.WithError(err).Error("failed to reconcile existing configs") 193 // don't prevent reconciliation of other configs in case of an error for a particular config 194 reconcileErr = errors.Join(reconcileErr, fmt.Errorf("failed to reconcile existing config (%s): %w", key, err)) 195 continue 196 } 197 } 198 199 return reconcileErr 200 } 201 202 func (r *ciliumEnvoyConfigReconciler) configUpserted(ctx context.Context, key resource.Key, cfg *config) error { 203 r.mutex.Lock() 204 defer r.mutex.Unlock() 205 206 return r.configUpsertedInternal(ctx, key, cfg, true /* spec may have changed */) 207 } 208 209 func (r *ciliumEnvoyConfigReconciler) configUpsertedInternal(ctx context.Context, key resource.Key, cfg *config, specMayChanged bool) error { 210 scopedLogger := r.logger.WithField("key", key) 211 212 selectsLocalNode, err := r.configSelectsLocalNode(cfg) 213 if err != nil { 214 return fmt.Errorf("failed to match Node labels with config nodeselector (%s): %w", key, err) 215 } 216 217 appliedConfig, isApplied := r.configs[key] 218 219 switch { 220 case !isApplied && !selectsLocalNode: 221 scopedLogger.Debug("New config doesn't select the local Node") 222 223 case !isApplied && selectsLocalNode: 224 scopedLogger.Debug("New config selects the local node - adding config") 225 if err := r.manager.addCiliumEnvoyConfig(cfg.meta, cfg.spec); err != nil { 226 return err 227 } 228 229 case isApplied && selectsLocalNode && !appliedConfig.selectsLocalNode: 230 scopedLogger.Debug("Config now selects the local Node - adding previously filtered config") 231 if err := r.manager.addCiliumEnvoyConfig(cfg.meta, cfg.spec); err != nil { 232 return err 233 } 234 235 case isApplied && selectsLocalNode && appliedConfig.selectsLocalNode && specMayChanged: 236 scopedLogger.Debug("Config still selects the local Node - updating applied config") 237 if err := r.manager.updateCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec, cfg.meta, cfg.spec); err != nil { 238 return err 239 } 240 241 case isApplied && !selectsLocalNode && !appliedConfig.selectsLocalNode: 242 scopedLogger.Debug("Config still doesn't select the local Node") 243 244 case isApplied && !selectsLocalNode && appliedConfig.selectsLocalNode: 245 scopedLogger.Debug("Config no longer selects the local Node - deleting previously applied config") 246 if err := r.manager.deleteCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec); err != nil { 247 return err 248 } 249 } 250 251 r.configs[key] = &config{meta: cfg.meta, spec: cfg.spec, selectsLocalNode: selectsLocalNode} 252 253 return nil 254 } 255 256 func (r *ciliumEnvoyConfigReconciler) configDeleted(ctx context.Context, key resource.Key) error { 257 scopedLogger := r.logger. 258 WithField("key", key) 259 260 r.mutex.Lock() 261 defer r.mutex.Unlock() 262 263 appliedConfig, isApplied := r.configs[key] 264 265 switch { 266 case !isApplied: 267 scopedLogger.Warn("Deleted Envoy config has never been applied") 268 269 case isApplied && !appliedConfig.selectsLocalNode: 270 scopedLogger.Debug("Deleted CEC was already filtered by NodeSelector") 271 272 case isApplied && appliedConfig.selectsLocalNode: 273 scopedLogger.Debug("Deleting applied CEC") 274 if err := r.manager.deleteCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec); err != nil { 275 return err 276 } 277 } 278 279 delete(r.configs, key) 280 281 return nil 282 } 283 284 func (r *ciliumEnvoyConfigReconciler) configSelectsLocalNode(cfg *config) (bool, error) { 285 if cfg != nil && cfg.spec != nil && cfg.spec.NodeSelector != nil { 286 ls, err := slim_metav1.LabelSelectorAsSelector(cfg.spec.NodeSelector) 287 if err != nil { 288 return false, fmt.Errorf("invalid NodeSelector: %w", err) 289 } 290 291 if !ls.Matches(labels.Set(r.localNodeLabels)) { 292 return false, nil 293 } 294 } 295 296 return true, nil 297 } 298 299 func (r *ciliumEnvoyConfigReconciler) syncHeadlessService(_ context.Context) error { 300 r.mutex.Lock() 301 defer r.mutex.Unlock() 302 303 var reconcileErr error 304 305 for key, cfg := range r.configs { 306 if err := r.manager.syncCiliumEnvoyConfigService(cfg.meta.Name, cfg.meta.Namespace, cfg.spec); err != nil { 307 r.logger.WithField("key", key).WithError(err).Info("Failed to sync headless service, Hive will retry") 308 reconcileErr = errors.Join(reconcileErr, fmt.Errorf("failed to reconcile existing config (%s): %w", key, err)) 309 continue 310 } 311 } 312 313 return reconcileErr 314 }