github.com/cilium/cilium@v1.16.2/pkg/ciliumenvoyconfig/cec_reconciler.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package ciliumenvoyconfig
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"maps"
    11  	"sync/atomic"
    12  
    13  	"github.com/sirupsen/logrus"
    14  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    15  
    16  	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    17  	"github.com/cilium/cilium/pkg/k8s/resource"
    18  	"github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/labels"
    19  	slim_metav1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
    20  	"github.com/cilium/cilium/pkg/k8s/synced"
    21  	"github.com/cilium/cilium/pkg/lock"
    22  	"github.com/cilium/cilium/pkg/logging/logfields"
    23  	"github.com/cilium/cilium/pkg/node"
    24  )
    25  
    26  const (
    27  	k8sAPIGroupCiliumEnvoyConfigV2            = "cilium/v2::CiliumEnvoyConfig"
    28  	k8sAPIGroupCiliumClusterwideEnvoyConfigV2 = "cilium/v2::CiliumClusterwideEnvoyConfig"
    29  )
    30  
    31  type ciliumEnvoyConfigReconciler struct {
    32  	logger logrus.FieldLogger
    33  
    34  	k8sResourceSynced *synced.Resources
    35  	k8sAPIGroups      *synced.APIGroups
    36  
    37  	cecSynced  atomic.Bool
    38  	ccecSynced atomic.Bool
    39  
    40  	manager ciliumEnvoyConfigManager
    41  
    42  	mutex           lock.Mutex
    43  	configs         map[resource.Key]*config
    44  	localNodeLabels map[string]string
    45  }
    46  
    47  type config struct {
    48  	meta metav1.ObjectMeta
    49  	spec *ciliumv2.CiliumEnvoyConfigSpec
    50  	// Keeping the state whether the config matched as dedicated field.
    51  	// This is only used when checking whether an existing config selected
    52  	// the local node. (instead of re-evaluating using the node selector)
    53  	selectsLocalNode bool
    54  }
    55  
    56  func newCiliumEnvoyConfigReconciler(params reconcilerParams) *ciliumEnvoyConfigReconciler {
    57  	return &ciliumEnvoyConfigReconciler{
    58  		logger:            params.Logger,
    59  		k8sResourceSynced: params.K8sResourceSynced,
    60  		k8sAPIGroups:      params.K8sAPIGroups,
    61  		manager:           params.Manager,
    62  		configs:           map[resource.Key]*config{},
    63  	}
    64  }
    65  
    66  func (r *ciliumEnvoyConfigReconciler) registerResourceWithSyncFn(ctx context.Context, resource string, syncFn func() bool) {
    67  	if r.k8sResourceSynced != nil && r.k8sAPIGroups != nil {
    68  		r.k8sResourceSynced.BlockWaitGroupToSyncResources(ctx.Done(), nil, syncFn, resource)
    69  		r.k8sAPIGroups.AddAPI(resource)
    70  	}
    71  }
    72  
    73  func (r *ciliumEnvoyConfigReconciler) handleCECEvent(ctx context.Context, event resource.Event[*ciliumv2.CiliumEnvoyConfig]) error {
    74  	scopedLogger := r.logger.
    75  		WithField(logfields.K8sNamespace, event.Key.Namespace).
    76  		WithField(logfields.CiliumEnvoyConfigName, event.Key.Name)
    77  
    78  	var err error
    79  
    80  	switch event.Kind {
    81  	case resource.Sync:
    82  		scopedLogger.Debug("Received CiliumEnvoyConfig sync event")
    83  		r.cecSynced.Store(true)
    84  	case resource.Upsert:
    85  		scopedLogger.Debug("Received CiliumEnvoyConfig upsert event")
    86  		err = r.configUpserted(ctx, event.Key, &config{meta: event.Object.ObjectMeta, spec: &event.Object.Spec})
    87  		if err != nil {
    88  			scopedLogger.WithError(err).Info("Failed to handle CEC upsert, Hive will retry")
    89  			err = fmt.Errorf("failed to handle CEC upsert: %w", err)
    90  		}
    91  	case resource.Delete:
    92  		scopedLogger.Debug("Received CiliumEnvoyConfig delete event")
    93  		err = r.configDeleted(ctx, event.Key)
    94  		if err != nil {
    95  			scopedLogger.WithError(err).Info("Failed to handle CEC delete, Hive will retry")
    96  			err = fmt.Errorf("failed to handle CEC delete: %w", err)
    97  		}
    98  	}
    99  
   100  	event.Done(err)
   101  
   102  	return err
   103  }
   104  
   105  func (r *ciliumEnvoyConfigReconciler) handleCCECEvent(ctx context.Context, event resource.Event[*ciliumv2.CiliumClusterwideEnvoyConfig]) error {
   106  	scopedLogger := r.logger.
   107  		WithField(logfields.K8sNamespace, event.Key.Namespace).
   108  		WithField(logfields.CiliumClusterwideEnvoyConfigName, event.Key.Name)
   109  
   110  	var err error
   111  
   112  	switch event.Kind {
   113  	case resource.Sync:
   114  		scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig sync event")
   115  		r.ccecSynced.Store(true)
   116  	case resource.Upsert:
   117  		scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig upsert event")
   118  		err = r.configUpserted(ctx, event.Key, &config{meta: event.Object.ObjectMeta, spec: &event.Object.Spec})
   119  		if err != nil {
   120  			scopedLogger.WithError(err).Info("Failed to handle CCEC upsert, Hive will retry")
   121  			err = fmt.Errorf("failed to handle CCEC upsert: %w", err)
   122  		}
   123  	case resource.Delete:
   124  		scopedLogger.Debug("Received CiliumClusterwideEnvoyConfig delete event")
   125  		err = r.configDeleted(ctx, event.Key)
   126  		if err != nil {
   127  			scopedLogger.WithError(err).Info("Failed to handle CEC delete, Hive will retry")
   128  			err = fmt.Errorf("failed to handle CCEC delete: %w", err)
   129  		}
   130  	}
   131  
   132  	event.Done(err)
   133  
   134  	return err
   135  }
   136  
   137  func (r *ciliumEnvoyConfigReconciler) handleLocalNodeEvent(ctx context.Context, localNode node.LocalNode) error {
   138  	r.logger.Debug("Received LocalNode changed event")
   139  
   140  	if err := r.handleLocalNodeLabels(ctx, localNode); err != nil {
   141  		r.logger.WithError(err).Error("failed to handle LocalNode changed event")
   142  		return fmt.Errorf("failed to handle LocalNode changed event: %w", err)
   143  	}
   144  
   145  	return nil
   146  }
   147  
   148  func (r *ciliumEnvoyConfigReconciler) handleLocalNodeLabels(ctx context.Context, localNode node.LocalNode) error {
   149  	r.mutex.Lock()
   150  	defer r.mutex.Unlock()
   151  
   152  	if maps.Equal(r.localNodeLabels, localNode.Labels) {
   153  		r.logger.Debug("Labels of local Node didn't change")
   154  		return nil
   155  	}
   156  
   157  	r.localNodeLabels = localNode.Labels
   158  	r.logger.Debug("Labels of local Node changed - updated local store")
   159  
   160  	// Best effort attempt to reconcile existing configs as fast as possible.
   161  	//
   162  	// Errors are only logged and not reported. Otherwise the healthmanager state will be degraded
   163  	// until the next label change on the node.
   164  	// It's the responsibility of the corresponding TimerJob to perform a periodic reconciliation.
   165  	if err := r.reconcileExistingConfigsLocked(ctx); err != nil {
   166  		r.logger.WithError(err).Error("failed to reconcile existing configs due to changed node labels")
   167  	}
   168  
   169  	return nil
   170  }
   171  
   172  func (r *ciliumEnvoyConfigReconciler) reconcileExistingConfigs(ctx context.Context) error {
   173  	r.mutex.Lock()
   174  	defer r.mutex.Unlock()
   175  
   176  	return r.reconcileExistingConfigsLocked(ctx)
   177  }
   178  
   179  func (r *ciliumEnvoyConfigReconciler) reconcileExistingConfigsLocked(ctx context.Context) error {
   180  	r.logger.Debug("Checking whether existing configs need to be applied or filtered")
   181  
   182  	// Error containing all potential errors during reconciliation of the configs.
   183  	// On error, only the reconciliation of the faulty config is skipped. All other
   184  	// configs should be reconciled.
   185  	var reconcileErr error
   186  
   187  	for key, cfg := range r.configs {
   188  		scopedLogger := r.logger.WithField("key", key)
   189  
   190  		err := r.configUpsertedInternal(ctx, key, cfg, false /* spec didn't change */)
   191  		if err != nil {
   192  			scopedLogger.WithError(err).Error("failed to reconcile existing configs")
   193  			// don't prevent reconciliation of other configs in case of an error for a particular config
   194  			reconcileErr = errors.Join(reconcileErr, fmt.Errorf("failed to reconcile existing config (%s): %w", key, err))
   195  			continue
   196  		}
   197  	}
   198  
   199  	return reconcileErr
   200  }
   201  
   202  func (r *ciliumEnvoyConfigReconciler) configUpserted(ctx context.Context, key resource.Key, cfg *config) error {
   203  	r.mutex.Lock()
   204  	defer r.mutex.Unlock()
   205  
   206  	return r.configUpsertedInternal(ctx, key, cfg, true /* spec may have changed */)
   207  }
   208  
   209  func (r *ciliumEnvoyConfigReconciler) configUpsertedInternal(ctx context.Context, key resource.Key, cfg *config, specMayChanged bool) error {
   210  	scopedLogger := r.logger.WithField("key", key)
   211  
   212  	selectsLocalNode, err := r.configSelectsLocalNode(cfg)
   213  	if err != nil {
   214  		return fmt.Errorf("failed to match Node labels with config nodeselector (%s): %w", key, err)
   215  	}
   216  
   217  	appliedConfig, isApplied := r.configs[key]
   218  
   219  	switch {
   220  	case !isApplied && !selectsLocalNode:
   221  		scopedLogger.Debug("New config doesn't select the local Node")
   222  
   223  	case !isApplied && selectsLocalNode:
   224  		scopedLogger.Debug("New config selects the local node - adding config")
   225  		if err := r.manager.addCiliumEnvoyConfig(cfg.meta, cfg.spec); err != nil {
   226  			return err
   227  		}
   228  
   229  	case isApplied && selectsLocalNode && !appliedConfig.selectsLocalNode:
   230  		scopedLogger.Debug("Config now selects the local Node - adding previously filtered config")
   231  		if err := r.manager.addCiliumEnvoyConfig(cfg.meta, cfg.spec); err != nil {
   232  			return err
   233  		}
   234  
   235  	case isApplied && selectsLocalNode && appliedConfig.selectsLocalNode && specMayChanged:
   236  		scopedLogger.Debug("Config still selects the local Node - updating applied config")
   237  		if err := r.manager.updateCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec, cfg.meta, cfg.spec); err != nil {
   238  			return err
   239  		}
   240  
   241  	case isApplied && !selectsLocalNode && !appliedConfig.selectsLocalNode:
   242  		scopedLogger.Debug("Config still doesn't select the local Node")
   243  
   244  	case isApplied && !selectsLocalNode && appliedConfig.selectsLocalNode:
   245  		scopedLogger.Debug("Config no longer selects the local Node - deleting previously applied config")
   246  		if err := r.manager.deleteCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec); err != nil {
   247  			return err
   248  		}
   249  	}
   250  
   251  	r.configs[key] = &config{meta: cfg.meta, spec: cfg.spec, selectsLocalNode: selectsLocalNode}
   252  
   253  	return nil
   254  }
   255  
   256  func (r *ciliumEnvoyConfigReconciler) configDeleted(ctx context.Context, key resource.Key) error {
   257  	scopedLogger := r.logger.
   258  		WithField("key", key)
   259  
   260  	r.mutex.Lock()
   261  	defer r.mutex.Unlock()
   262  
   263  	appliedConfig, isApplied := r.configs[key]
   264  
   265  	switch {
   266  	case !isApplied:
   267  		scopedLogger.Warn("Deleted Envoy config has never been applied")
   268  
   269  	case isApplied && !appliedConfig.selectsLocalNode:
   270  		scopedLogger.Debug("Deleted CEC was already filtered by NodeSelector")
   271  
   272  	case isApplied && appliedConfig.selectsLocalNode:
   273  		scopedLogger.Debug("Deleting applied CEC")
   274  		if err := r.manager.deleteCiliumEnvoyConfig(appliedConfig.meta, appliedConfig.spec); err != nil {
   275  			return err
   276  		}
   277  	}
   278  
   279  	delete(r.configs, key)
   280  
   281  	return nil
   282  }
   283  
   284  func (r *ciliumEnvoyConfigReconciler) configSelectsLocalNode(cfg *config) (bool, error) {
   285  	if cfg != nil && cfg.spec != nil && cfg.spec.NodeSelector != nil {
   286  		ls, err := slim_metav1.LabelSelectorAsSelector(cfg.spec.NodeSelector)
   287  		if err != nil {
   288  			return false, fmt.Errorf("invalid NodeSelector: %w", err)
   289  		}
   290  
   291  		if !ls.Matches(labels.Set(r.localNodeLabels)) {
   292  			return false, nil
   293  		}
   294  	}
   295  
   296  	return true, nil
   297  }
   298  
   299  func (r *ciliumEnvoyConfigReconciler) syncHeadlessService(_ context.Context) error {
   300  	r.mutex.Lock()
   301  	defer r.mutex.Unlock()
   302  
   303  	var reconcileErr error
   304  
   305  	for key, cfg := range r.configs {
   306  		if err := r.manager.syncCiliumEnvoyConfigService(cfg.meta.Name, cfg.meta.Namespace, cfg.spec); err != nil {
   307  			r.logger.WithField("key", key).WithError(err).Info("Failed to sync headless service, Hive will retry")
   308  			reconcileErr = errors.Join(reconcileErr, fmt.Errorf("failed to reconcile existing config (%s): %w", key, err))
   309  			continue
   310  		}
   311  	}
   312  
   313  	return reconcileErr
   314  }