github.com/cilium/cilium@v1.16.2/clustermesh-apiserver/clustermesh/root.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package clustermesh
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"log/slog"
    10  	"net"
    11  	"path"
    12  	"sync"
    13  
    14  	"github.com/cilium/hive/cell"
    15  	"github.com/sirupsen/logrus"
    16  	"github.com/spf13/cobra"
    17  	"k8s.io/apimachinery/pkg/runtime"
    18  
    19  	cmk8s "github.com/cilium/cilium/clustermesh-apiserver/clustermesh/k8s"
    20  	"github.com/cilium/cilium/clustermesh-apiserver/syncstate"
    21  	operatorWatchers "github.com/cilium/cilium/operator/watchers"
    22  	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
    23  	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
    24  	"github.com/cilium/cilium/pkg/hive"
    25  	"github.com/cilium/cilium/pkg/identity"
    26  	identityCache "github.com/cilium/cilium/pkg/identity/cache"
    27  	"github.com/cilium/cilium/pkg/ipcache"
    28  	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    29  	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
    30  	"github.com/cilium/cilium/pkg/k8s/resource"
    31  	"github.com/cilium/cilium/pkg/k8s/types"
    32  	"github.com/cilium/cilium/pkg/kvstore"
    33  	"github.com/cilium/cilium/pkg/kvstore/store"
    34  	"github.com/cilium/cilium/pkg/labels"
    35  	"github.com/cilium/cilium/pkg/logging"
    36  	"github.com/cilium/cilium/pkg/logging/logfields"
    37  	"github.com/cilium/cilium/pkg/metrics"
    38  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    39  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    40  	"github.com/cilium/cilium/pkg/option"
    41  	"github.com/cilium/cilium/pkg/promise"
    42  	"github.com/cilium/cilium/pkg/version"
    43  )
    44  
    45  var (
    46  	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "clustermesh-apiserver")
    47  )
    48  
    49  func NewCmd(h *hive.Hive) *cobra.Command {
    50  	rootCmd := &cobra.Command{
    51  		Use:   "clustermesh",
    52  		Short: "Run ClusterMesh",
    53  		Run: func(cmd *cobra.Command, args []string) {
    54  			if err := h.Run(slog.Default()); err != nil {
    55  				log.Fatal(err)
    56  			}
    57  		},
    58  		PreRun: func(cmd *cobra.Command, args []string) {
    59  			// Overwrite the metrics namespace with the one specific for the ClusterMesh API Server
    60  			metrics.Namespace = metrics.CiliumClusterMeshAPIServerNamespace
    61  			option.Config.Populate(h.Viper())
    62  			if option.Config.Debug {
    63  				log.Logger.SetLevel(logrus.DebugLevel)
    64  			}
    65  			option.LogRegisteredOptions(h.Viper(), log)
    66  			log.Infof("Cilium ClusterMesh %s", version.Version)
    67  		},
    68  	}
    69  
    70  	h.RegisterFlags(rootCmd.Flags())
    71  	rootCmd.AddCommand(h.Command())
    72  	return rootCmd
    73  }
    74  
    75  type parameters struct {
    76  	cell.In
    77  
    78  	ExternalWorkloadsConfig
    79  	ClusterInfo    cmtypes.ClusterInfo
    80  	Clientset      k8sClient.Clientset
    81  	Resources      cmk8s.Resources
    82  	BackendPromise promise.Promise[kvstore.BackendOperations]
    83  	StoreFactory   store.Factory
    84  	SyncState      syncstate.SyncState
    85  }
    86  
    87  func registerHooks(lc cell.Lifecycle, params parameters) error {
    88  	lc.Append(cell.Hook{
    89  		OnStart: func(ctx cell.HookContext) error {
    90  			if !params.Clientset.IsEnabled() {
    91  				return errors.New("Kubernetes client not configured, cannot continue.")
    92  			}
    93  
    94  			backend, err := params.BackendPromise.Await(ctx)
    95  			if err != nil {
    96  				return err
    97  			}
    98  
    99  			startServer(ctx, params.ClusterInfo, params.EnableExternalWorkloads, params.Clientset, backend, params.Resources, params.StoreFactory, params.SyncState)
   100  			return nil
   101  		},
   102  	})
   103  	return nil
   104  }
   105  
   106  type identitySynchronizer struct {
   107  	store        store.SyncStore
   108  	encoder      func([]byte) string
   109  	syncCallback func(context.Context)
   110  }
   111  
   112  func newIdentitySynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer {
   113  	identitiesStore := factory.NewSyncStore(cinfo.Name, backend,
   114  		path.Join(identityCache.IdentitiesPath, "id"),
   115  		store.WSSWithSyncedKeyOverride(identityCache.IdentitiesPath))
   116  	go identitiesStore.Run(ctx)
   117  
   118  	return &identitySynchronizer{store: identitiesStore, encoder: backend.Encode, syncCallback: syncCallback}
   119  }
   120  
   121  func parseLabelArrayFromMap(base map[string]string) labels.LabelArray {
   122  	array := make(labels.LabelArray, 0, len(base))
   123  	for sourceAndKey, value := range base {
   124  		array = append(array, labels.NewLabel(sourceAndKey, value, ""))
   125  	}
   126  	return array.Sort()
   127  }
   128  
   129  func (is *identitySynchronizer) upsert(ctx context.Context, _ resource.Key, obj runtime.Object) error {
   130  	identity := obj.(*ciliumv2.CiliumIdentity)
   131  	scopedLog := log.WithField(logfields.Identity, identity.Name)
   132  	if len(identity.SecurityLabels) == 0 {
   133  		scopedLog.WithError(errors.New("missing security labels")).Warning("Ignoring invalid identity")
   134  		// Do not return an error, since it is pointless to retry.
   135  		// We will receive a new update event if the security labels change.
   136  		return nil
   137  	}
   138  
   139  	labelArray := parseLabelArrayFromMap(identity.SecurityLabels)
   140  
   141  	var labels []byte
   142  	for _, l := range labelArray {
   143  		labels = append(labels, l.FormatForKVStore()...)
   144  	}
   145  
   146  	scopedLog.Info("Upserting identity in etcd")
   147  	kv := store.NewKVPair(identity.Name, is.encoder(labels))
   148  	if err := is.store.UpsertKey(ctx, kv); err != nil {
   149  		// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   150  		log.WithError(err).Warning("Unable to upsert identity in etcd")
   151  	}
   152  
   153  	return nil
   154  }
   155  
   156  func (is *identitySynchronizer) delete(ctx context.Context, key resource.Key) error {
   157  	scopedLog := log.WithField(logfields.Identity, key.Name)
   158  	scopedLog.Info("Deleting identity from etcd")
   159  
   160  	if err := is.store.DeleteKey(ctx, store.NewKVPair(key.Name, "")); err != nil {
   161  		// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   162  		scopedLog.WithError(err).Warning("Unable to delete node from etcd")
   163  	}
   164  
   165  	return nil
   166  }
   167  
   168  func (is *identitySynchronizer) synced(ctx context.Context) error {
   169  	log.Info("Initial list of identities successfully received from Kubernetes")
   170  	return is.store.Synced(ctx, is.syncCallback)
   171  }
   172  
   173  type nodeStub struct {
   174  	cluster string
   175  	name    string
   176  }
   177  
   178  func (n *nodeStub) GetKeyName() string {
   179  	return nodeTypes.GetKeyNodeName(n.cluster, n.name)
   180  }
   181  
   182  type nodeSynchronizer struct {
   183  	clusterInfo  cmtypes.ClusterInfo
   184  	store        store.SyncStore
   185  	syncCallback func(context.Context)
   186  }
   187  
   188  func newNodeSynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer {
   189  	nodesStore := factory.NewSyncStore(cinfo.Name, backend, nodeStore.NodeStorePrefix)
   190  	go nodesStore.Run(ctx)
   191  
   192  	return &nodeSynchronizer{clusterInfo: cinfo, store: nodesStore, syncCallback: syncCallback}
   193  }
   194  
   195  func (ns *nodeSynchronizer) upsert(ctx context.Context, _ resource.Key, obj runtime.Object) error {
   196  	n := nodeTypes.ParseCiliumNode(obj.(*ciliumv2.CiliumNode))
   197  	n.Cluster = ns.clusterInfo.Name
   198  	n.ClusterID = ns.clusterInfo.ID
   199  
   200  	scopedLog := log.WithField(logfields.Node, n.Name)
   201  	scopedLog.Info("Upserting node in etcd")
   202  
   203  	if err := ns.store.UpsertKey(ctx, &n); err != nil {
   204  		// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   205  		log.WithError(err).Warning("Unable to upsert node in etcd")
   206  	}
   207  
   208  	return nil
   209  }
   210  
   211  func (ns *nodeSynchronizer) delete(ctx context.Context, key resource.Key) error {
   212  	n := nodeStub{
   213  		cluster: ns.clusterInfo.Name,
   214  		name:    key.Name,
   215  	}
   216  
   217  	scopedLog := log.WithFields(logrus.Fields{logfields.Node: key.Name})
   218  	scopedLog.Info("Deleting node from etcd")
   219  
   220  	if err := ns.store.DeleteKey(ctx, &n); err != nil {
   221  		// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   222  		scopedLog.WithError(err).Warning("Unable to delete node from etcd")
   223  	}
   224  
   225  	return nil
   226  }
   227  
   228  func (ns *nodeSynchronizer) synced(ctx context.Context) error {
   229  	log.Info("Initial list of nodes successfully received from Kubernetes")
   230  	return ns.store.Synced(ctx, ns.syncCallback)
   231  }
   232  
   233  type ipmap map[string]struct{}
   234  
   235  type endpointSynchronizer struct {
   236  	store        store.SyncStore
   237  	cache        map[string]ipmap
   238  	syncCallback func(context.Context)
   239  }
   240  
   241  func newEndpointSynchronizer(ctx context.Context, cinfo cmtypes.ClusterInfo, backend kvstore.BackendOperations, factory store.Factory, syncCallback func(context.Context)) synchronizer {
   242  	endpointsStore := factory.NewSyncStore(cinfo.Name, backend,
   243  		path.Join(ipcache.IPIdentitiesPath, ipcache.DefaultAddressSpace),
   244  		store.WSSWithSyncedKeyOverride(ipcache.IPIdentitiesPath))
   245  	go endpointsStore.Run(ctx)
   246  
   247  	return &endpointSynchronizer{
   248  		store:        endpointsStore,
   249  		cache:        make(map[string]ipmap),
   250  		syncCallback: syncCallback,
   251  	}
   252  }
   253  
   254  func (es *endpointSynchronizer) upsert(ctx context.Context, key resource.Key, obj runtime.Object) error {
   255  	endpoint := obj.(*types.CiliumEndpoint)
   256  	ips := make(ipmap)
   257  	stale := es.cache[key.String()]
   258  
   259  	if n := endpoint.Networking; n != nil {
   260  		for _, address := range n.Addressing {
   261  			for _, ip := range []string{address.IPV4, address.IPV6} {
   262  				if ip == "" {
   263  					continue
   264  				}
   265  
   266  				scopedLog := log.WithFields(logrus.Fields{logfields.Endpoint: key.String(), logfields.IPAddr: ip})
   267  				entry := identity.IPIdentityPair{
   268  					IP:           net.ParseIP(ip),
   269  					HostIP:       net.ParseIP(n.NodeIP),
   270  					K8sNamespace: endpoint.Namespace,
   271  					K8sPodName:   endpoint.Name,
   272  				}
   273  
   274  				if endpoint.Identity != nil {
   275  					entry.ID = identity.NumericIdentity(endpoint.Identity.ID)
   276  				}
   277  
   278  				if endpoint.Encryption != nil {
   279  					entry.Key = uint8(endpoint.Encryption.Key)
   280  				}
   281  
   282  				scopedLog.Info("Upserting endpoint in etcd")
   283  				if err := es.store.UpsertKey(ctx, &entry); err != nil {
   284  					// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   285  					scopedLog.WithError(err).Warning("Unable to upsert endpoint in etcd")
   286  					continue
   287  				}
   288  
   289  				ips[ip] = struct{}{}
   290  				delete(stale, ip)
   291  			}
   292  		}
   293  	}
   294  
   295  	// Delete the stale endpoint IPs from the KVStore.
   296  	es.deleteEndpoints(ctx, key, stale)
   297  	es.cache[key.String()] = ips
   298  
   299  	return nil
   300  }
   301  
   302  func (es *endpointSynchronizer) delete(ctx context.Context, key resource.Key) error {
   303  	es.deleteEndpoints(ctx, key, es.cache[key.String()])
   304  	delete(es.cache, key.String())
   305  	return nil
   306  }
   307  
   308  func (es *endpointSynchronizer) synced(ctx context.Context) error {
   309  	log.Info("Initial list of endpoints successfully received from Kubernetes")
   310  	return es.store.Synced(ctx, es.syncCallback)
   311  }
   312  
   313  func (es *endpointSynchronizer) deleteEndpoints(ctx context.Context, key resource.Key, ips ipmap) {
   314  	for ip := range ips {
   315  		scopedLog := log.WithFields(logrus.Fields{logfields.Endpoint: key.String(), logfields.IPAddr: ip})
   316  		scopedLog.Info("Deleting endpoint from etcd")
   317  
   318  		entry := identity.IPIdentityPair{IP: net.ParseIP(ip)}
   319  		if err := es.store.DeleteKey(ctx, &entry); err != nil {
   320  			// The only errors surfaced by WorkqueueSyncStore are the unrecoverable ones.
   321  			scopedLog.WithError(err).Warning("Unable to delete endpoint from etcd")
   322  		}
   323  	}
   324  }
   325  
   326  type synchronizer interface {
   327  	upsert(ctx context.Context, key resource.Key, obj runtime.Object) error
   328  	delete(ctx context.Context, key resource.Key) error
   329  	synced(ctx context.Context) error
   330  }
   331  
   332  func synchronize[T runtime.Object](ctx context.Context, r resource.Resource[T], sync synchronizer) {
   333  	for event := range r.Events(ctx) {
   334  		switch event.Kind {
   335  		case resource.Upsert:
   336  			event.Done(sync.upsert(ctx, event.Key, event.Object))
   337  		case resource.Delete:
   338  			event.Done(sync.delete(ctx, event.Key))
   339  		case resource.Sync:
   340  			event.Done(sync.synced(ctx))
   341  		}
   342  	}
   343  }
   344  
   345  func startServer(
   346  	startCtx cell.HookContext,
   347  	cinfo cmtypes.ClusterInfo,
   348  	allServices bool,
   349  	clientset k8sClient.Clientset,
   350  	backend kvstore.BackendOperations,
   351  	resources cmk8s.Resources,
   352  	factory store.Factory,
   353  	syncState syncstate.SyncState,
   354  ) {
   355  	log.WithFields(logrus.Fields{
   356  		"cluster-name": cinfo.Name,
   357  		"cluster-id":   cinfo.ID,
   358  	}).Info("Starting clustermesh-apiserver...")
   359  
   360  	config := cmtypes.CiliumClusterConfig{
   361  		ID: cinfo.ID,
   362  		Capabilities: cmtypes.CiliumClusterConfigCapabilities{
   363  			SyncedCanaries:       true,
   364  			MaxConnectedClusters: cinfo.MaxConnectedClusters,
   365  		},
   366  	}
   367  
   368  	_, err := cmutils.EnforceClusterConfig(context.Background(), cinfo.Name, config, backend, log)
   369  	if err != nil {
   370  		log.WithError(err).Fatal("Unable to set local cluster config on kvstore")
   371  	}
   372  
   373  	ctx := context.Background()
   374  	go synchronize(ctx, resources.CiliumIdentities, newIdentitySynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource()))
   375  	go synchronize(ctx, resources.CiliumNodes, newNodeSynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource()))
   376  	go synchronize(ctx, resources.CiliumSlimEndpoints, newEndpointSynchronizer(ctx, cinfo, backend, factory, syncState.WaitForResource()))
   377  	operatorWatchers.StartSynchronizingServices(ctx, &sync.WaitGroup{}, operatorWatchers.ServiceSyncParameters{
   378  		ClusterInfo:  cinfo,
   379  		Clientset:    clientset,
   380  		Services:     resources.Services,
   381  		Endpoints:    resources.Endpoints,
   382  		Backend:      backend,
   383  		SharedOnly:   !allServices,
   384  		StoreFactory: factory,
   385  		SyncCallback: syncState.WaitForResource(),
   386  	})
   387  	syncState.Stop()
   388  
   389  	log.Info("Initialization complete")
   390  }