github.com/cilium/cilium@v1.16.2/clustermesh-apiserver/clustermesh/vmmanager.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package clustermesh
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"net"
    11  	"path"
    12  	"sort"
    13  
    14  	"github.com/cilium/hive/cell"
    15  	"github.com/spf13/pflag"
    16  	"k8s.io/apimachinery/pkg/api/errors"
    17  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    18  	"k8s.io/apimachinery/pkg/types"
    19  
    20  	"github.com/cilium/cilium/api/v1/models"
    21  	"github.com/cilium/cilium/pkg/cidr"
    22  	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
    23  	"github.com/cilium/cilium/pkg/defaults"
    24  	"github.com/cilium/cilium/pkg/identity"
    25  	identityCache "github.com/cilium/cilium/pkg/identity/cache"
    26  	identitymodel "github.com/cilium/cilium/pkg/identity/model"
    27  	"github.com/cilium/cilium/pkg/k8s"
    28  	k8sConst "github.com/cilium/cilium/pkg/k8s/apis/cilium.io"
    29  	ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    30  	k8sClient "github.com/cilium/cilium/pkg/k8s/client"
    31  	clientset "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned"
    32  	"github.com/cilium/cilium/pkg/k8s/resource"
    33  	"github.com/cilium/cilium/pkg/k8s/synced"
    34  	"github.com/cilium/cilium/pkg/kvstore"
    35  	"github.com/cilium/cilium/pkg/kvstore/store"
    36  	"github.com/cilium/cilium/pkg/labels"
    37  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    38  	nodeTypes "github.com/cilium/cilium/pkg/node/types"
    39  	"github.com/cilium/cilium/pkg/option"
    40  	"github.com/cilium/cilium/pkg/promise"
    41  )
    42  
    43  var externalWorkloadsCell = cell.Module(
    44  	"external-workloads",
    45  	"External workloads",
    46  
    47  	cell.Config(
    48  		// The default value is set to true to match the existing behavior in case
    49  		// the flag is not configured (for instance by the legacy cilium CLI).
    50  		ExternalWorkloadsConfig{EnableExternalWorkloads: true},
    51  	),
    52  
    53  	cell.Provide(externalWorkloadsProvider),
    54  	cell.Invoke(func(*VMManager) {}),
    55  )
    56  
    57  type ExternalWorkloadsConfig struct {
    58  	EnableExternalWorkloads bool
    59  }
    60  
    61  func (def ExternalWorkloadsConfig) Flags(flags *pflag.FlagSet) {
    62  	flags.Bool("enable-external-workloads", def.EnableExternalWorkloads, "Enable support for external workloads")
    63  }
    64  
    65  func externalWorkloadsProvider(
    66  	lc cell.Lifecycle,
    67  
    68  	cfg ExternalWorkloadsConfig,
    69  	clusterInfo cmtypes.ClusterInfo,
    70  
    71  	clientset k8sClient.Clientset,
    72  	crdSyncPromise promise.Promise[synced.CRDSync],
    73  	ciliumExternalWorkloads resource.Resource[*ciliumv2.CiliumExternalWorkload],
    74  	backendPromise promise.Promise[kvstore.BackendOperations],
    75  ) *VMManager {
    76  	if !cfg.EnableExternalWorkloads {
    77  		return nil
    78  	}
    79  
    80  	// External workloads require CRD allocation mode
    81  	option.Config.IdentityAllocationMode = option.IdentityAllocationModeCRD
    82  	option.Config.AllocatorListTimeout = defaults.AllocatorListTimeout
    83  
    84  	mgr := &VMManager{
    85  		clusterInfo:  clusterInfo,
    86  		ciliumClient: clientset,
    87  	}
    88  
    89  	lc.Append(cell.Hook{
    90  		OnStart: func(ctx cell.HookContext) error {
    91  			_, err := crdSyncPromise.Await(ctx)
    92  			if err != nil {
    93  				return fmt.Errorf("Wait for CRD resources failed: %w", err)
    94  			}
    95  
    96  			ewstore, err := ciliumExternalWorkloads.Store(ctx)
    97  			if err != nil {
    98  				return fmt.Errorf("unable to retrieve CiliumExternalWorkloads store: %w", err)
    99  			}
   100  
   101  			backend, err := backendPromise.Await(ctx)
   102  			if err != nil {
   103  				return err
   104  			}
   105  
   106  			mgr.ciliumExternalWorkloadStore = ewstore
   107  			mgr.backend = backend
   108  			mgr.identityAllocator = identityCache.NewCachingIdentityAllocator(mgr)
   109  			mgr.identityAllocator.InitIdentityAllocator(clientset)
   110  
   111  			if _, err = store.JoinSharedStore(store.Configuration{
   112  				Backend:              backend,
   113  				Prefix:               nodeStore.NodeRegisterStorePrefix,
   114  				KeyCreator:           nodeStore.RegisterKeyCreator,
   115  				SharedKeyDeleteDelay: defaults.NodeDeleteDelay,
   116  				Observer:             mgr,
   117  			}); err != nil {
   118  				return fmt.Errorf("unable to set up node register store: %w", err)
   119  			}
   120  
   121  			return nil
   122  		},
   123  	})
   124  
   125  	return mgr
   126  }
   127  
   128  type VMManager struct {
   129  	clusterInfo cmtypes.ClusterInfo
   130  
   131  	ciliumClient      clientset.Interface
   132  	identityAllocator *identityCache.CachingIdentityAllocator
   133  
   134  	ciliumExternalWorkloadStore resource.Store[*ciliumv2.CiliumExternalWorkload]
   135  
   136  	backend kvstore.BackendOperations
   137  }
   138  
   139  //
   140  // IdentityAllocatorOwner interface
   141  //
   142  
   143  // UpdateIdentities will be called when identities have changed
   144  func (m *VMManager) UpdateIdentities(added, deleted identity.IdentityMap) {}
   145  
   146  // GetNodeSuffix must return the node specific suffix to use
   147  func (m *VMManager) GetNodeSuffix() string {
   148  	return "vm-allocator"
   149  }
   150  
   151  func (m *VMManager) nodeOverrideFromCEW(n *nodeTypes.RegisterNode, cew *ciliumv2.CiliumExternalWorkload) *nodeTypes.RegisterNode {
   152  	nk := n.DeepCopy()
   153  
   154  	nk.Labels = make(map[string]string, len(cew.Labels))
   155  	for k, v := range cew.Labels {
   156  		nk.Labels[k] = v
   157  	}
   158  
   159  	// Default pod name and namespace labels
   160  	if nk.Labels[k8sConst.PodNamespaceLabel] == "" {
   161  		nk.Labels[k8sConst.PodNamespaceLabel] = "default"
   162  	}
   163  	if nk.Labels[k8sConst.PodNameLabel] == "" {
   164  		nk.Labels[k8sConst.PodNameLabel] = cew.Name
   165  	}
   166  
   167  	// Override cluster
   168  	nk.Cluster = m.clusterInfo.Name
   169  	nk.ClusterID = m.clusterInfo.ID
   170  	nk.Labels[k8sConst.PolicyLabelCluster] = m.clusterInfo.Name
   171  
   172  	// Override CIDRs if defined
   173  	if cew.Spec.IPv4AllocCIDR != "" {
   174  		if cidr, err := cidr.ParseCIDR(cew.Spec.IPv4AllocCIDR); err == nil {
   175  			if ip4 := cidr.IP.To4(); ip4 != nil {
   176  				nk.IPv4AllocCIDR = cidr
   177  			} else {
   178  				log.Warning("CEW: ipv4-alloc-cidr is not IPv4")
   179  			}
   180  		} else {
   181  			log.WithError(err).Warningf("CEW: parse error on %s", cew.Spec.IPv4AllocCIDR)
   182  		}
   183  	}
   184  	if cew.Spec.IPv6AllocCIDR != "" {
   185  		if cidr, err := cidr.ParseCIDR(cew.Spec.IPv6AllocCIDR); err == nil {
   186  			if ip6 := cidr.IP.To16(); ip6 != nil {
   187  				nk.IPv6AllocCIDR = cidr
   188  			} else {
   189  				log.Warning("CEW: ipv6-alloc-cidr is not IPv6")
   190  			}
   191  		} else {
   192  			log.WithError(err).Warningf("CEW: parse error on %s", cew.Spec.IPv6AllocCIDR)
   193  		}
   194  	}
   195  	return nk
   196  }
   197  
   198  //
   199  // Observer interface
   200  //
   201  
   202  func (m *VMManager) OnUpdate(k store.Key) {
   203  	if n, ok := k.(*nodeTypes.RegisterNode); ok {
   204  		// Only handle registration events if CiliumExternalWorkload CRD with a matching name exists
   205  		cew, exists, _ := m.ciliumExternalWorkloadStore.GetByKey(resource.Key{Name: n.Name})
   206  		if !exists {
   207  			log.Warningf("CEW: CiliumExternalWorkload resource not found for: %v", n)
   208  			return
   209  		}
   210  
   211  		if n.NodeIdentity == 0 {
   212  			// Phase 1: Initial registration with zero ID, return configuration
   213  			nk := m.nodeOverrideFromCEW(n, cew)
   214  
   215  			log.Debugf("CEW: VM Cilium Node updated: %v -> %v", n, nk)
   216  			// FIXME: GH-17909 Balance this call with a call to release the identity.
   217  			id := m.AllocateNodeIdentity(nk)
   218  			if id != nil {
   219  				nid := id.ID.Uint32()
   220  				nk.NodeIdentity = nid
   221  
   222  				// clear addresses so that we know the registration is not ready yet
   223  				nk.IPAddresses = nil
   224  
   225  				// Update the registration, now with the node identity and overridden fields
   226  				if err := m.syncKVStoreKey(context.Background(), nk); err != nil {
   227  					log.WithError(err).Warning("CEW: Unable to update register node in etcd")
   228  				} else {
   229  					log.Debugf("CEW: Updated register node in etcd (nid: %d): %v", nid, nk)
   230  				}
   231  			}
   232  		} else if len(n.IPAddresses) > 0 {
   233  			// Phase 2: non-zero ID registration with addresses
   234  
   235  			// Override again, just in case the external node is misbehaving
   236  			nk := m.nodeOverrideFromCEW(n, cew)
   237  
   238  			id := m.LookupNodeIdentity(nk)
   239  			if id == nil || id.ID.Uint32() != nk.NodeIdentity {
   240  				log.Errorf("CEW: Invalid identity %d in %v", nk.NodeIdentity, nk)
   241  			}
   242  
   243  			// Create cluster resources for the external node
   244  			nodeIP := nk.GetNodeIP(false)
   245  			m.UpdateCiliumNodeResource(nk, cew)
   246  			m.UpdateCiliumEndpointResource(nk.Name, id, nk.IPAddresses, nodeIP)
   247  
   248  			nid := id.ID.Uint32()
   249  
   250  			// Update CEW with the identity and IP
   251  			cewCopy := cew.DeepCopy()
   252  			cewCopy.Status.ID = uint64(nid)
   253  			cewCopy.Status.IP = nodeIP.String()
   254  			for retryCount := 0; retryCount < maxRetryCount; retryCount++ {
   255  				if _, err := m.ciliumClient.CiliumV2().CiliumExternalWorkloads().UpdateStatus(context.TODO(), cewCopy, metav1.UpdateOptions{}); err != nil {
   256  					if errors.IsConflict(err) {
   257  						log.WithError(err).Warn("CEW: Unable to update CiliumExternalWorkload status, will retry")
   258  						continue
   259  					}
   260  					log.WithError(err).Error("CEW: Unable to update CiliumExternalWorkload status")
   261  				} else {
   262  					log.Debugf("CEW: Successfully updated CiliumExternalWorkload status: %v", *cewCopy)
   263  					break
   264  				}
   265  			}
   266  		}
   267  	} else {
   268  		log.Errorf("CEW: VM Cilium Node not RegisterNode: %v", k)
   269  	}
   270  }
   271  
   272  func (m *VMManager) OnDelete(k store.NamedKey) {
   273  	log.Debugf("RegisterNode deleted: %v", k.GetKeyName())
   274  }
   275  
   276  func (m *VMManager) AllocateNodeIdentity(n *nodeTypes.RegisterNode) *identity.Identity {
   277  	vmLabels := labels.Map2Labels(n.Labels, labels.LabelSourceK8s)
   278  
   279  	log.Debug("Resolving identity for VM labels")
   280  	ctx, cancel := context.WithTimeout(context.TODO(), option.Config.KVstoreConnectivityTimeout)
   281  	defer cancel()
   282  
   283  	id := m.identityAllocator.LookupIdentity(ctx, vmLabels)
   284  	if id != nil {
   285  		return id
   286  	}
   287  
   288  	id, allocated, err := m.identityAllocator.AllocateIdentity(ctx, vmLabels, true, identity.InvalidIdentity)
   289  	if err != nil {
   290  		log.WithError(err).Error("unable to resolve identity")
   291  	} else {
   292  		if allocated {
   293  			log.Debugf("allocated identity %v", id)
   294  		} else {
   295  			log.Debugf("identity %v was already allocated", id)
   296  		}
   297  	}
   298  	return id
   299  }
   300  
   301  func (m *VMManager) LookupNodeIdentity(n *nodeTypes.RegisterNode) *identity.Identity {
   302  	vmLabels := labels.Map2Labels(n.Labels, labels.LabelSourceK8s)
   303  
   304  	log.Debug("Looking up identity for VM labels")
   305  	ctx, cancel := context.WithTimeout(context.TODO(), option.Config.KVstoreConnectivityTimeout)
   306  	defer cancel()
   307  
   308  	return m.identityAllocator.LookupIdentity(ctx, vmLabels)
   309  }
   310  
   311  const (
   312  	maxRetryCount = 5
   313  )
   314  
   315  // UpdateCiliumNodeResource updates the CiliumNode resource representing the
   316  // local node
   317  func (m *VMManager) UpdateCiliumNodeResource(n *nodeTypes.RegisterNode, cew *ciliumv2.CiliumExternalWorkload) {
   318  	nr := n.ToCiliumNode()
   319  	nr.OwnerReferences = []metav1.OwnerReference{
   320  		{
   321  			APIVersion: ciliumv2.SchemeGroupVersion.String(),
   322  			Kind:       ciliumv2.CEWKindDefinition,
   323  			Name:       cew.GetName(),
   324  			UID:        cew.GetUID(),
   325  		},
   326  	}
   327  
   328  	for retryCount := 0; retryCount < maxRetryCount; retryCount++ {
   329  		log.Info("Getting CN during an update")
   330  		nodeResource, err := m.ciliumClient.CiliumV2().CiliumNodes().Get(context.TODO(), n.Name, metav1.GetOptions{})
   331  		if err != nil {
   332  			if _, err = m.ciliumClient.CiliumV2().CiliumNodes().Create(context.TODO(), nr, metav1.CreateOptions{}); err != nil {
   333  				if errors.IsConflict(err) {
   334  					log.WithError(err).Warn("Unable to create CiliumNode resource, will retry")
   335  					continue
   336  				}
   337  				log.WithError(err).Fatal("Unable to create CiliumNode resource")
   338  			} else {
   339  				log.Infof("Successfully created CiliumNode resource: %v", *nr)
   340  				return
   341  			}
   342  		} else {
   343  			nodeResource.ObjectMeta.Labels = nr.ObjectMeta.Labels
   344  			nodeResource.Spec = nr.Spec
   345  			if _, err := m.ciliumClient.CiliumV2().CiliumNodes().Update(context.TODO(), nodeResource, metav1.UpdateOptions{}); err != nil {
   346  				if errors.IsConflict(err) {
   347  					log.WithError(err).Warn("Unable to update CiliumNode resource, will retry")
   348  					continue
   349  				}
   350  				log.WithError(err).Fatal("Unable to update CiliumNode resource")
   351  			} else {
   352  				log.Infof("Successfully updated CiliumNode resource: %v", *nodeResource)
   353  				return
   354  			}
   355  		}
   356  	}
   357  	log.Fatal("Could not create or update CiliumNode resource, despite retries")
   358  }
   359  
   360  // UpdateCiliumEndpointResource updates the CiliumNode resource representing the
   361  // local node
   362  func (m *VMManager) UpdateCiliumEndpointResource(name string, id *identity.Identity, ipAddresses []nodeTypes.Address, nodeIP net.IP) {
   363  	var addresses []*ciliumv2.AddressPair
   364  	i := 0
   365  	for _, addr := range ipAddresses {
   366  		if len(addresses) == i {
   367  			addresses = append(addresses, &ciliumv2.AddressPair{})
   368  		}
   369  		if ipv4 := addr.IP.To4(); ipv4 != nil {
   370  			if addresses[i].IPV4 != "" {
   371  				addresses = append(addresses, &ciliumv2.AddressPair{})
   372  				i++
   373  			}
   374  			addresses[i].IPV4 = ipv4.String()
   375  		} else if ipv6 := addr.IP.To16(); ipv6 != nil {
   376  			if addresses[i].IPV6 != "" {
   377  				addresses = append(addresses, &ciliumv2.AddressPair{})
   378  				i++
   379  			}
   380  			addresses[i].IPV6 = ipv6.String()
   381  		}
   382  	}
   383  
   384  	namespace := id.Labels[k8sConst.PodNamespaceLabel].Value
   385  
   386  	var localCEP *ciliumv2.CiliumEndpoint
   387  	for retryCount := 0; retryCount < maxRetryCount; retryCount++ {
   388  		log.Info("Getting Node during an CEP update")
   389  		nr, err := m.ciliumClient.CiliumV2().CiliumNodes().Get(context.TODO(), name, metav1.GetOptions{})
   390  		if err != nil {
   391  			log.WithError(err).Warn("Unable to get CiliumNode resource, will retry")
   392  			continue
   393  		}
   394  		log.Info("Getting CEP during an initialization")
   395  		_, err = m.ciliumClient.CiliumV2().CiliumEndpoints(namespace).Get(context.TODO(), name, metav1.GetOptions{})
   396  		if err != nil {
   397  			cep := &ciliumv2.CiliumEndpoint{
   398  				ObjectMeta: metav1.ObjectMeta{
   399  					Name:      name,
   400  					Namespace: namespace,
   401  					OwnerReferences: []metav1.OwnerReference{{
   402  						APIVersion: "cilium.io/v2",
   403  						Kind:       "CiliumNode",
   404  						Name:       nr.ObjectMeta.Name,
   405  						UID:        nr.ObjectMeta.UID,
   406  					}},
   407  					Labels: map[string]string{
   408  						"name": name,
   409  					},
   410  				},
   411  			}
   412  			if localCEP, err = m.ciliumClient.CiliumV2().CiliumEndpoints(namespace).Create(context.TODO(), cep, metav1.CreateOptions{}); err != nil {
   413  				if errors.IsConflict(err) {
   414  					log.WithError(err).Warn("Unable to create CiliumEndpoint resource, will retry")
   415  					continue
   416  				}
   417  				log.WithError(err).Fatal("Unable to create CiliumEndpoint resource")
   418  			}
   419  			js, _ := json.Marshal(cep)
   420  			log.Infof("Successfully created CiliumEndpoint resource %s/%s: %s", namespace, name, js)
   421  			js, _ = json.Marshal(localCEP)
   422  			log.Infof("Returned CiliumEndpoint resource %s/%s: %s", namespace, name, js)
   423  		}
   424  
   425  		mdl := ciliumv2.EndpointStatus{
   426  			ID: int64(1),
   427  			// ExternalIdentifiers: e.getModelEndpointIdentitiersRLocked(),
   428  			Identity: getEndpointIdentity(identitymodel.CreateModel(id)),
   429  			Networking: &ciliumv2.EndpointNetworking{
   430  				Addressing: addresses,
   431  				NodeIP:     nodeIP.String(),
   432  			},
   433  			State: string(models.EndpointStateReady), // XXX
   434  			// Encryption: ciliumv2.EncryptionSpec{Key: int(n.GetIPsecKeyIdentity())},
   435  			// NamedPorts: e.getNamedPortsModel(),
   436  		}
   437  
   438  		replaceCEPStatus := []k8s.JSONPatch{
   439  			{
   440  				OP:    "replace",
   441  				Path:  "/status",
   442  				Value: mdl,
   443  			},
   444  		}
   445  		var createStatusPatch []byte
   446  		createStatusPatch, err = json.Marshal(replaceCEPStatus)
   447  		if err != nil {
   448  			log.WithError(err).Fatalf("json.Marshal(%v) failed", replaceCEPStatus)
   449  		}
   450  		localCEP, err = m.ciliumClient.CiliumV2().CiliumEndpoints(namespace).Patch(context.TODO(), name,
   451  			types.JSONPatchType, createStatusPatch, metav1.PatchOptions{})
   452  		if err != nil {
   453  			if errors.IsConflict(err) {
   454  				log.WithError(err).Warn("Unable to update CiliumEndpoint resource, will retry")
   455  				continue
   456  			}
   457  			log.WithError(err).Fatal("Unable to update CiliumEndpoint resource")
   458  		} else {
   459  			log.Infof("Successfully patched CiliumEndpoint resource: %v", *localCEP)
   460  			return
   461  		}
   462  	}
   463  	log.Fatal("Could not create or update CiliumEndpoint resource, despite retries")
   464  }
   465  
   466  func getEndpointIdentity(mdlIdentity *models.Identity) (identity *ciliumv2.EndpointIdentity) {
   467  	if mdlIdentity == nil {
   468  		return
   469  	}
   470  	identity = &ciliumv2.EndpointIdentity{
   471  		ID: mdlIdentity.ID,
   472  	}
   473  
   474  	identity.Labels = make([]string, len(mdlIdentity.Labels))
   475  	copy(identity.Labels, mdlIdentity.Labels)
   476  	sort.Strings(identity.Labels)
   477  	log.Infof("Got Endpoint Identity: %v", *identity)
   478  	return
   479  }
   480  
   481  // syncKVStoreKey synchronizes a key to the kvstore
   482  func (m *VMManager) syncKVStoreKey(ctx context.Context, key store.LocalKey) error {
   483  	jsonValue, err := key.Marshal()
   484  	if err != nil {
   485  		return err
   486  	}
   487  
   488  	// Update key in kvstore, overwrite an eventual existing key, attach
   489  	// lease to expire entry when agent dies and never comes back up.
   490  	k := path.Join(nodeStore.NodeRegisterStorePrefix, key.GetKeyName())
   491  	if _, err := m.backend.UpdateIfDifferent(ctx, k, jsonValue, true); err != nil {
   492  		return err
   493  	}
   494  
   495  	return nil
   496  }