github.com/cilium/cilium@v1.16.2/pkg/k8s/version/version.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  // Package version keeps track of the Kubernetes version the client is
     5  // connected to
     6  package version
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  
    12  	"github.com/blang/semver/v4"
    13  	"k8s.io/apimachinery/pkg/api/errors"
    14  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    15  	"k8s.io/client-go/kubernetes"
    16  
    17  	"github.com/cilium/cilium/pkg/lock"
    18  	"github.com/cilium/cilium/pkg/logging"
    19  	"github.com/cilium/cilium/pkg/logging/logfields"
    20  	"github.com/cilium/cilium/pkg/versioncheck"
    21  )
    22  
    23  var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "k8s")
    24  
// ServerCapabilities describes which optional Kubernetes API features the
// connected apiserver offers. The values are derived from the server version,
// from the Kubernetes discovery API, or from probing individual API
// endpoints.
type ServerCapabilities struct {
	// MinimalVersionMet is true when the apiserver version satisfies
	// MinimalVersionConstraint, i.e. the minimal version of Kubernetes
	// required to run Cilium has been met.
	MinimalVersionMet bool

	// EndpointSlice is true when the k8s server supports endpoint slices in
	// any known version (discovery.k8s.io/v1beta1 or discovery.k8s.io/v1).
	EndpointSlice bool

	// EndpointSliceV1 is true when the k8s server supports endpoint slices
	// v1. This version was introduced in K8s v1.21.0.
	EndpointSliceV1 bool

	// LeasesResourceLock is true when the K8s server supports the Lease type
	// from the coordination.k8s.io/v1 API, which is used for leader election
	// (currently only in the operator).
	// https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#lease-v1-coordination-k8s-io
	//
	// This capability was introduced in K8s version 1.14, prior to which
	// we don't support HA mode for the cilium-operator.
	LeasesResourceLock bool
}
    48  
// cachedVersion holds the most recently determined apiserver version together
// with the capabilities derived for it.
type cachedVersion struct {
	// mutex guards capabilities and version; the accessors in this file
	// take it before reading or writing either field.
	mutex lock.RWMutex
	// capabilities is the current set of detected server capabilities.
	capabilities ServerCapabilities
	// version is the parsed apiserver version.
	version semver.Version
}
    54  
const (
	// MinimalVersionConstraint is the minimal Kubernetes version that Cilium
	// supports running on. It is combined with ">=" to build the constraint
	// backing ServerCapabilities.MinimalVersionMet.
	MinimalVersionConstraint = "1.16.0"
)
    60  
var (
	// cached is the package-level store for the apiserver version and the
	// capabilities derived from it.
	cached = cachedVersion{}

	// Group/version strings and resource kinds that are matched against the
	// API resource lists returned by the discovery API.
	discoveryAPIGroupV1beta1 = "discovery.k8s.io/v1beta1"
	discoveryAPIGroupV1      = "discovery.k8s.io/v1"
	coordinationV1APIGroup   = "coordination.k8s.io/v1"
	endpointSliceKind        = "EndpointSlice"
	leaseKind                = "Lease"

	// Constraint to check support for discovery/v1 types. Support for v1
	// discovery was introduced in K8s version 1.21.
	isGEThanAPIDiscoveryV1 = versioncheck.MustCompile(">=1.21.0")

	// Constraint to check support for discovery/v1beta1 types. Support for
	// v1beta1 discovery was introduced in K8s version 1.17.
	isGEThanAPIDiscoveryV1Beta1 = versioncheck.MustCompile(">=1.17.0")

	// isGEThanMinimalVersionConstraint checks whether a version is at least
	// MinimalVersionConstraint, the minimal version required to run Cilium.
	isGEThanMinimalVersionConstraint = versioncheck.MustCompile(">=" + MinimalVersionConstraint)
)
    82  
    83  // Version returns the version of the Kubernetes apiserver
    84  func Version() semver.Version {
    85  	cached.mutex.RLock()
    86  	c := cached.version
    87  	cached.mutex.RUnlock()
    88  	return c
    89  }
    90  
    91  // Capabilities returns the capabilities of the Kubernetes apiserver
    92  func Capabilities() ServerCapabilities {
    93  	cached.mutex.RLock()
    94  	c := cached.capabilities
    95  	cached.mutex.RUnlock()
    96  	return c
    97  }
    98  
    99  func DisableLeasesResourceLock() {
   100  	cached.mutex.Lock()
   101  	defer cached.mutex.Unlock()
   102  	cached.capabilities.LeasesResourceLock = false
   103  }
   104  
   105  func updateVersion(version semver.Version) {
   106  	cached.mutex.Lock()
   107  	defer cached.mutex.Unlock()
   108  
   109  	cached.version = version
   110  
   111  	cached.capabilities.MinimalVersionMet = isGEThanMinimalVersionConstraint(version)
   112  	cached.capabilities.EndpointSliceV1 = isGEThanAPIDiscoveryV1(version)
   113  	cached.capabilities.EndpointSlice = isGEThanAPIDiscoveryV1Beta1(version)
   114  }
   115  
   116  func updateServerGroupsAndResources(apiResourceLists []*metav1.APIResourceList) {
   117  	cached.mutex.Lock()
   118  	defer cached.mutex.Unlock()
   119  
   120  	cached.capabilities.EndpointSlice = false
   121  	cached.capabilities.EndpointSliceV1 = false
   122  	cached.capabilities.LeasesResourceLock = false
   123  	for _, rscList := range apiResourceLists {
   124  		if rscList.GroupVersion == discoveryAPIGroupV1beta1 {
   125  			for _, rsc := range rscList.APIResources {
   126  				if rsc.Kind == endpointSliceKind {
   127  					cached.capabilities.EndpointSlice = true
   128  					break
   129  				}
   130  			}
   131  		}
   132  		if rscList.GroupVersion == discoveryAPIGroupV1 {
   133  			for _, rsc := range rscList.APIResources {
   134  				if rsc.Kind == endpointSliceKind {
   135  					cached.capabilities.EndpointSlice = true
   136  					cached.capabilities.EndpointSliceV1 = true
   137  					break
   138  				}
   139  			}
   140  		}
   141  
   142  		if rscList.GroupVersion == coordinationV1APIGroup {
   143  			for _, rsc := range rscList.APIResources {
   144  				if rsc.Kind == leaseKind {
   145  					cached.capabilities.LeasesResourceLock = true
   146  					break
   147  				}
   148  			}
   149  		}
   150  	}
   151  }
   152  
   153  // Force forces the use of a specific version
   154  func Force(version string) error {
   155  	ver, err := versioncheck.Version(version)
   156  	if err != nil {
   157  		return err
   158  	}
   159  	updateVersion(ver)
   160  	return nil
   161  }
   162  
   163  func endpointSlicesFallbackDiscovery(client kubernetes.Interface) error {
   164  	// If a k8s version with discovery v1 is used, then do not even bother
   165  	// checking for v1beta1
   166  	cached.mutex.Lock()
   167  	if cached.capabilities.EndpointSliceV1 {
   168  		cached.capabilities.EndpointSlice = true
   169  		cached.mutex.Unlock()
   170  		return nil
   171  	}
   172  	cached.mutex.Unlock()
   173  
   174  	// Discovery of API groups requires the API services of the apiserver to be
   175  	// healthy. Such API services can depend on the readiness of regular pods
   176  	// which require Cilium to function correctly. By treating failure to
   177  	// discover API groups as fatal, a critial loop can be entered in which
   178  	// Cilium cannot start because the API groups can't be discovered.
   179  	//
   180  	// Here we acknowledge the lack of discovery ability as non Fatal and fall back to probing
   181  	// the API directly.
   182  	_, err := client.DiscoveryV1beta1().EndpointSlices("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{})
   183  	if err == nil {
   184  		cached.mutex.Lock()
   185  		cached.capabilities.EndpointSlice = true
   186  		cached.mutex.Unlock()
   187  		return nil
   188  	}
   189  
   190  	if errors.IsNotFound(err) {
   191  		log.WithError(err).Info("Unable to retrieve EndpointSlices for default/kubernetes. Disabling EndpointSlices")
   192  		// StatusNotFound is a safe error, EndpointSlices are
   193  		// disabled and the agent can continue.
   194  		return nil
   195  	}
   196  
   197  	// Unknown error, we can't derive whether to enable or disable
   198  	// EndpointSlices and need to error out.
   199  	return fmt.Errorf("unable to validate EndpointSlices support: %w", err)
   200  }
   201  
   202  func leasesFallbackDiscovery(client kubernetes.Interface, apiDiscoveryEnabled bool) error {
   203  	// apiDiscoveryEnabled is used to fallback leases discovery to directly
   204  	// probing the API when we cannot discover API groups.
   205  	// We require to check for Leases capabilities in operator only, which uses Leases
   206  	// for leader election purposes in HA mode.
   207  	if !apiDiscoveryEnabled {
   208  		log.Debugf("Skipping Leases support fallback discovery")
   209  		return nil
   210  	}
   211  
   212  	// Similar to endpointSlicesFallbackDiscovery here we fallback to probing the Kubernetes
   213  	// API directly. `kube-controller-manager` creates a lease in the kube-system namespace
   214  	// and here we try and see if that Lease exists.
   215  	_, err := client.CoordinationV1().Leases("kube-system").Get(context.TODO(), "kube-controller-manager", metav1.GetOptions{})
   216  	if err == nil {
   217  		cached.mutex.Lock()
   218  		cached.capabilities.LeasesResourceLock = true
   219  		cached.mutex.Unlock()
   220  		return nil
   221  	}
   222  
   223  	if errors.IsNotFound(err) {
   224  		log.WithError(err).Info("Unable to retrieve Leases for kube-controller-manager. Disabling LeasesResourceLock")
   225  		// StatusNotFound is a safe error, Leases are
   226  		// disabled and the agent can continue
   227  		return nil
   228  	}
   229  
   230  	// Unknown error, we can't derive whether to enable or disable
   231  	// LeasesResourceLock and need to error out
   232  	return fmt.Errorf("unable to validate LeasesResourceLock support: %w", err)
   233  }
   234  
   235  func updateK8sServerVersion(client kubernetes.Interface) error {
   236  	var ver semver.Version
   237  
   238  	sv, err := client.Discovery().ServerVersion()
   239  	if err != nil {
   240  		return err
   241  	}
   242  
   243  	// Try GitVersion first. In case of error fallback to MajorMinor
   244  	if sv.GitVersion != "" {
   245  		// This is a string like "v1.9.0"
   246  		ver, err = versioncheck.Version(sv.GitVersion)
   247  		if err == nil {
   248  			updateVersion(ver)
   249  			return nil
   250  		}
   251  	}
   252  
   253  	if sv.Major != "" && sv.Minor != "" {
   254  		ver, err = versioncheck.Version(fmt.Sprintf("%s.%s", sv.Major, sv.Minor))
   255  		if err == nil {
   256  			updateVersion(ver)
   257  			return nil
   258  		}
   259  	}
   260  
   261  	return fmt.Errorf("cannot parse k8s server version from %+v: %w", sv, err)
   262  }
   263  
   264  // Update retrieves the version of the Kubernetes apiserver and derives the
   265  // capabilities. This function must be called after connectivity to the
   266  // apiserver has been established.
   267  //
   268  // Discovery of capabilities only works if the discovery API of the apiserver
   269  // is functional. If it is not available, a warning is logged and the discovery
   270  // falls back to probing individual API endpoints.
   271  func Update(client kubernetes.Interface, apiDiscoveryEnabled bool) error {
   272  	err := updateK8sServerVersion(client)
   273  	if err != nil {
   274  		return err
   275  	}
   276  
   277  	if apiDiscoveryEnabled {
   278  		// Discovery of API groups requires the API services of the
   279  		// apiserver to be healthy. Such API services can depend on the
   280  		// readiness of regular pods which require Cilium to function
   281  		// correctly. By treating failure to discover API groups as
   282  		// fatal, a critical loop can be entered in which Cilium cannot
   283  		// start because the API groups can't be discovered and th API
   284  		// groups will only become discoverable once Cilium is up.
   285  		_, apiResourceLists, err := client.Discovery().ServerGroupsAndResources()
   286  		if err != nil {
   287  			// It doesn't make sense to retry the retrieval of this
   288  			// information at a later point because the capabilities are
   289  			// primiarly used while the agent is starting up. Instead, fall
   290  			// back to probing API endpoints directly.
   291  			log.WithError(err).Warning("Unable to discover API groups and resources")
   292  			if err := endpointSlicesFallbackDiscovery(client); err != nil {
   293  				return err
   294  			}
   295  
   296  			return leasesFallbackDiscovery(client, apiDiscoveryEnabled)
   297  		}
   298  
   299  		updateServerGroupsAndResources(apiResourceLists)
   300  	} else {
   301  		if err := endpointSlicesFallbackDiscovery(client); err != nil {
   302  			return err
   303  		}
   304  
   305  		return leasesFallbackDiscovery(client, apiDiscoveryEnabled)
   306  	}
   307  
   308  	return nil
   309  }