github.com/cilium/cilium@v1.16.2/pkg/k8s/version/version.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 // Package version keeps track of the Kubernetes version the client is 5 // connected to 6 package version 7 8 import ( 9 "context" 10 "fmt" 11 12 "github.com/blang/semver/v4" 13 "k8s.io/apimachinery/pkg/api/errors" 14 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 "k8s.io/client-go/kubernetes" 16 17 "github.com/cilium/cilium/pkg/lock" 18 "github.com/cilium/cilium/pkg/logging" 19 "github.com/cilium/cilium/pkg/logging/logfields" 20 "github.com/cilium/cilium/pkg/versioncheck" 21 ) 22 23 var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "k8s") 24 25 // ServerCapabilities is a list of server capabilities derived based on 26 // version, the Kubernetes discovery API, or probing of individual API 27 // endpoints. 28 type ServerCapabilities struct { 29 // MinimalVersionMet is true when the minimal version of Kubernetes 30 // required to run Cilium has been met 31 MinimalVersionMet bool 32 33 // EndpointSlice is the ability of k8s server to support endpoint slices 34 EndpointSlice bool 35 36 // EndpointSliceV1 is the ability of k8s server to support endpoint slices 37 // v1. This version was introduced in K8s v1.21.0. 38 EndpointSliceV1 bool 39 40 // LeasesResourceLock is the ability of K8s server to support Lease type 41 // from coordination.k8s.io/v1 API for leader election purposes(currently only in operator). 42 // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#lease-v1-coordination-k8s-io 43 // 44 // This capability was introduced in K8s version 1.14, prior to which 45 // we don't support HA mode for the cilium-operator. 46 LeasesResourceLock bool 47 } 48 49 type cachedVersion struct { 50 mutex lock.RWMutex 51 capabilities ServerCapabilities 52 version semver.Version 53 } 54 55 const ( 56 // MinimalVersionConstraint is the minimal version that Cilium supports to 57 // run kubernetes. 58 MinimalVersionConstraint = "1.16.0" 59 ) 60 61 var ( 62 cached = cachedVersion{} 63 64 discoveryAPIGroupV1beta1 = "discovery.k8s.io/v1beta1" 65 discoveryAPIGroupV1 = "discovery.k8s.io/v1" 66 coordinationV1APIGroup = "coordination.k8s.io/v1" 67 endpointSliceKind = "EndpointSlice" 68 leaseKind = "Lease" 69 70 // Constraint to check support for discovery/v1 types. Support for v1 71 // discovery was introduced in K8s version 1.21. 72 isGEThanAPIDiscoveryV1 = versioncheck.MustCompile(">=1.21.0") 73 74 // Constraint to check support for discovery/v1beta1 types. Support for 75 // v1beta1 discovery was introduced in K8s version 1.17. 76 isGEThanAPIDiscoveryV1Beta1 = versioncheck.MustCompile(">=1.17.0") 77 78 // isGEThanMinimalVersionConstraint is the minimal version required to run 79 // Cilium 80 isGEThanMinimalVersionConstraint = versioncheck.MustCompile(">=" + MinimalVersionConstraint) 81 ) 82 83 // Version returns the version of the Kubernetes apiserver 84 func Version() semver.Version { 85 cached.mutex.RLock() 86 c := cached.version 87 cached.mutex.RUnlock() 88 return c 89 } 90 91 // Capabilities returns the capabilities of the Kubernetes apiserver 92 func Capabilities() ServerCapabilities { 93 cached.mutex.RLock() 94 c := cached.capabilities 95 cached.mutex.RUnlock() 96 return c 97 } 98 99 func DisableLeasesResourceLock() { 100 cached.mutex.Lock() 101 defer cached.mutex.Unlock() 102 cached.capabilities.LeasesResourceLock = false 103 } 104 105 func updateVersion(version semver.Version) { 106 cached.mutex.Lock() 107 defer cached.mutex.Unlock() 108 109 cached.version = version 110 111 cached.capabilities.MinimalVersionMet = isGEThanMinimalVersionConstraint(version) 112 cached.capabilities.EndpointSliceV1 = isGEThanAPIDiscoveryV1(version) 113 cached.capabilities.EndpointSlice = isGEThanAPIDiscoveryV1Beta1(version) 114 } 115 116 func updateServerGroupsAndResources(apiResourceLists []*metav1.APIResourceList) { 117 cached.mutex.Lock() 118 defer cached.mutex.Unlock() 119 120 cached.capabilities.EndpointSlice = false 121 cached.capabilities.EndpointSliceV1 = false 122 cached.capabilities.LeasesResourceLock = false 123 for _, rscList := range apiResourceLists { 124 if rscList.GroupVersion == discoveryAPIGroupV1beta1 { 125 for _, rsc := range rscList.APIResources { 126 if rsc.Kind == endpointSliceKind { 127 cached.capabilities.EndpointSlice = true 128 break 129 } 130 } 131 } 132 if rscList.GroupVersion == discoveryAPIGroupV1 { 133 for _, rsc := range rscList.APIResources { 134 if rsc.Kind == endpointSliceKind { 135 cached.capabilities.EndpointSlice = true 136 cached.capabilities.EndpointSliceV1 = true 137 break 138 } 139 } 140 } 141 142 if rscList.GroupVersion == coordinationV1APIGroup { 143 for _, rsc := range rscList.APIResources { 144 if rsc.Kind == leaseKind { 145 cached.capabilities.LeasesResourceLock = true 146 break 147 } 148 } 149 } 150 } 151 } 152 153 // Force forces the use of a specific version 154 func Force(version string) error { 155 ver, err := versioncheck.Version(version) 156 if err != nil { 157 return err 158 } 159 updateVersion(ver) 160 return nil 161 } 162 163 func endpointSlicesFallbackDiscovery(client kubernetes.Interface) error { 164 // If a k8s version with discovery v1 is used, then do not even bother 165 // checking for v1beta1 166 cached.mutex.Lock() 167 if cached.capabilities.EndpointSliceV1 { 168 cached.capabilities.EndpointSlice = true 169 cached.mutex.Unlock() 170 return nil 171 } 172 cached.mutex.Unlock() 173 174 // Discovery of API groups requires the API services of the apiserver to be 175 // healthy. Such API services can depend on the readiness of regular pods 176 // which require Cilium to function correctly. By treating failure to 177 // discover API groups as fatal, a critial loop can be entered in which 178 // Cilium cannot start because the API groups can't be discovered. 179 // 180 // Here we acknowledge the lack of discovery ability as non Fatal and fall back to probing 181 // the API directly. 182 _, err := client.DiscoveryV1beta1().EndpointSlices("default").Get(context.TODO(), "kubernetes", metav1.GetOptions{}) 183 if err == nil { 184 cached.mutex.Lock() 185 cached.capabilities.EndpointSlice = true 186 cached.mutex.Unlock() 187 return nil 188 } 189 190 if errors.IsNotFound(err) { 191 log.WithError(err).Info("Unable to retrieve EndpointSlices for default/kubernetes. Disabling EndpointSlices") 192 // StatusNotFound is a safe error, EndpointSlices are 193 // disabled and the agent can continue. 194 return nil 195 } 196 197 // Unknown error, we can't derive whether to enable or disable 198 // EndpointSlices and need to error out. 199 return fmt.Errorf("unable to validate EndpointSlices support: %w", err) 200 } 201 202 func leasesFallbackDiscovery(client kubernetes.Interface, apiDiscoveryEnabled bool) error { 203 // apiDiscoveryEnabled is used to fallback leases discovery to directly 204 // probing the API when we cannot discover API groups. 205 // We require to check for Leases capabilities in operator only, which uses Leases 206 // for leader election purposes in HA mode. 207 if !apiDiscoveryEnabled { 208 log.Debugf("Skipping Leases support fallback discovery") 209 return nil 210 } 211 212 // Similar to endpointSlicesFallbackDiscovery here we fallback to probing the Kubernetes 213 // API directly. `kube-controller-manager` creates a lease in the kube-system namespace 214 // and here we try and see if that Lease exists. 215 _, err := client.CoordinationV1().Leases("kube-system").Get(context.TODO(), "kube-controller-manager", metav1.GetOptions{}) 216 if err == nil { 217 cached.mutex.Lock() 218 cached.capabilities.LeasesResourceLock = true 219 cached.mutex.Unlock() 220 return nil 221 } 222 223 if errors.IsNotFound(err) { 224 log.WithError(err).Info("Unable to retrieve Leases for kube-controller-manager. Disabling LeasesResourceLock") 225 // StatusNotFound is a safe error, Leases are 226 // disabled and the agent can continue 227 return nil 228 } 229 230 // Unknown error, we can't derive whether to enable or disable 231 // LeasesResourceLock and need to error out 232 return fmt.Errorf("unable to validate LeasesResourceLock support: %w", err) 233 } 234 235 func updateK8sServerVersion(client kubernetes.Interface) error { 236 var ver semver.Version 237 238 sv, err := client.Discovery().ServerVersion() 239 if err != nil { 240 return err 241 } 242 243 // Try GitVersion first. In case of error fallback to MajorMinor 244 if sv.GitVersion != "" { 245 // This is a string like "v1.9.0" 246 ver, err = versioncheck.Version(sv.GitVersion) 247 if err == nil { 248 updateVersion(ver) 249 return nil 250 } 251 } 252 253 if sv.Major != "" && sv.Minor != "" { 254 ver, err = versioncheck.Version(fmt.Sprintf("%s.%s", sv.Major, sv.Minor)) 255 if err == nil { 256 updateVersion(ver) 257 return nil 258 } 259 } 260 261 return fmt.Errorf("cannot parse k8s server version from %+v: %w", sv, err) 262 } 263 264 // Update retrieves the version of the Kubernetes apiserver and derives the 265 // capabilities. This function must be called after connectivity to the 266 // apiserver has been established. 267 // 268 // Discovery of capabilities only works if the discovery API of the apiserver 269 // is functional. If it is not available, a warning is logged and the discovery 270 // falls back to probing individual API endpoints. 271 func Update(client kubernetes.Interface, apiDiscoveryEnabled bool) error { 272 err := updateK8sServerVersion(client) 273 if err != nil { 274 return err 275 } 276 277 if apiDiscoveryEnabled { 278 // Discovery of API groups requires the API services of the 279 // apiserver to be healthy. Such API services can depend on the 280 // readiness of regular pods which require Cilium to function 281 // correctly. By treating failure to discover API groups as 282 // fatal, a critical loop can be entered in which Cilium cannot 283 // start because the API groups can't be discovered and th API 284 // groups will only become discoverable once Cilium is up. 285 _, apiResourceLists, err := client.Discovery().ServerGroupsAndResources() 286 if err != nil { 287 // It doesn't make sense to retry the retrieval of this 288 // information at a later point because the capabilities are 289 // primiarly used while the agent is starting up. Instead, fall 290 // back to probing API endpoints directly. 291 log.WithError(err).Warning("Unable to discover API groups and resources") 292 if err := endpointSlicesFallbackDiscovery(client); err != nil { 293 return err 294 } 295 296 return leasesFallbackDiscovery(client, apiDiscoveryEnabled) 297 } 298 299 updateServerGroupsAndResources(apiResourceLists) 300 } else { 301 if err := endpointSlicesFallbackDiscovery(client); err != nil { 302 return err 303 } 304 305 return leasesFallbackDiscovery(client, apiDiscoveryEnabled) 306 } 307 308 return nil 309 }