sigs.k8s.io/cluster-api@v1.7.1/cmd/clusterctl/client/cluster/upgrader.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cluster 18 19 import ( 20 "context" 21 "sort" 22 "time" 23 24 "github.com/pkg/errors" 25 appsv1 "k8s.io/api/apps/v1" 26 "k8s.io/apimachinery/pkg/util/sets" 27 "k8s.io/apimachinery/pkg/util/version" 28 "k8s.io/apimachinery/pkg/util/wait" 29 "k8s.io/utils/ptr" 30 "sigs.k8s.io/controller-runtime/pkg/client" 31 32 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 33 clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3" 34 "sigs.k8s.io/cluster-api/cmd/clusterctl/client/config" 35 "sigs.k8s.io/cluster-api/cmd/clusterctl/client/repository" 36 logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log" 37 ) 38 39 // ProviderUpgrader defines methods for supporting provider upgrade. 40 type ProviderUpgrader interface { 41 // Plan returns a set of suggested Upgrade plans for the management cluster. 42 Plan(ctx context.Context) ([]UpgradePlan, error) 43 44 // ApplyPlan executes an upgrade following an UpgradePlan generated by clusterctl. 45 ApplyPlan(ctx context.Context, opts UpgradeOptions, clusterAPIVersion string) error 46 47 // ApplyCustomPlan plan executes an upgrade using the UpgradeItems provided by the user. 48 ApplyCustomPlan(ctx context.Context, opts UpgradeOptions, providersToUpgrade ...UpgradeItem) error 49 } 50 51 // UpgradePlan defines a list of possible upgrade targets for a management cluster. 52 type UpgradePlan struct { 53 Contract string 54 Providers []UpgradeItem 55 } 56 57 // UpgradeOptions defines the options used to upgrade installation. 58 type UpgradeOptions struct { 59 WaitProviders bool 60 WaitProviderTimeout time.Duration 61 } 62 63 // isPartialUpgrade returns true if at least one upgradeItem in the plan does not have a target version. 64 func (u *UpgradePlan) isPartialUpgrade() bool { 65 for _, i := range u.Providers { 66 if i.NextVersion == "" { 67 return true 68 } 69 } 70 return false 71 } 72 73 // UpgradeItem defines a possible upgrade target for a provider in the management cluster. 74 type UpgradeItem struct { 75 clusterctlv1.Provider 76 NextVersion string 77 } 78 79 // UpgradeRef returns a string identifying the upgrade item; this string is derived by the provider. 80 func (u *UpgradeItem) UpgradeRef() string { 81 return u.InstanceName() 82 } 83 84 type providerUpgrader struct { 85 configClient config.Client 86 proxy Proxy 87 repositoryClientFactory RepositoryClientFactory 88 providerInventory InventoryClient 89 providerComponents ComponentsClient 90 } 91 92 var _ ProviderUpgrader = &providerUpgrader{} 93 94 func (u *providerUpgrader) Plan(ctx context.Context) ([]UpgradePlan, error) { 95 log := logf.Log 96 log.Info("Checking new release availability...") 97 98 providerList, err := u.providerInventory.List(ctx) 99 if err != nil { 100 return nil, err 101 } 102 103 // The core provider is driving all the plan logic for entire management cluster, because all the providers 104 // are expected to support the same API Version of Cluster API (contract). 105 // e.g if the core provider supports v1alpha4, all the providers in the same management cluster should support v1alpha4 as well; 106 // all the providers in the management cluster can upgrade to the latest release supporting v1alpha4, or if available, 107 // all the providers can upgrade to the latest release supporting v1alpha5 (not supported in current clusterctl release, 108 // but upgrade plan should report these options) 109 110 // Gets the upgrade info for the core provider. 111 coreProviders := providerList.FilterCore() 112 if len(coreProviders) != 1 { 113 return nil, errors.Errorf("invalid management cluster: there should a core provider, found %d", len(coreProviders)) 114 } 115 coreProvider := coreProviders[0] 116 117 coreUpgradeInfo, err := u.getUpgradeInfo(ctx, coreProvider) 118 if err != nil { 119 return nil, err 120 } 121 122 // Identifies the API Version of Cluster API (contract) that we should consider for the management cluster update (Nb. the core provider is driving the entire management cluster). 123 // This includes the current contract and the new ones available, if any. 124 contractsForUpgrade := coreUpgradeInfo.getContractsForUpgrade() 125 if len(contractsForUpgrade) == 0 { 126 return nil, errors.Wrapf(err, "invalid metadata: unable to find the API Version of Cluster API (contract) supported by the %s provider", coreProvider.InstanceName()) 127 } 128 129 // Creates an UpgradePlan for each contract considered for upgrades; each upgrade plans contains 130 // an UpgradeItem for each provider defining the next available version with the target contract, if available. 131 // e.g. v1alpha4, cluster-api --> v0.4.1, kubeadm bootstrap --> v0.4.1, aws --> v0.X.2 132 // e.g. v1alpha4, cluster-api --> v0.5.1, kubeadm bootstrap --> v0.5.1, aws --> v0.Y.4 (not supported in current clusterctl release, but upgrade plan should report these options). 133 ret := make([]UpgradePlan, 0) 134 for _, contract := range contractsForUpgrade { 135 upgradePlan, err := u.getUpgradePlan(ctx, providerList.Items, contract) 136 if err != nil { 137 return nil, err 138 } 139 140 // If the upgrade plan is partial (at least one upgradeItem in the plan does not have a target version) and 141 // the upgrade plan requires a change of the contract for this management cluster, then drop it 142 // (all the provider in a management cluster are required to change contract at the same time). 143 if upgradePlan.isPartialUpgrade() && coreUpgradeInfo.currentContract != contract { 144 continue 145 } 146 147 ret = append(ret, *upgradePlan) 148 } 149 150 return ret, nil 151 } 152 153 func (u *providerUpgrader) ApplyPlan(ctx context.Context, opts UpgradeOptions, contract string) error { 154 if contract != clusterv1.GroupVersion.Version { 155 return errors.Errorf("current version of clusterctl could only upgrade to %s contract, requested %s", clusterv1.GroupVersion.Version, contract) 156 } 157 158 log := logf.Log 159 log.Info("Performing upgrade...") 160 161 // Gets the upgrade plan for the selected API Version of Cluster API (contract). 162 providerList, err := u.providerInventory.List(ctx) 163 if err != nil { 164 return err 165 } 166 167 upgradePlan, err := u.getUpgradePlan(ctx, providerList.Items, contract) 168 if err != nil { 169 return err 170 } 171 172 // Do the upgrade 173 return u.doUpgrade(ctx, upgradePlan, opts) 174 } 175 176 func (u *providerUpgrader) ApplyCustomPlan(ctx context.Context, opts UpgradeOptions, upgradeItems ...UpgradeItem) error { 177 log := logf.Log 178 log.Info("Performing upgrade...") 179 180 // Create a custom upgrade plan from the upgrade items, taking care of ensuring all the providers in a management 181 // cluster are consistent with the API Version of Cluster API (contract). 182 upgradePlan, err := u.createCustomPlan(ctx, upgradeItems) 183 if err != nil { 184 return err 185 } 186 187 // Do the upgrade 188 return u.doUpgrade(ctx, upgradePlan, opts) 189 } 190 191 // getUpgradePlan returns the upgrade plan for a specific set of providers/contract 192 // NB. this function is used both for upgrade plan and upgrade apply. 193 func (u *providerUpgrader) getUpgradePlan(ctx context.Context, providers []clusterctlv1.Provider, contract string) (*UpgradePlan, error) { 194 upgradeItems := []UpgradeItem{} 195 for _, provider := range providers { 196 // Gets the upgrade info for the provider. 197 providerUpgradeInfo, err := u.getUpgradeInfo(ctx, provider) 198 if err != nil { 199 return nil, err 200 } 201 202 // Identifies the next available version with the target contract for the provider, if available. 203 nextVersion := providerUpgradeInfo.getLatestNextVersion(contract) 204 205 // Append the upgrade item for the provider/with the target contract. 206 upgradeItems = append(upgradeItems, UpgradeItem{ 207 Provider: provider, 208 NextVersion: versionTag(nextVersion), 209 }) 210 } 211 212 return &UpgradePlan{ 213 Contract: contract, 214 Providers: upgradeItems, 215 }, nil 216 } 217 218 // createCustomPlan creates a custom upgrade plan from a set of upgrade items, taking care of ensuring all the providers 219 // in a management cluster are consistent with the API Version of Cluster API (contract). 220 func (u *providerUpgrader) createCustomPlan(ctx context.Context, upgradeItems []UpgradeItem) (*UpgradePlan, error) { 221 // Gets the API Version of Cluster API (contract). 222 // The this is required to ensure all the providers in a management cluster are consistent with the contract supported by the core provider. 223 // e.g if the core provider is v1beta1, all the provider should be v1beta1 as well. 224 225 // The target contract is derived from the current version of the core provider, or, if the core provider is included in the upgrade list, 226 // from its target version. 227 providerList, err := u.providerInventory.List(ctx) 228 if err != nil { 229 return nil, err 230 } 231 coreProviders := providerList.FilterCore() 232 if len(coreProviders) != 1 { 233 return nil, errors.Errorf("invalid management cluster: there should a core provider, found %d", len(coreProviders)) 234 } 235 coreProvider := coreProviders[0] 236 237 targetCoreProviderVersion := coreProvider.Version 238 for _, providerToUpgrade := range upgradeItems { 239 if providerToUpgrade.InstanceName() == coreProvider.InstanceName() { 240 targetCoreProviderVersion = providerToUpgrade.NextVersion 241 break 242 } 243 } 244 245 targetContract, err := u.getProviderContractByVersion(ctx, coreProvider, targetCoreProviderVersion) 246 if err != nil { 247 return nil, err 248 } 249 250 if targetContract != clusterv1.GroupVersion.Version { 251 return nil, errors.Errorf("current version of clusterctl could only upgrade to %s contract, requested %s", clusterv1.GroupVersion.Version, targetContract) 252 } 253 254 // Builds the custom upgrade plan, by adding all the upgrade items after checking consistency with the targetContract. 255 upgradeInstanceNames := sets.Set[string]{} 256 upgradePlan := &UpgradePlan{ 257 Contract: targetContract, 258 } 259 260 for _, upgradeItem := range upgradeItems { 261 // Match the upgrade item with the corresponding provider in the management cluster 262 var provider *clusterctlv1.Provider 263 for i := range providerList.Items { 264 if providerList.Items[i].InstanceName() == upgradeItem.InstanceName() { 265 provider = &providerList.Items[i] 266 break 267 } 268 } 269 if provider == nil { 270 return nil, errors.Errorf("unable to complete that upgrade: the provider %s in not part of the management cluster", upgradeItem.InstanceName()) 271 } 272 273 // Retrieves the contract that is supported by the target version of the provider. 274 contract, err := u.getProviderContractByVersion(ctx, *provider, upgradeItem.NextVersion) 275 if err != nil { 276 return nil, err 277 } 278 279 if contract != targetContract { 280 return nil, errors.Errorf("unable to complete that upgrade: the target version for the provider %s supports the %s API Version of Cluster API (contract), while the management cluster is using %s", upgradeItem.InstanceName(), contract, targetContract) 281 } 282 283 upgradePlan.Providers = append(upgradePlan.Providers, upgradeItem) 284 upgradeInstanceNames.Insert(upgradeItem.InstanceName()) 285 } 286 287 // Before doing upgrades, checks if other providers in the management cluster are lagging behind the target contract. 288 for _, provider := range providerList.Items { 289 // skip providers already included in the upgrade plan 290 if upgradeInstanceNames.Has(provider.InstanceName()) { 291 continue 292 } 293 294 // Retrieves the contract that is supported by the current version of the provider. 295 contract, err := u.getProviderContractByVersion(ctx, provider, provider.Version) 296 if err != nil { 297 return nil, err 298 } 299 300 if contract != targetContract { 301 return nil, errors.Errorf("unable to complete that upgrade: the provider %s supports the %s API Version of Cluster API (contract), while the management cluster is being updated to %s. Please include the %[1]s provider in the upgrade", provider.InstanceName(), contract, targetContract) 302 } 303 } 304 return upgradePlan, nil 305 } 306 307 // getProviderContractByVersion returns the contract that a provider will support if updated to the given target version. 308 func (u *providerUpgrader) getProviderContractByVersion(ctx context.Context, provider clusterctlv1.Provider, targetVersion string) (string, error) { 309 targetSemVersion, err := version.ParseSemantic(targetVersion) 310 if err != nil { 311 return "", errors.Wrapf(err, "failed to parse target version for the %s provider", provider.InstanceName()) 312 } 313 314 // Gets the metadata for the core Provider 315 upgradeInfo, err := u.getUpgradeInfo(ctx, provider) 316 if err != nil { 317 return "", err 318 } 319 320 releaseSeries := upgradeInfo.metadata.GetReleaseSeriesForVersion(targetSemVersion) 321 if releaseSeries == nil { 322 return "", errors.Errorf("invalid target version: version %s for the provider %s does not match any release series", targetVersion, provider.InstanceName()) 323 } 324 return releaseSeries.Contract, nil 325 } 326 327 // getUpgradeComponents returns the provider components for the selected target version. 328 func (u *providerUpgrader) getUpgradeComponents(ctx context.Context, provider UpgradeItem) (repository.Components, error) { 329 configRepository, err := u.configClient.Providers().Get(provider.ProviderName, provider.GetProviderType()) 330 if err != nil { 331 return nil, err 332 } 333 334 providerRepository, err := u.repositoryClientFactory(ctx, configRepository, u.configClient) 335 if err != nil { 336 return nil, err 337 } 338 339 options := repository.ComponentsOptions{ 340 Version: provider.NextVersion, 341 TargetNamespace: provider.Namespace, 342 } 343 components, err := providerRepository.Components().Get(ctx, options) 344 if err != nil { 345 return nil, err 346 } 347 return components, nil 348 } 349 350 func (u *providerUpgrader) doUpgrade(ctx context.Context, upgradePlan *UpgradePlan, opts UpgradeOptions) error { 351 // Check for multiple instances of the same provider if current contract is v1alpha3. 352 // TODO(killianmuldoon) Assess if we can remove this piece of code. 353 if upgradePlan.Contract == clusterv1.GroupVersion.Version { 354 if err := u.providerInventory.CheckSingleProviderInstance(ctx); err != nil { 355 return err 356 } 357 } 358 359 // Ensure Providers are updated in the following order: Core, Bootstrap, ControlPlane, Infrastructure. 360 providers := upgradePlan.Providers 361 sort.Slice(providers, func(a, b int) bool { 362 return providers[a].GetProviderType().Order() < providers[b].GetProviderType().Order() 363 }) 364 365 // Migrate CRs to latest CRD storage version, if necessary. 366 // Note: We have to do this before the providers are scaled down or deleted 367 // so conversion webhooks still work. 368 for _, upgradeItem := range providers { 369 // If there is not a specified next version, skip it (we are already up-to-date). 370 if upgradeItem.NextVersion == "" { 371 continue 372 } 373 374 // Gets the provider components for the target version. 375 components, err := u.getUpgradeComponents(ctx, upgradeItem) 376 if err != nil { 377 return err 378 } 379 380 c, err := u.proxy.NewClient(ctx) 381 if err != nil { 382 return err 383 } 384 385 if err := NewCRDMigrator(c).Run(ctx, components.Objs()); err != nil { 386 return err 387 } 388 } 389 390 // Scale down all providers. 391 // This is done to ensure all Pods of all "old" provider Deployments have been deleted. 392 // Otherwise it can happen that a provider Pod survives the upgrade because we create 393 // a new Deployment with the same selector directly after `Delete`. 394 // This can lead to a failed upgrade because: 395 // * new provider Pods fail to startup because they try to list resources. 396 // * list resources fails, because the API server hits the old provider Pod when trying to 397 // call the conversion webhook for those resources. 398 for _, upgradeItem := range providers { 399 // If there is not a specified next version, skip it (we are already up-to-date). 400 if upgradeItem.NextVersion == "" { 401 continue 402 } 403 404 // Scale down provider. 405 if err := u.scaleDownProvider(ctx, upgradeItem.Provider); err != nil { 406 return err 407 } 408 } 409 410 installQueue := []repository.Components{} 411 412 // Delete old providers and deploy new ones if necessary, i.e. there is a NextVersion. 413 for _, upgradeItem := range providers { 414 // If there is not a specified next version, skip it (we are already up-to-date). 415 if upgradeItem.NextVersion == "" { 416 continue 417 } 418 419 // Gets the provider components for the target version. 420 components, err := u.getUpgradeComponents(ctx, upgradeItem) 421 if err != nil { 422 return err 423 } 424 425 installQueue = append(installQueue, components) 426 427 // Delete the provider, preserving CRD, namespace and the inventory. 428 if err := u.providerComponents.Delete(ctx, DeleteOptions{ 429 Provider: upgradeItem.Provider, 430 IncludeNamespace: false, 431 IncludeCRDs: false, 432 SkipInventory: true, 433 }); err != nil { 434 return err 435 } 436 437 // Install the new version of the provider components. 438 if err := installComponentsAndUpdateInventory(ctx, components, u.providerComponents, u.providerInventory); err != nil { 439 return err 440 } 441 } 442 443 // Delete webhook namespace since it's not needed from v1alpha4. 444 if upgradePlan.Contract == clusterv1.GroupVersion.Version { 445 if err := u.providerComponents.DeleteWebhookNamespace(ctx); err != nil { 446 return err 447 } 448 } 449 450 return waitForProvidersReady(ctx, InstallOptions(opts), installQueue, u.proxy) 451 } 452 453 func (u *providerUpgrader) scaleDownProvider(ctx context.Context, provider clusterctlv1.Provider) error { 454 log := logf.Log 455 log.Info("Scaling down", "Provider", provider.Name, "Version", provider.Version, "Namespace", provider.Namespace) 456 457 cs, err := u.proxy.NewClient(ctx) 458 if err != nil { 459 return err 460 } 461 462 // Fetch all Deployments belonging to a provider. 463 deploymentList := &appsv1.DeploymentList{} 464 if err := cs.List(ctx, 465 deploymentList, 466 client.InNamespace(provider.Namespace), 467 client.MatchingLabels{ 468 clusterctlv1.ClusterctlLabel: "", 469 clusterv1.ProviderNameLabel: provider.ManifestLabel(), 470 }); err != nil { 471 return errors.Wrapf(err, "failed to list Deployments for provider %s", provider.Name) 472 } 473 474 // Scale down provider Deployments. 475 for _, deployment := range deploymentList.Items { 476 log.V(5).Info("Scaling down", "Deployment", deployment.Name, "Namespace", deployment.Namespace) 477 if err := scaleDownDeployment(ctx, cs, deployment); err != nil { 478 return err 479 } 480 } 481 482 return nil 483 } 484 485 // scaleDownDeployment scales down a Deployment to 0 and waits until all replicas have been deleted. 486 func scaleDownDeployment(ctx context.Context, c client.Client, deploy appsv1.Deployment) error { 487 if err := retryWithExponentialBackoff(ctx, newWriteBackoff(), func(ctx context.Context) error { 488 deployment := &appsv1.Deployment{} 489 if err := c.Get(ctx, client.ObjectKeyFromObject(&deploy), deployment); err != nil { 490 return errors.Wrapf(err, "failed to get Deployment/%s", deploy.GetName()) 491 } 492 493 // Deployment already scaled down, return early. 494 if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas == 0 { 495 return nil 496 } 497 498 // Scale down. 499 deployment.Spec.Replicas = ptr.To[int32](0) 500 if err := c.Update(ctx, deployment); err != nil { 501 return errors.Wrapf(err, "failed to update Deployment/%s", deploy.GetName()) 502 } 503 return nil 504 }); err != nil { 505 return errors.Wrapf(err, "failed to scale down Deployment") 506 } 507 508 deploymentScaleToZeroBackOff := wait.Backoff{ 509 Duration: 1 * time.Second, 510 Factor: 1, 511 Steps: 60, 512 Jitter: 0.4, 513 } 514 if err := retryWithExponentialBackoff(ctx, deploymentScaleToZeroBackOff, func(ctx context.Context) error { 515 deployment := &appsv1.Deployment{} 516 if err := c.Get(ctx, client.ObjectKeyFromObject(&deploy), deployment); err != nil { 517 return errors.Wrapf(err, "failed to get Deployment/%s", deploy.GetName()) 518 } 519 520 // Deployment is scaled down. 521 if deployment.Status.Replicas == 0 { 522 return nil 523 } 524 525 return errors.Errorf("Deployment still has %d replicas", deployment.Status.Replicas) 526 }); err != nil { 527 return errors.Wrapf(err, "failed to wait until Deployment is scaled down") 528 } 529 530 return nil 531 } 532 533 func newProviderUpgrader(configClient config.Client, proxy Proxy, repositoryClientFactory RepositoryClientFactory, providerInventory InventoryClient, providerComponents ComponentsClient) *providerUpgrader { 534 return &providerUpgrader{ 535 configClient: configClient, 536 proxy: proxy, 537 repositoryClientFactory: repositoryClientFactory, 538 providerInventory: providerInventory, 539 providerComponents: providerComponents, 540 } 541 }