sigs.k8s.io/cluster-api@v1.7.1/cmd/clusterctl/client/cluster/upgrader.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cluster
    18  
    19  import (
    20  	"context"
    21  	"sort"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	appsv1 "k8s.io/api/apps/v1"
    26  	"k8s.io/apimachinery/pkg/util/sets"
    27  	"k8s.io/apimachinery/pkg/util/version"
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	"k8s.io/utils/ptr"
    30  	"sigs.k8s.io/controller-runtime/pkg/client"
    31  
    32  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    33  	clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3"
    34  	"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
    35  	"sigs.k8s.io/cluster-api/cmd/clusterctl/client/repository"
    36  	logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
    37  )
    38  
// ProviderUpgrader defines methods for supporting provider upgrade.
type ProviderUpgrader interface {
	// Plan returns a set of suggested Upgrade plans for the management cluster.
	Plan(ctx context.Context) ([]UpgradePlan, error)

	// ApplyPlan executes an upgrade following an UpgradePlan generated by clusterctl.
	// NOTE(review): the implementation in this file names the last argument `contract` and compares it
	// with clusterv1.GroupVersion.Version, so clusterAPIVersion is expected to be an API Version of
	// Cluster API (contract), not a provider release version.
	ApplyPlan(ctx context.Context, opts UpgradeOptions, clusterAPIVersion string) error

	// ApplyCustomPlan executes an upgrade using the UpgradeItems provided by the user.
	ApplyCustomPlan(ctx context.Context, opts UpgradeOptions, providersToUpgrade ...UpgradeItem) error
}
    50  
// UpgradePlan defines a list of possible upgrade targets for a management cluster.
type UpgradePlan struct {
	// Contract is the API Version of Cluster API (contract) targeted by the plan.
	Contract string
	// Providers holds the upgrade items of the plan, one for each provider being considered.
	Providers []UpgradeItem
}
    56  
// UpgradeOptions defines the options used to upgrade installation.
// The options are converted to InstallOptions and handed to waitForProvidersReady at the end of an upgrade.
type UpgradeOptions struct {
	// WaitProviders presumably controls whether the upgrade waits for the providers to become ready;
	// the value is only forwarded by this file — confirm against waitForProvidersReady.
	WaitProviders bool
	// WaitProviderTimeout is presumably the maximum time to wait for providers to become ready;
	// the value is only forwarded by this file — confirm against waitForProvidersReady.
	WaitProviderTimeout time.Duration
}
    62  
    63  // isPartialUpgrade returns true if at least one upgradeItem in the plan does not have a target version.
    64  func (u *UpgradePlan) isPartialUpgrade() bool {
    65  	for _, i := range u.Providers {
    66  		if i.NextVersion == "" {
    67  			return true
    68  		}
    69  	}
    70  	return false
    71  }
    72  
// UpgradeItem defines a possible upgrade target for a provider in the management cluster.
type UpgradeItem struct {
	// Provider identifies the provider the upgrade item refers to.
	clusterctlv1.Provider
	// NextVersion is the version the provider should be upgraded to; items with an empty
	// NextVersion are skipped when the upgrade is executed.
	NextVersion string
}
    78  
    79  // UpgradeRef returns a string identifying the upgrade item; this string is derived by the provider.
    80  func (u *UpgradeItem) UpgradeRef() string {
    81  	return u.InstanceName()
    82  }
    83  
// providerUpgrader is the ProviderUpgrader implementation provided by this file.
type providerUpgrader struct {
	// configClient is used to look up the configuration of a provider and is passed to the repository client factory.
	configClient config.Client
	// proxy creates the clients used to read and update objects during the upgrade.
	proxy Proxy
	// repositoryClientFactory creates the repository client for a given provider configuration.
	repositoryClientFactory RepositoryClientFactory
	// providerInventory lists the providers and is updated when new components are installed.
	providerInventory InventoryClient
	// providerComponents deletes the old provider components and installs the new ones.
	providerComponents ComponentsClient
}

// Compile-time check that providerUpgrader implements ProviderUpgrader.
var _ ProviderUpgrader = &providerUpgrader{}
    93  
    94  func (u *providerUpgrader) Plan(ctx context.Context) ([]UpgradePlan, error) {
    95  	log := logf.Log
    96  	log.Info("Checking new release availability...")
    97  
    98  	providerList, err := u.providerInventory.List(ctx)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	// The core provider is driving all the plan logic for entire management cluster, because all the providers
   104  	// are expected to support the same API Version of Cluster API (contract).
   105  	// e.g if the core provider supports v1alpha4, all the providers in the same management cluster should support v1alpha4 as well;
   106  	// all the providers in the management cluster can upgrade to the latest release supporting v1alpha4, or if available,
   107  	// all the providers can upgrade to the latest release supporting v1alpha5 (not supported in current clusterctl release,
   108  	// but upgrade plan should report these options)
   109  
   110  	// Gets the upgrade info for the core provider.
   111  	coreProviders := providerList.FilterCore()
   112  	if len(coreProviders) != 1 {
   113  		return nil, errors.Errorf("invalid management cluster: there should a core provider, found %d", len(coreProviders))
   114  	}
   115  	coreProvider := coreProviders[0]
   116  
   117  	coreUpgradeInfo, err := u.getUpgradeInfo(ctx, coreProvider)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	// Identifies the API Version of Cluster API (contract) that we should consider for the management cluster update (Nb. the core provider is driving the entire management cluster).
   123  	// This includes the current contract and the new ones available, if any.
   124  	contractsForUpgrade := coreUpgradeInfo.getContractsForUpgrade()
   125  	if len(contractsForUpgrade) == 0 {
   126  		return nil, errors.Wrapf(err, "invalid metadata: unable to find the API Version of Cluster API (contract) supported by the %s provider", coreProvider.InstanceName())
   127  	}
   128  
   129  	// Creates an UpgradePlan for each contract considered for upgrades; each upgrade plans contains
   130  	// an UpgradeItem for each provider defining the next available version with the target contract, if available.
   131  	// e.g. v1alpha4, cluster-api --> v0.4.1, kubeadm bootstrap --> v0.4.1, aws --> v0.X.2
   132  	// e.g. v1alpha4, cluster-api --> v0.5.1, kubeadm bootstrap --> v0.5.1, aws --> v0.Y.4 (not supported in current clusterctl release, but upgrade plan should report these options).
   133  	ret := make([]UpgradePlan, 0)
   134  	for _, contract := range contractsForUpgrade {
   135  		upgradePlan, err := u.getUpgradePlan(ctx, providerList.Items, contract)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  
   140  		// If the upgrade plan is partial (at least one upgradeItem in the plan does not have a target version) and
   141  		// the upgrade plan requires a change of the contract for this management cluster, then drop it
   142  		// (all the provider in a management cluster are required to change contract at the same time).
   143  		if upgradePlan.isPartialUpgrade() && coreUpgradeInfo.currentContract != contract {
   144  			continue
   145  		}
   146  
   147  		ret = append(ret, *upgradePlan)
   148  	}
   149  
   150  	return ret, nil
   151  }
   152  
   153  func (u *providerUpgrader) ApplyPlan(ctx context.Context, opts UpgradeOptions, contract string) error {
   154  	if contract != clusterv1.GroupVersion.Version {
   155  		return errors.Errorf("current version of clusterctl could only upgrade to %s contract, requested %s", clusterv1.GroupVersion.Version, contract)
   156  	}
   157  
   158  	log := logf.Log
   159  	log.Info("Performing upgrade...")
   160  
   161  	// Gets the upgrade plan for the selected API Version of Cluster API (contract).
   162  	providerList, err := u.providerInventory.List(ctx)
   163  	if err != nil {
   164  		return err
   165  	}
   166  
   167  	upgradePlan, err := u.getUpgradePlan(ctx, providerList.Items, contract)
   168  	if err != nil {
   169  		return err
   170  	}
   171  
   172  	// Do the upgrade
   173  	return u.doUpgrade(ctx, upgradePlan, opts)
   174  }
   175  
   176  func (u *providerUpgrader) ApplyCustomPlan(ctx context.Context, opts UpgradeOptions, upgradeItems ...UpgradeItem) error {
   177  	log := logf.Log
   178  	log.Info("Performing upgrade...")
   179  
   180  	// Create a custom upgrade plan from the upgrade items, taking care of ensuring all the providers in a management
   181  	// cluster are consistent with the API Version of Cluster API (contract).
   182  	upgradePlan, err := u.createCustomPlan(ctx, upgradeItems)
   183  	if err != nil {
   184  		return err
   185  	}
   186  
   187  	// Do the upgrade
   188  	return u.doUpgrade(ctx, upgradePlan, opts)
   189  }
   190  
   191  // getUpgradePlan returns the upgrade plan for a specific set of providers/contract
   192  // NB. this function is used both for upgrade plan and upgrade apply.
   193  func (u *providerUpgrader) getUpgradePlan(ctx context.Context, providers []clusterctlv1.Provider, contract string) (*UpgradePlan, error) {
   194  	upgradeItems := []UpgradeItem{}
   195  	for _, provider := range providers {
   196  		// Gets the upgrade info for the provider.
   197  		providerUpgradeInfo, err := u.getUpgradeInfo(ctx, provider)
   198  		if err != nil {
   199  			return nil, err
   200  		}
   201  
   202  		// Identifies the next available version with the target contract for the provider, if available.
   203  		nextVersion := providerUpgradeInfo.getLatestNextVersion(contract)
   204  
   205  		// Append the upgrade item for the provider/with the target contract.
   206  		upgradeItems = append(upgradeItems, UpgradeItem{
   207  			Provider:    provider,
   208  			NextVersion: versionTag(nextVersion),
   209  		})
   210  	}
   211  
   212  	return &UpgradePlan{
   213  		Contract:  contract,
   214  		Providers: upgradeItems,
   215  	}, nil
   216  }
   217  
   218  // createCustomPlan creates a custom upgrade plan from a set of upgrade items, taking care of ensuring all the providers
   219  // in a management cluster are consistent with the API Version of Cluster API (contract).
   220  func (u *providerUpgrader) createCustomPlan(ctx context.Context, upgradeItems []UpgradeItem) (*UpgradePlan, error) {
   221  	// Gets the API Version of Cluster API (contract).
   222  	// The this is required to ensure all the providers in a management cluster are consistent with the contract supported by the core provider.
   223  	// e.g if the core provider is v1beta1, all the provider should be v1beta1 as well.
   224  
   225  	// The target contract is derived from the current version of the core provider, or, if the core provider is included in the upgrade list,
   226  	// from its target version.
   227  	providerList, err := u.providerInventory.List(ctx)
   228  	if err != nil {
   229  		return nil, err
   230  	}
   231  	coreProviders := providerList.FilterCore()
   232  	if len(coreProviders) != 1 {
   233  		return nil, errors.Errorf("invalid management cluster: there should a core provider, found %d", len(coreProviders))
   234  	}
   235  	coreProvider := coreProviders[0]
   236  
   237  	targetCoreProviderVersion := coreProvider.Version
   238  	for _, providerToUpgrade := range upgradeItems {
   239  		if providerToUpgrade.InstanceName() == coreProvider.InstanceName() {
   240  			targetCoreProviderVersion = providerToUpgrade.NextVersion
   241  			break
   242  		}
   243  	}
   244  
   245  	targetContract, err := u.getProviderContractByVersion(ctx, coreProvider, targetCoreProviderVersion)
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	if targetContract != clusterv1.GroupVersion.Version {
   251  		return nil, errors.Errorf("current version of clusterctl could only upgrade to %s contract, requested %s", clusterv1.GroupVersion.Version, targetContract)
   252  	}
   253  
   254  	// Builds the custom upgrade plan, by adding all the upgrade items after checking consistency with the targetContract.
   255  	upgradeInstanceNames := sets.Set[string]{}
   256  	upgradePlan := &UpgradePlan{
   257  		Contract: targetContract,
   258  	}
   259  
   260  	for _, upgradeItem := range upgradeItems {
   261  		// Match the upgrade item with the corresponding provider in the management cluster
   262  		var provider *clusterctlv1.Provider
   263  		for i := range providerList.Items {
   264  			if providerList.Items[i].InstanceName() == upgradeItem.InstanceName() {
   265  				provider = &providerList.Items[i]
   266  				break
   267  			}
   268  		}
   269  		if provider == nil {
   270  			return nil, errors.Errorf("unable to complete that upgrade: the provider %s in not part of the management cluster", upgradeItem.InstanceName())
   271  		}
   272  
   273  		// Retrieves the contract that is supported by the target version of the provider.
   274  		contract, err := u.getProviderContractByVersion(ctx, *provider, upgradeItem.NextVersion)
   275  		if err != nil {
   276  			return nil, err
   277  		}
   278  
   279  		if contract != targetContract {
   280  			return nil, errors.Errorf("unable to complete that upgrade: the target version for the provider %s supports the %s API Version of Cluster API (contract), while the management cluster is using %s", upgradeItem.InstanceName(), contract, targetContract)
   281  		}
   282  
   283  		upgradePlan.Providers = append(upgradePlan.Providers, upgradeItem)
   284  		upgradeInstanceNames.Insert(upgradeItem.InstanceName())
   285  	}
   286  
   287  	// Before doing upgrades, checks if other providers in the management cluster are lagging behind the target contract.
   288  	for _, provider := range providerList.Items {
   289  		// skip providers already included in the upgrade plan
   290  		if upgradeInstanceNames.Has(provider.InstanceName()) {
   291  			continue
   292  		}
   293  
   294  		// Retrieves the contract that is supported by the current version of the provider.
   295  		contract, err := u.getProviderContractByVersion(ctx, provider, provider.Version)
   296  		if err != nil {
   297  			return nil, err
   298  		}
   299  
   300  		if contract != targetContract {
   301  			return nil, errors.Errorf("unable to complete that upgrade: the provider %s supports the %s API Version of Cluster API (contract), while the management cluster is being updated to %s. Please include the %[1]s provider in the upgrade", provider.InstanceName(), contract, targetContract)
   302  		}
   303  	}
   304  	return upgradePlan, nil
   305  }
   306  
   307  // getProviderContractByVersion returns the contract that a provider will support if updated to the given target version.
   308  func (u *providerUpgrader) getProviderContractByVersion(ctx context.Context, provider clusterctlv1.Provider, targetVersion string) (string, error) {
   309  	targetSemVersion, err := version.ParseSemantic(targetVersion)
   310  	if err != nil {
   311  		return "", errors.Wrapf(err, "failed to parse target version for the %s provider", provider.InstanceName())
   312  	}
   313  
   314  	// Gets the metadata for the core Provider
   315  	upgradeInfo, err := u.getUpgradeInfo(ctx, provider)
   316  	if err != nil {
   317  		return "", err
   318  	}
   319  
   320  	releaseSeries := upgradeInfo.metadata.GetReleaseSeriesForVersion(targetSemVersion)
   321  	if releaseSeries == nil {
   322  		return "", errors.Errorf("invalid target version: version %s for the provider %s does not match any release series", targetVersion, provider.InstanceName())
   323  	}
   324  	return releaseSeries.Contract, nil
   325  }
   326  
   327  // getUpgradeComponents returns the provider components for the selected target version.
   328  func (u *providerUpgrader) getUpgradeComponents(ctx context.Context, provider UpgradeItem) (repository.Components, error) {
   329  	configRepository, err := u.configClient.Providers().Get(provider.ProviderName, provider.GetProviderType())
   330  	if err != nil {
   331  		return nil, err
   332  	}
   333  
   334  	providerRepository, err := u.repositoryClientFactory(ctx, configRepository, u.configClient)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  
   339  	options := repository.ComponentsOptions{
   340  		Version:         provider.NextVersion,
   341  		TargetNamespace: provider.Namespace,
   342  	}
   343  	components, err := providerRepository.Components().Get(ctx, options)
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  	return components, nil
   348  }
   349  
   350  func (u *providerUpgrader) doUpgrade(ctx context.Context, upgradePlan *UpgradePlan, opts UpgradeOptions) error {
   351  	// Check for multiple instances of the same provider if current contract is v1alpha3.
   352  	// TODO(killianmuldoon) Assess if we can remove this piece of code.
   353  	if upgradePlan.Contract == clusterv1.GroupVersion.Version {
   354  		if err := u.providerInventory.CheckSingleProviderInstance(ctx); err != nil {
   355  			return err
   356  		}
   357  	}
   358  
   359  	// Ensure Providers are updated in the following order: Core, Bootstrap, ControlPlane, Infrastructure.
   360  	providers := upgradePlan.Providers
   361  	sort.Slice(providers, func(a, b int) bool {
   362  		return providers[a].GetProviderType().Order() < providers[b].GetProviderType().Order()
   363  	})
   364  
   365  	// Migrate CRs to latest CRD storage version, if necessary.
   366  	// Note: We have to do this before the providers are scaled down or deleted
   367  	// so conversion webhooks still work.
   368  	for _, upgradeItem := range providers {
   369  		// If there is not a specified next version, skip it (we are already up-to-date).
   370  		if upgradeItem.NextVersion == "" {
   371  			continue
   372  		}
   373  
   374  		// Gets the provider components for the target version.
   375  		components, err := u.getUpgradeComponents(ctx, upgradeItem)
   376  		if err != nil {
   377  			return err
   378  		}
   379  
   380  		c, err := u.proxy.NewClient(ctx)
   381  		if err != nil {
   382  			return err
   383  		}
   384  
   385  		if err := NewCRDMigrator(c).Run(ctx, components.Objs()); err != nil {
   386  			return err
   387  		}
   388  	}
   389  
   390  	// Scale down all providers.
   391  	// This is done to ensure all Pods of all "old" provider Deployments have been deleted.
   392  	// Otherwise it can happen that a provider Pod survives the upgrade because we create
   393  	// a new Deployment with the same selector directly after `Delete`.
   394  	// This can lead to a failed upgrade because:
   395  	// * new provider Pods fail to startup because they try to list resources.
   396  	// * list resources fails, because the API server hits the old provider Pod when trying to
   397  	//   call the conversion webhook for those resources.
   398  	for _, upgradeItem := range providers {
   399  		// If there is not a specified next version, skip it (we are already up-to-date).
   400  		if upgradeItem.NextVersion == "" {
   401  			continue
   402  		}
   403  
   404  		// Scale down provider.
   405  		if err := u.scaleDownProvider(ctx, upgradeItem.Provider); err != nil {
   406  			return err
   407  		}
   408  	}
   409  
   410  	installQueue := []repository.Components{}
   411  
   412  	// Delete old providers and deploy new ones if necessary, i.e. there is a NextVersion.
   413  	for _, upgradeItem := range providers {
   414  		// If there is not a specified next version, skip it (we are already up-to-date).
   415  		if upgradeItem.NextVersion == "" {
   416  			continue
   417  		}
   418  
   419  		// Gets the provider components for the target version.
   420  		components, err := u.getUpgradeComponents(ctx, upgradeItem)
   421  		if err != nil {
   422  			return err
   423  		}
   424  
   425  		installQueue = append(installQueue, components)
   426  
   427  		// Delete the provider, preserving CRD, namespace and the inventory.
   428  		if err := u.providerComponents.Delete(ctx, DeleteOptions{
   429  			Provider:         upgradeItem.Provider,
   430  			IncludeNamespace: false,
   431  			IncludeCRDs:      false,
   432  			SkipInventory:    true,
   433  		}); err != nil {
   434  			return err
   435  		}
   436  
   437  		// Install the new version of the provider components.
   438  		if err := installComponentsAndUpdateInventory(ctx, components, u.providerComponents, u.providerInventory); err != nil {
   439  			return err
   440  		}
   441  	}
   442  
   443  	// Delete webhook namespace since it's not needed from v1alpha4.
   444  	if upgradePlan.Contract == clusterv1.GroupVersion.Version {
   445  		if err := u.providerComponents.DeleteWebhookNamespace(ctx); err != nil {
   446  			return err
   447  		}
   448  	}
   449  
   450  	return waitForProvidersReady(ctx, InstallOptions(opts), installQueue, u.proxy)
   451  }
   452  
   453  func (u *providerUpgrader) scaleDownProvider(ctx context.Context, provider clusterctlv1.Provider) error {
   454  	log := logf.Log
   455  	log.Info("Scaling down", "Provider", provider.Name, "Version", provider.Version, "Namespace", provider.Namespace)
   456  
   457  	cs, err := u.proxy.NewClient(ctx)
   458  	if err != nil {
   459  		return err
   460  	}
   461  
   462  	// Fetch all Deployments belonging to a provider.
   463  	deploymentList := &appsv1.DeploymentList{}
   464  	if err := cs.List(ctx,
   465  		deploymentList,
   466  		client.InNamespace(provider.Namespace),
   467  		client.MatchingLabels{
   468  			clusterctlv1.ClusterctlLabel: "",
   469  			clusterv1.ProviderNameLabel:  provider.ManifestLabel(),
   470  		}); err != nil {
   471  		return errors.Wrapf(err, "failed to list Deployments for provider %s", provider.Name)
   472  	}
   473  
   474  	// Scale down provider Deployments.
   475  	for _, deployment := range deploymentList.Items {
   476  		log.V(5).Info("Scaling down", "Deployment", deployment.Name, "Namespace", deployment.Namespace)
   477  		if err := scaleDownDeployment(ctx, cs, deployment); err != nil {
   478  			return err
   479  		}
   480  	}
   481  
   482  	return nil
   483  }
   484  
   485  // scaleDownDeployment scales down a Deployment to 0 and waits until all replicas have been deleted.
   486  func scaleDownDeployment(ctx context.Context, c client.Client, deploy appsv1.Deployment) error {
   487  	if err := retryWithExponentialBackoff(ctx, newWriteBackoff(), func(ctx context.Context) error {
   488  		deployment := &appsv1.Deployment{}
   489  		if err := c.Get(ctx, client.ObjectKeyFromObject(&deploy), deployment); err != nil {
   490  			return errors.Wrapf(err, "failed to get Deployment/%s", deploy.GetName())
   491  		}
   492  
   493  		// Deployment already scaled down, return early.
   494  		if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas == 0 {
   495  			return nil
   496  		}
   497  
   498  		// Scale down.
   499  		deployment.Spec.Replicas = ptr.To[int32](0)
   500  		if err := c.Update(ctx, deployment); err != nil {
   501  			return errors.Wrapf(err, "failed to update Deployment/%s", deploy.GetName())
   502  		}
   503  		return nil
   504  	}); err != nil {
   505  		return errors.Wrapf(err, "failed to scale down Deployment")
   506  	}
   507  
   508  	deploymentScaleToZeroBackOff := wait.Backoff{
   509  		Duration: 1 * time.Second,
   510  		Factor:   1,
   511  		Steps:    60,
   512  		Jitter:   0.4,
   513  	}
   514  	if err := retryWithExponentialBackoff(ctx, deploymentScaleToZeroBackOff, func(ctx context.Context) error {
   515  		deployment := &appsv1.Deployment{}
   516  		if err := c.Get(ctx, client.ObjectKeyFromObject(&deploy), deployment); err != nil {
   517  			return errors.Wrapf(err, "failed to get Deployment/%s", deploy.GetName())
   518  		}
   519  
   520  		// Deployment is scaled down.
   521  		if deployment.Status.Replicas == 0 {
   522  			return nil
   523  		}
   524  
   525  		return errors.Errorf("Deployment still has %d replicas", deployment.Status.Replicas)
   526  	}); err != nil {
   527  		return errors.Wrapf(err, "failed to wait until Deployment is scaled down")
   528  	}
   529  
   530  	return nil
   531  }
   532  
   533  func newProviderUpgrader(configClient config.Client, proxy Proxy, repositoryClientFactory RepositoryClientFactory, providerInventory InventoryClient, providerComponents ComponentsClient) *providerUpgrader {
   534  	return &providerUpgrader{
   535  		configClient:            configClient,
   536  		proxy:                   proxy,
   537  		repositoryClientFactory: repositoryClientFactory,
   538  		providerInventory:       providerInventory,
   539  		providerComponents:      providerComponents,
   540  	}
   541  }