k8s.io/kubernetes@v1.29.3/test/e2e/cloud/gcp/common/upgrade_mechanics.go (about)

     1  /*
     2  Copyright 2021 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"os/exec"
    24  	"strings"
    25  	"time"
    26  
    27  	"k8s.io/apimachinery/pkg/util/wait"
    28  	"k8s.io/apimachinery/pkg/version"
    29  	clientset "k8s.io/client-go/kubernetes"
    30  	"k8s.io/kubernetes/test/e2e/framework"
    31  	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
    32  	e2eproviders "k8s.io/kubernetes/test/e2e/framework/providers"
    33  	"k8s.io/kubernetes/test/e2e/upgrades"
    34  	"k8s.io/kubernetes/test/utils/junit"
    35  )
    36  
    37  // ControlPlaneUpgradeFunc returns a function that performs control plane upgrade.
    38  func ControlPlaneUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs []string) func(ctx context.Context) {
    39  	return func(ctx context.Context) {
    40  		target := upgCtx.Versions[1].Version.String()
    41  		framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs))
    42  		framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target))
    43  	}
    44  }
    45  
    46  // ClusterUpgradeFunc returns a function that performs full cluster upgrade (both control plane and nodes).
    47  func ClusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func(ctx context.Context) {
    48  	return func(ctx context.Context) {
    49  		target := upgCtx.Versions[1].Version.String()
    50  		image := upgCtx.Versions[1].NodeImage
    51  		framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs))
    52  		framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target))
    53  		framework.ExpectNoError(nodeUpgrade(ctx, f, target, image, nodeExtraEnvs))
    54  		framework.ExpectNoError(checkNodesVersions(ctx, f.ClientSet, target))
    55  	}
    56  }
    57  
    58  // ClusterDowngradeFunc returns a function that performs full cluster downgrade (both nodes and control plane).
    59  func ClusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func(ctx context.Context) {
    60  	return func(ctx context.Context) {
    61  		target := upgCtx.Versions[1].Version.String()
    62  		image := upgCtx.Versions[1].NodeImage
    63  		// Yes this really is a downgrade. And nodes must downgrade first.
    64  		framework.ExpectNoError(nodeUpgrade(ctx, f, target, image, nodeExtraEnvs))
    65  		framework.ExpectNoError(checkNodesVersions(ctx, f.ClientSet, target))
    66  		framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs))
    67  		framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target))
    68  	}
    69  }
    70  
    71  const etcdImage = "3.4.9-1"
    72  
    73  // controlPlaneUpgrade upgrades control plane node on GCE/GKE.
    74  func controlPlaneUpgrade(ctx context.Context, f *framework.Framework, v string, extraEnvs []string) error {
    75  	switch framework.TestContext.Provider {
    76  	case "gce":
    77  		return controlPlaneUpgradeGCE(v, extraEnvs)
    78  	case "gke":
    79  		return e2eproviders.MasterUpgradeGKE(ctx, f.Namespace.Name, v)
    80  	default:
    81  		return fmt.Errorf("controlPlaneUpgrade() is not implemented for provider %s", framework.TestContext.Provider)
    82  	}
    83  }
    84  
    85  func controlPlaneUpgradeGCE(rawV string, extraEnvs []string) error {
    86  	env := append(os.Environ(), extraEnvs...)
    87  	// TODO: Remove these variables when they're no longer needed for downgrades.
    88  	if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" {
    89  		env = append(env,
    90  			"TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion,
    91  			"STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage,
    92  			"TEST_ETCD_IMAGE="+etcdImage)
    93  	} else {
    94  		// In e2e tests, we skip the confirmation prompt about
    95  		// implicit etcd upgrades to simulate the user entering "y".
    96  		env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true")
    97  	}
    98  
    99  	v := "v" + rawV
   100  	_, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-M", v)
   101  	return err
   102  }
   103  
   104  func traceRouteToControlPlane() {
   105  	traceroute, err := exec.LookPath("traceroute")
   106  	if err != nil {
   107  		framework.Logf("Could not find traceroute program")
   108  		return
   109  	}
   110  	cmd := exec.Command(traceroute, "-I", framework.APIAddress())
   111  	out, err := cmd.Output()
   112  	if len(out) != 0 {
   113  		framework.Logf(string(out))
   114  	}
   115  	if exiterr, ok := err.(*exec.ExitError); err != nil && ok {
   116  		framework.Logf("Error while running traceroute: %s", exiterr.Stderr)
   117  	}
   118  }
   119  
   120  // checkControlPlaneVersion validates the control plane version
   121  func checkControlPlaneVersion(ctx context.Context, c clientset.Interface, want string) error {
   122  	framework.Logf("Checking control plane version")
   123  	var err error
   124  	var v *version.Info
   125  	waitErr := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
   126  		v, err = c.Discovery().ServerVersion()
   127  		if err != nil {
   128  			traceRouteToControlPlane()
   129  			return false, nil
   130  		}
   131  		return true, nil
   132  	})
   133  	if waitErr != nil {
   134  		return fmt.Errorf("CheckControlPlane() couldn't get the control plane version: %w", err)
   135  	}
   136  	// We do prefix trimming and then matching because:
   137  	// want looks like:  0.19.3-815-g50e67d4
   138  	// got  looks like: v0.19.3-815-g50e67d4034e858-dirty
   139  	got := strings.TrimPrefix(v.GitVersion, "v")
   140  	if !strings.HasPrefix(got, want) {
   141  		return fmt.Errorf("control plane had kube-apiserver version %s which does not start with %s", got, want)
   142  	}
   143  	framework.Logf("Control plane is at version %s", want)
   144  	return nil
   145  }
   146  
   147  // nodeUpgrade upgrades nodes on GCE/GKE.
   148  func nodeUpgrade(ctx context.Context, f *framework.Framework, v string, img string, extraEnvs []string) error {
   149  	// Perform the upgrade.
   150  	var err error
   151  	switch framework.TestContext.Provider {
   152  	case "gce":
   153  		err = nodeUpgradeGCE(v, img, extraEnvs)
   154  	case "gke":
   155  		err = nodeUpgradeGKE(ctx, f.Namespace.Name, v, img)
   156  	default:
   157  		err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider)
   158  	}
   159  	if err != nil {
   160  		return err
   161  	}
   162  	return waitForNodesReadyAfterUpgrade(ctx, f)
   163  }
   164  
   165  // TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default.
   166  func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error {
   167  	v := "v" + rawV
   168  	env := append(os.Environ(), extraEnvs...)
   169  	if img != "" {
   170  		env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img)
   171  		_, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-N", "-o", v)
   172  		return err
   173  	}
   174  	_, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-N", v)
   175  	return err
   176  }
   177  
   178  func nodeUpgradeGKE(ctx context.Context, namespace string, v string, img string) error {
   179  	framework.Logf("Upgrading nodes to version %q and image %q", v, img)
   180  	nps, err := nodePoolsGKE()
   181  	if err != nil {
   182  		return err
   183  	}
   184  	framework.Logf("Found node pools %v", nps)
   185  	for _, np := range nps {
   186  		args := []string{
   187  			"container",
   188  			"clusters",
   189  			fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
   190  			e2eproviders.LocationParamGKE(),
   191  			"upgrade",
   192  			framework.TestContext.CloudConfig.Cluster,
   193  			fmt.Sprintf("--node-pool=%s", np),
   194  			fmt.Sprintf("--cluster-version=%s", v),
   195  			"--quiet",
   196  		}
   197  		if len(img) > 0 {
   198  			args = append(args, fmt.Sprintf("--image-type=%s", img))
   199  		}
   200  		_, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...)
   201  
   202  		if err != nil {
   203  			return err
   204  		}
   205  
   206  		e2enode.WaitForSSHTunnels(ctx, namespace)
   207  	}
   208  	return nil
   209  }
   210  
   211  func nodePoolsGKE() ([]string, error) {
   212  	args := []string{
   213  		"container",
   214  		"node-pools",
   215  		fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
   216  		e2eproviders.LocationParamGKE(),
   217  		"list",
   218  		fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster),
   219  		"--format=get(name)",
   220  	}
   221  	stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  	if len(strings.TrimSpace(stdout)) == 0 {
   226  		return []string{}, nil
   227  	}
   228  	return strings.Fields(stdout), nil
   229  }
   230  
   231  func waitForNodesReadyAfterUpgrade(ctx context.Context, f *framework.Framework) error {
   232  	// Wait for it to complete and validate nodes are healthy.
   233  	//
   234  	// TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in
   235  	// GKE; the operation shouldn't return until they all are.
   236  	numNodes, err := e2enode.TotalRegistered(ctx, f.ClientSet)
   237  	if err != nil {
   238  		return fmt.Errorf("couldn't detect number of nodes")
   239  	}
   240  	framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes)
   241  	if _, err := e2enode.CheckReady(ctx, f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil {
   242  		return err
   243  	}
   244  	return nil
   245  }
   246  
   247  // checkNodesVersions validates the nodes versions
   248  func checkNodesVersions(ctx context.Context, cs clientset.Interface, want string) error {
   249  	l, err := e2enode.GetReadySchedulableNodes(ctx, cs)
   250  	if err != nil {
   251  		return err
   252  	}
   253  	for _, n := range l.Items {
   254  		// We do prefix trimming and then matching because:
   255  		// want   looks like:  0.19.3-815-g50e67d4
   256  		// kv/kvp look  like: v0.19.3-815-g50e67d4034e858-dirty
   257  		kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"),
   258  			strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v")
   259  		if !strings.HasPrefix(kv, want) {
   260  			return fmt.Errorf("node %s had kubelet version %s which does not start with %s",
   261  				n.ObjectMeta.Name, kv, want)
   262  		}
   263  		if !strings.HasPrefix(kpv, want) {
   264  			return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s",
   265  				n.ObjectMeta.Name, kpv, want)
   266  		}
   267  	}
   268  	return nil
   269  }