k8s.io/kubernetes@v1.29.3/test/e2e/cloud/gcp/common/upgrade_mechanics.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "fmt" 22 "os" 23 "os/exec" 24 "strings" 25 "time" 26 27 "k8s.io/apimachinery/pkg/util/wait" 28 "k8s.io/apimachinery/pkg/version" 29 clientset "k8s.io/client-go/kubernetes" 30 "k8s.io/kubernetes/test/e2e/framework" 31 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 32 e2eproviders "k8s.io/kubernetes/test/e2e/framework/providers" 33 "k8s.io/kubernetes/test/e2e/upgrades" 34 "k8s.io/kubernetes/test/utils/junit" 35 ) 36 37 // ControlPlaneUpgradeFunc returns a function that performs control plane upgrade. 38 func ControlPlaneUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs []string) func(ctx context.Context) { 39 return func(ctx context.Context) { 40 target := upgCtx.Versions[1].Version.String() 41 framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs)) 42 framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target)) 43 } 44 } 45 46 // ClusterUpgradeFunc returns a function that performs full cluster upgrade (both control plane and nodes). 47 func ClusterUpgradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func(ctx context.Context) { 48 return func(ctx context.Context) { 49 target := upgCtx.Versions[1].Version.String() 50 image := upgCtx.Versions[1].NodeImage 51 framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs)) 52 framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target)) 53 framework.ExpectNoError(nodeUpgrade(ctx, f, target, image, nodeExtraEnvs)) 54 framework.ExpectNoError(checkNodesVersions(ctx, f.ClientSet, target)) 55 } 56 } 57 58 // ClusterDowngradeFunc returns a function that performs full cluster downgrade (both nodes and control plane). 59 func ClusterDowngradeFunc(f *framework.Framework, upgCtx *upgrades.UpgradeContext, testCase *junit.TestCase, controlPlaneExtraEnvs, nodeExtraEnvs []string) func(ctx context.Context) { 60 return func(ctx context.Context) { 61 target := upgCtx.Versions[1].Version.String() 62 image := upgCtx.Versions[1].NodeImage 63 // Yes this really is a downgrade. And nodes must downgrade first. 64 framework.ExpectNoError(nodeUpgrade(ctx, f, target, image, nodeExtraEnvs)) 65 framework.ExpectNoError(checkNodesVersions(ctx, f.ClientSet, target)) 66 framework.ExpectNoError(controlPlaneUpgrade(ctx, f, target, controlPlaneExtraEnvs)) 67 framework.ExpectNoError(checkControlPlaneVersion(ctx, f.ClientSet, target)) 68 } 69 } 70 71 const etcdImage = "3.4.9-1" 72 73 // controlPlaneUpgrade upgrades control plane node on GCE/GKE. 74 func controlPlaneUpgrade(ctx context.Context, f *framework.Framework, v string, extraEnvs []string) error { 75 switch framework.TestContext.Provider { 76 case "gce": 77 return controlPlaneUpgradeGCE(v, extraEnvs) 78 case "gke": 79 return e2eproviders.MasterUpgradeGKE(ctx, f.Namespace.Name, v) 80 default: 81 return fmt.Errorf("controlPlaneUpgrade() is not implemented for provider %s", framework.TestContext.Provider) 82 } 83 } 84 85 func controlPlaneUpgradeGCE(rawV string, extraEnvs []string) error { 86 env := append(os.Environ(), extraEnvs...) 87 // TODO: Remove these variables when they're no longer needed for downgrades. 88 if framework.TestContext.EtcdUpgradeVersion != "" && framework.TestContext.EtcdUpgradeStorage != "" { 89 env = append(env, 90 "TEST_ETCD_VERSION="+framework.TestContext.EtcdUpgradeVersion, 91 "STORAGE_BACKEND="+framework.TestContext.EtcdUpgradeStorage, 92 "TEST_ETCD_IMAGE="+etcdImage) 93 } else { 94 // In e2e tests, we skip the confirmation prompt about 95 // implicit etcd upgrades to simulate the user entering "y". 96 env = append(env, "TEST_ALLOW_IMPLICIT_ETCD_UPGRADE=true") 97 } 98 99 v := "v" + rawV 100 _, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-M", v) 101 return err 102 } 103 104 func traceRouteToControlPlane() { 105 traceroute, err := exec.LookPath("traceroute") 106 if err != nil { 107 framework.Logf("Could not find traceroute program") 108 return 109 } 110 cmd := exec.Command(traceroute, "-I", framework.APIAddress()) 111 out, err := cmd.Output() 112 if len(out) != 0 { 113 framework.Logf(string(out)) 114 } 115 if exiterr, ok := err.(*exec.ExitError); err != nil && ok { 116 framework.Logf("Error while running traceroute: %s", exiterr.Stderr) 117 } 118 } 119 120 // checkControlPlaneVersion validates the control plane version 121 func checkControlPlaneVersion(ctx context.Context, c clientset.Interface, want string) error { 122 framework.Logf("Checking control plane version") 123 var err error 124 var v *version.Info 125 waitErr := wait.PollUntilContextTimeout(ctx, 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { 126 v, err = c.Discovery().ServerVersion() 127 if err != nil { 128 traceRouteToControlPlane() 129 return false, nil 130 } 131 return true, nil 132 }) 133 if waitErr != nil { 134 return fmt.Errorf("CheckControlPlane() couldn't get the control plane version: %w", err) 135 } 136 // We do prefix trimming and then matching because: 137 // want looks like: 0.19.3-815-g50e67d4 138 // got looks like: v0.19.3-815-g50e67d4034e858-dirty 139 got := strings.TrimPrefix(v.GitVersion, "v") 140 if !strings.HasPrefix(got, want) { 141 return fmt.Errorf("control plane had kube-apiserver version %s which does not start with %s", got, want) 142 } 143 framework.Logf("Control plane is at version %s", want) 144 return nil 145 } 146 147 // nodeUpgrade upgrades nodes on GCE/GKE. 148 func nodeUpgrade(ctx context.Context, f *framework.Framework, v string, img string, extraEnvs []string) error { 149 // Perform the upgrade. 150 var err error 151 switch framework.TestContext.Provider { 152 case "gce": 153 err = nodeUpgradeGCE(v, img, extraEnvs) 154 case "gke": 155 err = nodeUpgradeGKE(ctx, f.Namespace.Name, v, img) 156 default: 157 err = fmt.Errorf("nodeUpgrade() is not implemented for provider %s", framework.TestContext.Provider) 158 } 159 if err != nil { 160 return err 161 } 162 return waitForNodesReadyAfterUpgrade(ctx, f) 163 } 164 165 // TODO(mrhohn): Remove 'enableKubeProxyDaemonSet' when kube-proxy is run as a DaemonSet by default. 166 func nodeUpgradeGCE(rawV, img string, extraEnvs []string) error { 167 v := "v" + rawV 168 env := append(os.Environ(), extraEnvs...) 169 if img != "" { 170 env = append(env, "KUBE_NODE_OS_DISTRIBUTION="+img) 171 _, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-N", "-o", v) 172 return err 173 } 174 _, _, err := framework.RunCmdEnv(env, e2eproviders.GCEUpgradeScript(), "-N", v) 175 return err 176 } 177 178 func nodeUpgradeGKE(ctx context.Context, namespace string, v string, img string) error { 179 framework.Logf("Upgrading nodes to version %q and image %q", v, img) 180 nps, err := nodePoolsGKE() 181 if err != nil { 182 return err 183 } 184 framework.Logf("Found node pools %v", nps) 185 for _, np := range nps { 186 args := []string{ 187 "container", 188 "clusters", 189 fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), 190 e2eproviders.LocationParamGKE(), 191 "upgrade", 192 framework.TestContext.CloudConfig.Cluster, 193 fmt.Sprintf("--node-pool=%s", np), 194 fmt.Sprintf("--cluster-version=%s", v), 195 "--quiet", 196 } 197 if len(img) > 0 { 198 args = append(args, fmt.Sprintf("--image-type=%s", img)) 199 } 200 _, _, err = framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) 201 202 if err != nil { 203 return err 204 } 205 206 e2enode.WaitForSSHTunnels(ctx, namespace) 207 } 208 return nil 209 } 210 211 func nodePoolsGKE() ([]string, error) { 212 args := []string{ 213 "container", 214 "node-pools", 215 fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID), 216 e2eproviders.LocationParamGKE(), 217 "list", 218 fmt.Sprintf("--cluster=%s", framework.TestContext.CloudConfig.Cluster), 219 "--format=get(name)", 220 } 221 stdout, _, err := framework.RunCmd("gcloud", framework.AppendContainerCommandGroupIfNeeded(args)...) 222 if err != nil { 223 return nil, err 224 } 225 if len(strings.TrimSpace(stdout)) == 0 { 226 return []string{}, nil 227 } 228 return strings.Fields(stdout), nil 229 } 230 231 func waitForNodesReadyAfterUpgrade(ctx context.Context, f *framework.Framework) error { 232 // Wait for it to complete and validate nodes are healthy. 233 // 234 // TODO(ihmccreery) We shouldn't have to wait for nodes to be ready in 235 // GKE; the operation shouldn't return until they all are. 236 numNodes, err := e2enode.TotalRegistered(ctx, f.ClientSet) 237 if err != nil { 238 return fmt.Errorf("couldn't detect number of nodes") 239 } 240 framework.Logf("Waiting up to %v for all %d nodes to be ready after the upgrade", framework.RestartNodeReadyAgainTimeout, numNodes) 241 if _, err := e2enode.CheckReady(ctx, f.ClientSet, numNodes, framework.RestartNodeReadyAgainTimeout); err != nil { 242 return err 243 } 244 return nil 245 } 246 247 // checkNodesVersions validates the nodes versions 248 func checkNodesVersions(ctx context.Context, cs clientset.Interface, want string) error { 249 l, err := e2enode.GetReadySchedulableNodes(ctx, cs) 250 if err != nil { 251 return err 252 } 253 for _, n := range l.Items { 254 // We do prefix trimming and then matching because: 255 // want looks like: 0.19.3-815-g50e67d4 256 // kv/kvp look like: v0.19.3-815-g50e67d4034e858-dirty 257 kv, kpv := strings.TrimPrefix(n.Status.NodeInfo.KubeletVersion, "v"), 258 strings.TrimPrefix(n.Status.NodeInfo.KubeProxyVersion, "v") 259 if !strings.HasPrefix(kv, want) { 260 return fmt.Errorf("node %s had kubelet version %s which does not start with %s", 261 n.ObjectMeta.Name, kv, want) 262 } 263 if !strings.HasPrefix(kpv, want) { 264 return fmt.Errorf("node %s had kube-proxy version %s which does not start with %s", 265 n.ObjectMeta.Name, kpv, want) 266 } 267 } 268 return nil 269 }