github.com/zhyoulun/cilium@v1.6.12/test/k8sT/Updates.go (about) 1 package k8sTest 2 3 import ( 4 "fmt" 5 "strconv" 6 "strings" 7 8 . "github.com/cilium/cilium/test/ginkgo-ext" 9 "github.com/cilium/cilium/test/helpers" 10 . "github.com/onsi/gomega" 11 ) 12 13 var ( 14 demoPath = helpers.ManifestGet("demo.yaml") 15 l7Policy = helpers.ManifestGet("l7-policy.yaml") 16 migrateSVCClient = helpers.ManifestGet("migrate-svc-client.yaml") 17 migrateSVCServer = helpers.ManifestGet("migrate-svc-server.yaml") 18 ) 19 20 var _ = Describe("K8sUpdates", func() { 21 22 // This test runs 8 steps as following: 23 // 1 - delete all pods. Clean cilium, this can be, and should be achieved by 24 // `clean-cilium-state: "true"` option that we have in configmap 25 // 2 - install cilium `cilium:v${LATEST_STABLE}` 26 // 3 - make endpoints talk with each other with policy 27 // 4 - upgrade cilium to `k8s1:5000/cilium/cilium-dev:latest` 28 // 5 - make endpoints talk with each other with policy 29 // 6 - downgrade cilium to `cilium:v${LATEST_STABLE}` 30 // 7 - make endpoints talk with each other with policy 31 // 8 - delete all pods. Clean cilium, this can be, and should be achieved by 32 // `clean-cilium-state: "true"` option that we have in configmap. 33 // This makes sure the upgrade tests won't affect any other test 34 // 9 - re install cilium:latest image for remaining tests. 35 36 var ( 37 kubectl *helpers.Kubectl 38 39 cleanupCallback = func() { return } 40 ) 41 42 BeforeAll(func() { 43 kubectl = helpers.CreateKubectl(helpers.K8s1VMName(), logger) 44 45 _ = kubectl.Delete(helpers.DNSDeployment()) 46 47 kubectl.Delete(migrateSVCClient) 48 kubectl.Delete(migrateSVCServer) 49 kubectl.Delete(l7Policy) 50 kubectl.Delete(demoPath) 51 52 // Delete kube-dns because if not will be a restore the old endpoints 53 // from master instead of create the new ones. 54 _ = kubectl.DeleteResource( 55 "deploy", fmt.Sprintf("-n %s kube-dns", helpers.KubeSystemNamespace)) 56 57 _ = kubectl.DeleteResource( 58 "deploy", fmt.Sprintf("-n %s cilium-operator", helpers.KubeSystemNamespace)) 59 // Sometimes PolicyGen has a lot of pods running around without delete 60 // it. Using this we are sure that we delete before this test start 61 kubectl.Exec(fmt.Sprintf( 62 "%s delete --all pods,svc,cnp -n %s", helpers.KubectlCmd, helpers.DefaultNamespace)) 63 64 kubectl.DeleteETCDOperator() 65 66 ExpectAllPodsTerminated(kubectl) 67 }) 68 69 AfterAll(func() { 70 kubectl.CloseSSHClient() 71 }) 72 73 AfterFailed(func() { 74 kubectl.CiliumReport(helpers.KubeSystemNamespace, "cilium endpoint list") 75 }) 76 77 JustAfterEach(func() { 78 kubectl.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration) 79 }) 80 81 AfterEach(func() { 82 cleanupCallback() 83 ExpectAllPodsTerminated(kubectl) 84 }) 85 86 It("Tests upgrade and downgrade from a Cilium stable image to master", func() { 87 var assertUpgradeSuccessful func() 88 assertUpgradeSuccessful, cleanupCallback = 89 InstallAndValidateCiliumUpgrades(kubectl, helpers.CiliumStableVersion, helpers.CiliumDeveloperImage) 90 assertUpgradeSuccessful() 91 }) 92 }) 93 94 // InstallAndValidateCiliumUpgrades installs and tests if the oldVersion can be 95 // upgrade to the newVersion and if the newVersion can be downgraded to the 96 // oldVersion. It returns two callbacks, the first one is the assertfunction 97 // that need to run, and the second one are the cleanup actions 98 func InstallAndValidateCiliumUpgrades(kubectl *helpers.Kubectl, oldVersion, newVersion string) (func(), func()) { 99 canRun, err := helpers.CanRunK8sVersion(oldVersion, helpers.GetCurrentK8SEnv()) 100 ExpectWithOffset(1, err).To(BeNil(), "Unable to get k8s constraints for %s", oldVersion) 101 if !canRun { 102 Skip(fmt.Sprintf( 103 "Cilium %q is not supported in K8s %q. Skipping upgrade/downgrade tests.", 104 oldVersion, helpers.GetCurrentK8SEnv())) 105 return func() {}, func() {} 106 } 107 108 SkipIfFlannel() 109 110 apps := []string{helpers.App1, helpers.App2, helpers.App3} 111 app1Service := "app1-service" 112 113 cleanupCallback := func() { 114 kubectl.Delete(migrateSVCClient) 115 kubectl.Delete(migrateSVCServer) 116 kubectl.Delete(l7Policy) 117 kubectl.Delete(demoPath) 118 119 kubectl.DeleteETCDOperator() 120 121 ExpectAllPodsTerminated(kubectl) 122 123 // make sure we clean everything up before doing any other test 124 err := kubectl.CiliumInstall([]string{ 125 "--set global.cleanState=true", 126 }) 127 128 ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", newVersion) 129 err = kubectl.WaitForCiliumInitContainerToFinish() 130 ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be clean up environment", newVersion) 131 132 if res := kubectl.Delete(helpers.DNSDeployment()); !res.WasSuccessful() { 133 log.Warningf("Unable to delete CoreDNS deployment: %s", res.OutputPrettyPrint()) 134 } 135 136 if err := kubectl.CiliumUninstall([]string{}); err != nil { 137 log.WithError(err).Warning("Unable to uninstall Cilium") 138 } 139 } 140 141 testfunc := func() { 142 By("Deleting Cilium, CoreDNS, and etcd-operator...") 143 // Making sure that we deleted the cilium ds. No assert 144 // message because maybe is not present 145 if res := kubectl.DeleteResource("ds", fmt.Sprintf("-n %s cilium", helpers.KubeSystemNamespace)); !res.WasSuccessful() { 146 log.Warningf("Unable to delete Cilium DaemonSet: %s", res.OutputPrettyPrint()) 147 } 148 149 // Delete kube-dns because if not will be a restore the old 150 // endpoints from master instead of create the new ones. 151 if res := kubectl.Delete(helpers.DNSDeployment()); !res.WasSuccessful() { 152 log.Warningf("Unable to delete CoreDNS deployment: %s", res.OutputPrettyPrint()) 153 } 154 155 // Delete all etcd pods otherwise they will be kept running but 156 // the bpf endpoints will be cleaned up when we restart cilium 157 // with a clean state a couple lines bellow 158 kubectl.DeleteETCDOperator() 159 160 By("Waiting for pods to be terminated..") 161 ExpectAllPodsTerminated(kubectl) 162 163 By("Cleaning Cilium state") 164 err = kubectl.CiliumInstallVersion( 165 "cilium-ds-clean-only.yaml", 166 "cilium-cm-patch-clean-cilium-state.yaml", 167 oldVersion, 168 ) 169 Expect(err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion) 170 171 err := kubectl.WaitforPods(helpers.KubeSystemNamespace, "-l k8s-app=cilium", longTimeout) 172 ExpectWithOffset(1, err).Should(BeNil(), "Cleaning state did not complete in time") 173 174 By("Deploying Cilium") 175 err = kubectl.CiliumInstallVersion( 176 helpers.CiliumDefaultDSPatch, 177 "cilium-cm-patch.yaml", 178 oldVersion, 179 ) 180 Expect(err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion) 181 182 By("Installing kube-dns") 183 _ = kubectl.ApplyDefault(helpers.DNSDeployment()) 184 185 // Cilium is only ready if kvstore is ready, the kvstore is ready if 186 // kube-dns is running. 187 By("Cilium %q is installed and running", oldVersion) 188 ExpectCiliumReady(kubectl) 189 190 ExpectETCDOperatorReady(kubectl) 191 ExpectCiliumOperatorReady(kubectl) 192 193 By("Installing Microscope") 194 microscopeErr, microscopeCancel := kubectl.MicroscopeStart() 195 ExpectWithOffset(1, microscopeErr).To(BeNil(), "Microscope cannot be started") 196 defer microscopeCancel() 197 198 validatedImage := func(image string) { 199 By("Checking that installed image is %q", image) 200 201 filter := `{.items[*].status.containerStatuses[0].image}` 202 data, err := kubectl.GetPods( 203 helpers.KubeSystemNamespace, "-l k8s-app=cilium").Filter(filter) 204 ExpectWithOffset(1, err).To(BeNil(), "Cannot get cilium pods") 205 206 for _, val := range strings.Split(data.String(), " ") { 207 ExpectWithOffset(1, val).To(ContainSubstring(image), "Cilium image didn't update correctly") 208 } 209 } 210 211 validateEndpointsConnection := func() { 212 By("Validate that endpoints are ready before making any connection") 213 err := kubectl.CiliumEndpointWaitReady() 214 ExpectWithOffset(1, err).To(BeNil(), "Endpoints are not ready after timeout") 215 216 ExpectKubeDNSReady(kubectl) 217 218 err = kubectl.WaitForKubeDNSEntry(app1Service, helpers.DefaultNamespace) 219 ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout") 220 221 appPods := helpers.GetAppPods(apps, helpers.DefaultNamespace, kubectl, "id") 222 223 err = kubectl.WaitForKubeDNSEntry(app1Service, helpers.DefaultNamespace) 224 ExpectWithOffset(1, err).To(BeNil(), "DNS entry is not ready after timeout") 225 226 By("Making L7 requests between endpoints") 227 res := kubectl.ExecPodCmd( 228 helpers.DefaultNamespace, appPods[helpers.App2], 229 helpers.CurlFail("http://%s/public", app1Service)) 230 ExpectWithOffset(1, res).Should(helpers.CMDSuccess(), "Cannot curl app1-service") 231 232 res = kubectl.ExecPodCmd( 233 helpers.DefaultNamespace, appPods[helpers.App2], 234 helpers.CurlFail("http://%s/private", app1Service)) 235 ExpectWithOffset(1, res).ShouldNot(helpers.CMDSuccess(), "Expect a 403 from app1-service") 236 } 237 238 // checkNoInteruptsInSVCFlows checks whether there are no 239 // interrupts in established connections to the migrate-svc service 240 // after Cilium has been upgraded / downgraded. 241 // 242 // The check is based on restart count of the Pods. We can do it so, because 243 // any interrupt in the flow makes a client to panic which makes the Pod 244 // to restart. 245 lastCount := -1 246 checkNoInteruptsInSVCFlows := func() { 247 By("No interrupts in migrated svc flows") 248 249 filter := `{.items[*].status.containerStatuses[0].restartCount}` 250 restartCount, err := kubectl.GetPods(helpers.DefaultNamespace, 251 "-l zgroup=migrate-svc").Filter(filter) 252 ExpectWithOffset(1, err).To(BeNil(), "Failed to query \"migrate-svc-server\" Pod") 253 254 currentCount := 0 255 for _, c := range strings.Split(restartCount.String(), " ") { 256 count, err := strconv.Atoi(c) 257 ExpectWithOffset(1, err).To(BeNil(), "Failed to convert count value") 258 currentCount += count 259 } 260 // The check is invoked for the first time 261 if lastCount == -1 { 262 lastCount = currentCount 263 } 264 Expect(lastCount).Should(BeIdenticalTo(currentCount), 265 "migrate-svc restart count values do not match") 266 } 267 268 By("Creating some endpoints and L7 policy") 269 270 res := kubectl.ApplyDefault(demoPath) 271 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply dempo application") 272 273 err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l zgroup=testapp", timeout) 274 Expect(err).Should(BeNil(), "Test pods are not ready after timeout") 275 276 ExpectKubeDNSReady(kubectl) 277 278 _, err = kubectl.CiliumPolicyAction( 279 helpers.DefaultNamespace, l7Policy, helpers.KubectlApply, timeout) 280 Expect(err).Should(BeNil(), "cannot import l7 policy: %v", l7Policy) 281 282 By("Creating service and clients for migration") 283 284 res = kubectl.ApplyDefault(migrateSVCServer) 285 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply migrate-svc-server") 286 err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l app=migrate-svc-server", timeout) 287 Expect(err).Should(BeNil(), "migrate-svc-server pods are not ready after timeout") 288 289 res = kubectl.ApplyDefault(migrateSVCClient) 290 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "cannot apply migrate-svc-client") 291 err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l app=migrate-svc-client", timeout) 292 Expect(err).Should(BeNil(), "migrate-svc-client pods are not ready after timeout") 293 294 validateEndpointsConnection() 295 checkNoInteruptsInSVCFlows() 296 297 waitForUpdateImage := func(image string) func() bool { 298 return func() bool { 299 pods, err := kubectl.GetCiliumPods(helpers.KubeSystemNamespace) 300 if err != nil { 301 return false 302 } 303 304 filter := `{.items[*].status.containerStatuses[0].image}` 305 data, err := kubectl.GetPods( 306 helpers.KubeSystemNamespace, "-l k8s-app=cilium").Filter(filter) 307 if err != nil { 308 return false 309 } 310 number := strings.Count(data.String(), image) 311 if number == len(pods) { 312 return true 313 } 314 log.Infof("Only '%v' of '%v' cilium pods updated to the new image", 315 number, len(pods)) 316 return false 317 } 318 } 319 320 By("Install Cilium pre-flight check DaemonSet") 321 322 res = kubectl.ExecMiddle("helm template " + 323 helpers.HelmTemplate + " " + 324 "--namespace=kube-system " + 325 "--set preflight.enabled=true " + 326 fmt.Sprintf("--set preflight.image=%s ", helpers.CiliumDeveloperImage) + 327 "--set agent.enabled=false " + 328 "--set config.enabled=false " + 329 "--set operator.enabled=false " + 330 "> cilium-preflight.yaml") 331 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "Unable to generate preflight YAML") 332 333 res = kubectl.ApplyDefault("cilium-preflight.yaml") 334 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "Unable to deploy preflight manifest") 335 ExpectCiliumPreFlightInstallReady(kubectl) 336 337 // Once they are installed we can remove it 338 By("Removing Cilium pre-flight check DaemonSet") 339 kubectl.Delete("cilium-preflight.yaml") 340 341 // Need to run using the kvstore-based allocator because upgrading from 342 // kvstore-based allocator to CRD-based allocator is not currently 343 // supported at this time. 344 By("Installing Cilium using kvstore-based allocator") 345 err = kubectl.CiliumInstall([]string{ 346 "--set global.identityAllocationMode=kvstore", 347 "--set global.etcd.enabled=true", 348 "--set global.etcd.managed=true", 349 }) 350 ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", newVersion) 351 352 err = helpers.WithTimeout( 353 waitForUpdateImage(newVersion), 354 "Cilium Pods are not updating correctly", 355 &helpers.TimeoutConfig{Timeout: timeout}) 356 ExpectWithOffset(1, err).To(BeNil(), "Pods are not updating") 357 358 err = kubectl.WaitforPods( 359 helpers.KubeSystemNamespace, "-l k8s-app=cilium", timeout) 360 ExpectWithOffset(1, err).Should(BeNil(), "Cilium is not ready after timeout") 361 362 validatedImage(newVersion) 363 ExpectCiliumReady(kubectl) 364 ExpectCiliumOperatorReady(kubectl) 365 366 validateEndpointsConnection() 367 checkNoInteruptsInSVCFlows() 368 369 By("Downgrading cilium to %s image", oldVersion) 370 371 err = kubectl.CiliumInstallVersion( 372 helpers.CiliumDefaultDSPatch, 373 helpers.CiliumConfigMapPatch, 374 oldVersion, 375 ) 376 ExpectWithOffset(1, err).To(BeNil(), "Cilium %q was not able to be deployed", oldVersion) 377 378 err = helpers.WithTimeout( 379 waitForUpdateImage(oldVersion), 380 "Cilium Pods are not updating correctly", 381 &helpers.TimeoutConfig{Timeout: timeout}) 382 ExpectWithOffset(1, err).To(BeNil(), "Pods are not updating") 383 384 err = kubectl.WaitforPods( 385 helpers.KubeSystemNamespace, "-l k8s-app=cilium", timeout) 386 ExpectWithOffset(1, err).Should(BeNil(), "Cilium is not ready after timeout") 387 388 validatedImage(oldVersion) 389 ExpectCiliumOperatorReady(kubectl) 390 391 validateEndpointsConnection() 392 checkNoInteruptsInSVCFlows() 393 } 394 return testfunc, cleanupCallback 395 }