github.com/cilium/cilium@v1.16.2/test/k8s/fqdn.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package k8sTest 5 6 import ( 7 "context" 8 "fmt" 9 "net" 10 11 . "github.com/onsi/gomega" 12 13 . "github.com/cilium/cilium/test/ginkgo-ext" 14 "github.com/cilium/cilium/test/helpers" 15 ) 16 17 // The 5.4 CI job is intended to catch BPF complexity regressions and as such 18 // doesn't need to execute this test suite. 19 var _ = SkipDescribeIf(helpers.RunsOn54Kernel, "K8sAgentFQDNTest", func() { 20 var ( 21 kubectl *helpers.Kubectl 22 23 demoManifest = "" 24 ciliumFilename string 25 26 apps = []string{helpers.App2, helpers.App3} 27 appPods map[string]string 28 29 // The IPs are updated in BeforeAll 30 worldTarget = "vagrant-cache.ci.cilium.io" 31 worldTargetIP = "147.75.38.95" 32 worldInvalidTarget = "cilium.io" 33 worldInvalidTargetIP = "104.198.14.52" 34 ) 35 36 BeforeAll(func() { 37 // In case the IPs changed from above, update them here 38 var lookupErr error 39 err := helpers.WithTimeout(func() bool { 40 addrs, err2 := net.LookupHost(worldTarget) 41 if err2 != nil { 42 lookupErr = fmt.Errorf("error looking up target domain: %w", err2) 43 return false 44 } 45 worldTargetIP = addrs[0] 46 return true 47 }, fmt.Sprintf("Could not get %s IP", worldTarget), &helpers.TimeoutConfig{Timeout: helpers.HelperTimeout}) 48 Expect(err).Should(BeNil(), "Error obtaining IP for test: %s", lookupErr) 49 50 lookupErr = nil 51 err = helpers.WithTimeout(func() bool { 52 addrs, err2 := net.LookupHost(worldInvalidTarget) 53 if err2 != nil { 54 lookupErr = fmt.Errorf("error looking up target domain: %w", err2) 55 return false 56 } 57 worldInvalidTargetIP = addrs[0] 58 return true 59 }, fmt.Sprintf("Could not get %s IP", worldInvalidTarget), &helpers.TimeoutConfig{Timeout: helpers.HelperTimeout}) 60 Expect(err).Should(BeNil(), "Error obtaining IP for test: %s", lookupErr) 61 62 kubectl = helpers.CreateKubectl(helpers.K8s1VMName(), logger) 63 demoManifest = helpers.ManifestGet(kubectl.BasePath(), "demo.yaml") 64 65 ciliumFilename = helpers.TimestampFilename("cilium.yaml") 66 DeployCiliumAndDNS(kubectl, ciliumFilename) 67 68 By("Applying demo manifest") 69 res := kubectl.ApplyDefault(demoManifest) 70 res.ExpectSuccess("Demo config cannot be deployed") 71 72 err = kubectl.WaitforPods(helpers.DefaultNamespace, "-l zgroup=testapp", helpers.HelperTimeout) 73 Expect(err).Should(BeNil(), "Testapp is not ready after timeout") 74 75 appPods = helpers.GetAppPods(apps, helpers.DefaultNamespace, kubectl, "id") 76 77 // Validate that coredns is reachable from test pods 78 err = kubectl.NslookupInPod(helpers.DefaultNamespace, appPods[helpers.App2], "kube-dns.kube-system.svc.cluster.local") 79 Expect(err).Should(BeNil(), "Error reaching kube-dns before test: %s", err) 80 }) 81 82 AfterFailed(func() { 83 kubectl.CiliumReport("cilium-dbg service list", "cilium-dbg endpoint list") 84 }) 85 86 AfterAll(func() { 87 _ = kubectl.Delete(demoManifest) 88 ExpectAllPodsTerminated(kubectl) 89 90 UninstallCiliumFromManifest(kubectl, ciliumFilename) 91 kubectl.CloseSSHClient() 92 }) 93 94 AfterEach(func() { 95 _ = kubectl.Exec(fmt.Sprintf("%s delete --all cnp", helpers.KubectlCmd)) 96 }) 97 98 It("Restart Cilium validate that FQDN is still working", func() { 99 // Test functionality: 100 // - When Cilium is running) Connectivity from App2 application can 101 // connect to DNS because dns-proxy filter the DNS request. If the 102 // connection is made correctly the IP is whitelisted by the FQDN rule 103 // until the DNS TTL expires. 104 // - When Cilium is not running) The DNS-proxy is not working, so the IP 105 // connectivity to an existing IP that was queried before will work, 106 // meanwhile connections using new DNS request will fail. 107 // - On restart) Cilium will restore the IPS that were white-listted in 108 // the FQDN and connection will work as normal. 109 110 ciliumPodK8s1, err := kubectl.GetCiliumPodOnNode(helpers.K8s1) 111 Expect(err).Should(BeNil(), "Cannot get cilium pod on k8s1") 112 monitorRes1, monitorCancel1 := kubectl.MonitorStart(ciliumPodK8s1) 113 ciliumPodK8s2, err := kubectl.GetCiliumPodOnNode(helpers.K8s2) 114 Expect(err).Should(BeNil(), "Cannot get cilium pod on k8s2") 115 monitorRes2, monitorCancel2 := kubectl.MonitorStart(ciliumPodK8s2) 116 defer func() { 117 monitorCancel1() 118 monitorCancel2() 119 helpers.WriteToReportFile(monitorRes1.CombineOutput().Bytes(), "fqdn-restart-cilium-monitor-k8s1.log") 120 helpers.WriteToReportFile(monitorRes2.CombineOutput().Bytes(), "fqdn-restart-cilium-monitor-k8s2.log") 121 }() 122 123 connectivityTest := func() { 124 By("Testing that connection from %q to %q should work", 125 appPods[helpers.App2], worldTarget) 126 res := kubectl.ExecPodCmd( 127 helpers.DefaultNamespace, appPods[helpers.App2], 128 helpers.CurlFail(worldTarget)) 129 ExpectWithOffset(1, res).To(helpers.CMDSuccess(), "%q cannot curl to %q", 130 appPods[helpers.App2], worldTarget) 131 132 By("Testing that connection from %q to %q shouldn't work", 133 appPods[helpers.App2], worldInvalidTarget) 134 res = kubectl.ExecPodCmd( 135 helpers.DefaultNamespace, appPods[helpers.App2], 136 helpers.CurlFail(worldInvalidTarget)) 137 ExpectWithOffset(1, res).ShouldNot(helpers.CMDSuccess(), 138 "%q can curl to %q when it should fail", appPods[helpers.App2], worldInvalidTarget) 139 140 By("Testing that connection from %q to %q works", 141 appPods[helpers.App2], worldTargetIP) 142 res = kubectl.ExecPodCmd( 143 helpers.DefaultNamespace, appPods[helpers.App2], 144 helpers.CurlFail(worldTargetIP)) 145 res.ExpectSuccess("%q cannot curl to %q during restart", helpers.App2, worldTargetIP) 146 147 By("Testing that connection from %q to %q should not work", 148 appPods[helpers.App2], worldInvalidTargetIP) 149 res = kubectl.ExecPodCmd( 150 helpers.DefaultNamespace, appPods[helpers.App2], 151 helpers.CurlFail(worldInvalidTargetIP)) 152 res.ExpectFail("%q can connect when it should not work", helpers.App2) 153 } 154 155 fqndProxyPolicy := helpers.ManifestGet(kubectl.BasePath(), "fqdn-proxy-policy.yaml") 156 157 _, err = kubectl.CiliumPolicyAction( 158 helpers.DefaultNamespace, fqndProxyPolicy, 159 helpers.KubectlApply, helpers.HelperTimeout) 160 Expect(err).To(BeNil(), "Cannot install fqdn proxy policy") 161 162 connectivityTest() 163 164 // Collect numeric identity of worldTargetIP and the worldTarget selector 165 ctx, cancel := context.WithCancel(context.Background()) 166 defer cancel() 167 cmdTargetIdentity := fmt.Sprintf(`cilium-dbg ip list -o json | jq -r '.[] | select(.cidr == "%s/32") | .identity'`, worldTargetIP) 168 worldTargetIdentityBefore, err := kubectl.CiliumExecContext(ctx, ciliumPodK8s1, cmdTargetIdentity).IntOutput() 169 Expect(err).To(BeNil(), "Identity of IP %s for ToFQDN selector %s not found in IPCache", worldTargetIP, worldTarget) 170 Expect(worldTargetIdentityBefore).NotTo(BeZero()) 171 172 cmdTargetSelector := fmt.Sprintf(`cilium-dbg policy selectors list -o json | jq '.[] | select(.selector | test("%s")) | .identities[] | .'`, worldTarget) 173 worldTargetSelectorBefore, err := kubectl.CiliumExecContext(ctx, ciliumPodK8s1, cmdTargetSelector).IntOutput() 174 Expect(err).To(BeNil(), "ToFQDN selector %s does not seem to select a numeric identity", worldTarget) 175 Expect(worldTargetSelectorBefore).NotTo(BeZero()) 176 177 Expect(worldTargetIdentityBefore).To(Equal(worldTargetSelectorBefore), "Identity selected by ToFQDN selector %s does not match identity of IP %s", worldTarget, worldTargetIP) 178 179 By("restarting cilium pods") 180 181 // kill pid 1 in each cilium pod 182 cmd := fmt.Sprintf("%[1]s get pods -l k8s-app=cilium -n %[2]s | tail -n +2 | cut -d ' ' -f 1 | xargs -I{} %[1]s exec -n %[2]s {} -- kill 1", 183 helpers.KubectlCmd, helpers.CiliumNamespace) 184 quit, run := kubectl.RepeatCommandInBackground(cmd) 185 channelClosed := false 186 defer func() { 187 if !channelClosed { 188 close(quit) 189 } 190 }() 191 <-run // waiting for first run to finish 192 193 By("Testing connectivity when cilium is restoring using IPS without DNS") 194 res := kubectl.ExecPodCmd( 195 helpers.DefaultNamespace, appPods[helpers.App2], 196 helpers.CurlFail(worldTargetIP)) 197 res.ExpectSuccess("%q cannot curl to %q during restart", helpers.App2, worldTargetIP) 198 199 res = kubectl.ExecPodCmd( 200 helpers.DefaultNamespace, appPods[helpers.App2], 201 helpers.CurlFail(worldInvalidTargetIP)) 202 res.ExpectFail("%q can connect when it should not work", helpers.App2) 203 204 // This test is failing consistently in quarantine, see #11213. Disable it for now 205 // to verify the rest of the test is running stable in quarantine. Once this is the 206 // case we could move the rest of the test out of quarantine and quarantine only 207 // this part. 208 if false { 209 // Re-run connectivity test while Cilium is still restarting. This should succeed as the same 210 // DNS names were used in a connectivity test before the restart. 211 connectivityTest() 212 } 213 214 channelClosed = true 215 close(quit) 216 217 ExpectAllPodsTerminated(kubectl) 218 ExpectCiliumReady(kubectl) 219 220 // Restart monitoring after Cilium restart 221 monitorRes1After, monitorCancel1After := kubectl.MonitorStart(ciliumPodK8s1) 222 monitorRes2After, monitorCancel2After := kubectl.MonitorStart(ciliumPodK8s2) 223 defer func() { 224 monitorCancel1After() 225 monitorCancel2After() 226 helpers.WriteToReportFile(monitorRes1After.CombineOutput().Bytes(), "fqdn-after-restart-cilium-monitor-k8s1.log") 227 helpers.WriteToReportFile(monitorRes2After.CombineOutput().Bytes(), "fqdn-after-restart-cilium-monitor-k8s2.log") 228 }() 229 230 // @TODO This endpoint ready call SHOULD NOT be here 231 // Here some packets can be lost due to two different scenarios: 232 // 233 // 1) On restore the endpoint/fqdn policies, the identity ID for the 234 // CIDRSet can be different, so if one endpoint start to regenerate and 235 // other still have the old identity things can mess around and some 236 // IPs are not white listed correctly. To prevent this, a restore for 237 // local-identities will be added in the future. 238 // 239 // 2) On restore, the Kubernetes watcher is sending the CNP back to 240 // Cilium, and before the endoint is restored the CNP can be applied 241 // without the ToCIDRSet, this means that there is no TOCIDR rule in 242 // the cilium policy and traffic will be drop. 243 244 // As mentioned above, these endpoints ready should not be there, the only 245 // reason to have this piece of code here is to reduce a flaky test. 246 err = kubectl.CiliumEndpointWaitReady() 247 Expect(err).To(BeNil(), "Endpoints are not ready after Cilium restarts") 248 249 // Collect numeric identity of worldTargetIP and the worldTarget selector after restart 250 worldTargetIdentityAfter, err := kubectl.CiliumExecContext(ctx, ciliumPodK8s1, cmdTargetIdentity).IntOutput() 251 Expect(err).To(BeNil(), "Identity of IP %s for ToFQDN selector %s not found in IPCache after restart", worldTargetIP, worldTarget) 252 Expect(worldTargetIdentityAfter).NotTo(BeZero()) 253 254 worldTargetSelectorAfter, err := kubectl.CiliumExecContext(ctx, ciliumPodK8s1, cmdTargetSelector).IntOutput() 255 Expect(err).To(BeNil(), "ToFQDN selector %s does not seem to select a numeric identity after restart", worldTarget) 256 Expect(worldTargetSelectorAfter).NotTo(BeZero()) 257 258 Expect(worldTargetIdentityAfter).To(Equal(worldTargetSelectorAfter), "Identity selected by ToFQDN selector %s does not match identity of IP %s after restart", worldTarget, worldTargetIP) 259 Expect(worldTargetIdentityAfter).To(Equal(worldTargetIdentityBefore), "Identity IP %s changed after restart", worldTargetIP) 260 261 By("Testing connectivity when cilium is *restored* using IPS without DNS") 262 res = kubectl.ExecPodCmd( 263 helpers.DefaultNamespace, appPods[helpers.App2], 264 helpers.CurlFail(worldTargetIP)) 265 res.ExpectSuccess("%q cannot curl to %q after restart", helpers.App2, worldTargetIP) 266 267 res = kubectl.ExecPodCmd( 268 helpers.DefaultNamespace, appPods[helpers.App2], 269 helpers.CurlFail(worldInvalidTargetIP)) 270 res.ExpectFail("%q can connect when it should not work", helpers.App2) 271 272 By("Testing connectivity using DNS request when cilium is restored correctly") 273 connectivityTest() 274 }) 275 276 SkipItIf(helpers.RunsOnAKS, "Validate that multiple specs are working correctly", func() { 277 // To make sure that UUID in multiple specs are plumbed correctly to 278 // Cilium Policy 279 fqdnPolicy := helpers.ManifestGet(kubectl.BasePath(), "fqdn-proxy-multiple-specs.yaml") 280 world1Target := worldTarget 281 world2Target := worldInvalidTarget 282 283 _, err := kubectl.CiliumPolicyAction( 284 helpers.DefaultNamespace, fqdnPolicy, 285 helpers.KubectlApply, helpers.HelperTimeout) 286 Expect(err).To(BeNil(), "Cannot install fqdn proxy policy") 287 288 By("Validating APP2 policy connectivity") 289 res := kubectl.ExecPodCmd( 290 helpers.DefaultNamespace, appPods[helpers.App2], 291 helpers.CurlFail("--retry 5 "+world1Target)) 292 res.ExpectSuccess("Can't connect to a valid target when it should work") 293 294 res = kubectl.ExecPodCmd( 295 helpers.DefaultNamespace, appPods[helpers.App2], 296 helpers.CurlFail(world2Target)) 297 res.ExpectFail("Can connect to a valid target when it should NOT work") 298 299 By("Validating APP3 policy connectivity") 300 301 res = kubectl.ExecPodCmd( 302 helpers.DefaultNamespace, appPods[helpers.App3], 303 helpers.CurlWithRetries(world2Target, 5, true)) 304 res.ExpectSuccess("Can't connect to a valid target when it should work") 305 306 res = kubectl.ExecPodCmd( 307 helpers.DefaultNamespace, appPods[helpers.App3], 308 helpers.CurlFail(world1Target)) 309 res.ExpectFail("Can connect to a valid target when it should NOT work") 310 }) 311 312 SkipItIf(helpers.RunsOnAKS, "Validate that FQDN policy continues to work after being updated", func() { 313 // To make sure that UUID in multiple specs are plumbed correctly to 314 // Cilium Policy 315 fqdnPolicy := helpers.ManifestGet(kubectl.BasePath(), "fqdn-proxy-multiple-specs.yaml") 316 world1Target := worldTarget 317 world2Target := worldInvalidTarget 318 319 _, err := kubectl.CiliumPolicyAction( 320 helpers.DefaultNamespace, fqdnPolicy, 321 helpers.KubectlApply, helpers.HelperTimeout) 322 Expect(err).To(BeNil(), "Cannot install fqdn proxy policy") 323 324 By("Validating APP2 policy connectivity") 325 res := kubectl.ExecPodCmd( 326 helpers.DefaultNamespace, appPods[helpers.App2], 327 helpers.CurlFail("--retry 5 "+world1Target)) 328 res.ExpectSuccess("Can't connect to a valid target when it should work") 329 330 res = kubectl.ExecPodCmd( 331 helpers.DefaultNamespace, appPods[helpers.App2], 332 helpers.CurlFail(world2Target)) 333 res.ExpectFail("Can connect to a valid target when it should NOT work") 334 335 By("Updating the policy to include an extra FQDN allow statement") 336 fqdnPolicy2 := helpers.ManifestGet(kubectl.BasePath(), "fqdn-proxy-multiple-specs-v2.yaml") 337 _, err = kubectl.CiliumPolicyAction( 338 helpers.DefaultNamespace, fqdnPolicy2, 339 helpers.KubectlApply, helpers.HelperTimeout) 340 Expect(err).To(BeNil(), "Cannot install fqdn proxy policy") 341 342 By("Validating APP2 policy connectivity after policy change") 343 res = kubectl.ExecPodCmd( 344 helpers.DefaultNamespace, appPods[helpers.App2], 345 helpers.CurlFail("--retry 5 "+world1Target)) 346 res.ExpectSuccess("Can't connect to a valid target when it should work") 347 348 res = kubectl.ExecPodCmd( 349 helpers.DefaultNamespace, appPods[helpers.App2], 350 helpers.CurlFail(world2Target)) 351 res.ExpectFail("Can connect to a valid target when it should NOT work") 352 }) 353 354 })