github.phpd.cn/cilium/cilium@v1.6.12/test/runtime/chaos.go (about) 1 // Copyright 2017 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package RuntimeTest 16 17 import ( 18 "context" 19 "crypto/md5" 20 "fmt" 21 "strings" 22 "sync" 23 "time" 24 25 "github.com/cilium/cilium/pkg/identity" 26 . "github.com/cilium/cilium/test/ginkgo-ext" 27 "github.com/cilium/cilium/test/helpers" 28 "github.com/cilium/cilium/test/helpers/constants" 29 30 . "github.com/onsi/gomega" 31 ) 32 33 var _ = Describe("RuntimeChaos", func() { 34 35 var vm *helpers.SSHMeta 36 37 BeforeAll(func() { 38 vm = helpers.InitRuntimeHelper(helpers.Runtime, logger) 39 ExpectCiliumReady(vm) 40 41 vm.ContainerCreate(helpers.Client, constants.NetperfImage, helpers.CiliumDockerNetwork, "-l id.client") 42 vm.ContainerCreate(helpers.Server, constants.NetperfImage, helpers.CiliumDockerNetwork, "-l id.server") 43 }) 44 45 BeforeEach(func() { 46 Expect(vm.WaitEndpointsReady()).Should(BeTrue(), "Endpoints are not ready after timeout") 47 }) 48 49 AfterAll(func() { 50 vm.ContainerRm(helpers.Client) 51 vm.ContainerRm(helpers.Server) 52 vm.SampleContainersActions(helpers.Delete, helpers.CiliumDockerNetwork) 53 vm.CloseSSHClient() 54 }) 55 56 AfterEach(func() { 57 vm.PolicyDelAll() 58 }) 59 60 JustAfterEach(func() { 61 vm.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration) 62 ExpectDockerContainersMatchCiliumEndpoints(vm) 63 }) 64 65 AfterFailed(func() { 66 vm.ReportFailed() 67 }) 68 69 It("Endpoint recovery on restart", func() { 70 hasher := md5.New() 71 72 originalIps := vm.Exec(` 73 curl -s --unix-socket /var/run/cilium/cilium.sock \ 74 http://localhost/v1beta/healthz/ | jq ".ipam.ipv4|length"`) 75 76 // List the endpoints, but skip the reserved:health endpoint 77 // (4) because it doesn't matter if that endpoint is different. 78 // Remove fields that are expected to change across restart. 79 // 80 // We don't use -o jsonpath... here due to GH-2395. 81 // 82 // jq 'map(select(.status.identity.id != 4), del(.status.controllers, ..., (.status.identity.labels | sort)))' 83 filterHealthEP := fmt.Sprintf("select(.status.identity.id != %d)", helpers.ReservedIdentityHealth) 84 nonPersistentEndpointFields := strings.Join([]string{ 85 ".status.controllers", // Timestamps, UUIDs 86 ".status.labels", // Slice ordering 87 ".status.log", // Timestamp 88 ".status.identity.labels", // Slice ordering 89 ".status.policy", // Allowed identities order 90 }, ", ") 91 // Delete fields we're not interested in 92 filterFields := fmt.Sprintf("del(%s)", nonPersistentEndpointFields) 93 // Go back and add the identity labels back into the output 94 getSortedLabels := "(.status.identity.labels | sort)" 95 jqCmd := fmt.Sprintf("jq 'map(%s) | map(%s, %s)'", filterHealthEP, filterFields, getSortedLabels) 96 endpointListCmd := fmt.Sprintf("cilium endpoint list -o json | %s", jqCmd) 97 originalEndpointList := vm.Exec(endpointListCmd) 98 99 err := vm.RestartCilium() 100 Expect(err).Should(BeNil(), "restarting Cilium failed") 101 102 ips := vm.Exec(` 103 curl -s --unix-socket /var/run/cilium/cilium.sock \ 104 http://localhost/v1beta/healthz/ | jq ".ipam.ipv4|length"`) 105 Expect(originalIps.Output().String()).To(Equal(ips.Output().String())) 106 107 EndpointList := vm.Exec(endpointListCmd) 108 By("original: %s", originalEndpointList.Output().String()) 109 By("new: %s", EndpointList.Output().String()) 110 Expect(EndpointList.Output().String()).To(Equal(originalEndpointList.Output().String())) 111 Expect(hasher.Sum(EndpointList.Output().Bytes())).To( 112 Equal(hasher.Sum(originalEndpointList.Output().Bytes()))) 113 114 }, 300) 115 116 It("removing leftover Cilium interfaces", func() { 117 originalLinks, err := vm.Exec("sudo ip link show | wc -l").IntOutput() 118 Expect(err).Should(BeNil()) 119 120 _ = vm.Exec("sudo ip link add lxc12345 type veth peer name tmp54321") 121 122 err = vm.RestartCilium() 123 Expect(err).Should(BeNil(), "restarting Cilium failed") 124 125 status := vm.Exec("sudo ip link show lxc12345") 126 status.ExpectFail("leftover interface were not properly cleaned up") 127 128 links, err := vm.Exec("sudo ip link show | wc -l").IntOutput() 129 Expect(err).Should(BeNil(), "Cannot get link layer information") 130 Expect(links).Should(Equal(originalLinks), 131 "Some network interfaces were accidentally removed!") 132 }, 300) 133 134 It("Checking for file-descriptor leak", func() { 135 threshold := 5000 136 fds, err := vm.Exec("sudo lsof -p `pidof cilium-node-monitor` -p `pidof cilium-agent` -p `pidof cilium-docker` 2>/dev/null | wc -l").IntOutput() 137 Expect(err).Should(BeNil()) 138 139 Expect(fds).To(BeNumerically("<", threshold), 140 fmt.Sprintf("%d file descriptors open from Cilium processes", fds)) 141 }, 300) 142 143 It("Checking that during restart no traffic is dropped using Egress + Ingress Traffic", func() { 144 By("Installing sample containers") 145 vm.SampleContainersActions(helpers.Create, helpers.CiliumDockerNetwork) 146 vm.PolicyDelAll().ExpectSuccess("Cannot deleted all policies") 147 148 _, err := vm.PolicyImportAndWait(vm.GetFullPath(policiesL4Json), helpers.HelperTimeout) 149 Expect(err).Should(BeNil(), "Cannot install L4 policy") 150 151 areEndpointsReady := vm.WaitEndpointsReady() 152 Expect(areEndpointsReady).Should(BeTrue(), "Endpoints are not ready after timeout") 153 154 By("Starting background connection from app2 to httpd1 container") 155 ctx, cancel := context.WithCancel(context.Background()) 156 defer cancel() 157 srvIP, err := vm.ContainerInspectNet(helpers.Httpd1) 158 Expect(err).Should(BeNil(), "Cannot get httpd1 server address") 159 type BackgroundTestAsserts struct { 160 res *helpers.CmdRes 161 time time.Time 162 } 163 backgroundChecks := []*BackgroundTestAsserts{} 164 var wg sync.WaitGroup 165 wg.Add(1) 166 go func() { 167 for { 168 select { 169 default: 170 res := vm.ContainerExec( 171 helpers.App1, 172 helpers.CurlFail("http://%s/", srvIP[helpers.IPv4])) 173 assert := &BackgroundTestAsserts{ 174 res: res, 175 time: time.Now(), 176 } 177 backgroundChecks = append(backgroundChecks, assert) 178 case <-ctx.Done(): 179 wg.Done() 180 return 181 } 182 } 183 }() 184 // Sleep a bit to make sure that the goroutine starts. 185 time.Sleep(50 * time.Millisecond) 186 187 err = vm.RestartCilium() 188 Expect(err).Should(BeNil(), "restarting Cilium failed") 189 190 By("Stopping background connections") 191 cancel() 192 wg.Wait() 193 194 GinkgoPrint("Made %d connections in total", len(backgroundChecks)) 195 Expect(backgroundChecks).ShouldNot(BeEmpty(), "No background connections were made") 196 for _, check := range backgroundChecks { 197 check.res.ExpectSuccess("Curl from app2 to httpd1 should work but it failed at %s", check.time) 198 } 199 }) 200 201 It("Validate that delete events on KVStore do not release in use identities", func() { 202 // This validates that if a kvstore delete event is send the identity 203 // is not release if it is in use. For more info issue #7240 204 205 prefix := "http://127.0.0.1:8500/v1/kv/cilium/state/identities/v1/id" 206 identities, err := vm.GetEndpointsIdentityIds() 207 Expect(err).To(BeNil(), "Cannot get identities") 208 209 for _, identityID := range identities { 210 action := helpers.CurlFail("%s/%s -X DELETE", prefix, identityID) 211 vm.Exec(action).ExpectSuccess("Key %s cannot be deleted correctly", identityID) 212 } 213 214 newidentities, err := vm.GetEndpointsIdentityIds() 215 Expect(err).To(BeNil(), "Cannot get identities after delete keys") 216 217 Expect(newidentities).To(Equal(identities), 218 "Identities are not the same after delete keys from kvstore") 219 220 for _, identityID := range newidentities { 221 id, err := identity.ParseNumericIdentity(identityID) 222 Expect(err).To(BeNil(), "Cannot parse identity") 223 if id.IsReservedIdentity() { 224 continue 225 } 226 action := helpers.CurlFail("%s/%s", prefix, identityID) 227 vm.Exec(action).ExpectSuccess("Key %s cannot is not restored correctly", identityID) 228 } 229 }) 230 231 It("Delete event on KVStore with CIDR identities", func() { 232 // Validate that if when a delete event happens on kvstore the CIDR 233 // identity (local one) is not deleted. This happens on the past where 234 // other cilium agent executes a deletion of a key that was used by 235 // another cilium agent, that means that on policy regeneration the 236 // identity was not present. 237 jqFilter := `jq -r '.[] | select(.labels|join("") | contains("cidr")) | .id'` 238 prefix := "http://127.0.0.1:8500/v1/kv/cilium/state/identities/v1/id" 239 240 By("Installing CIDR policy") 241 policy := ` 242 [{ 243 "endpointSelector": {"matchLabels":{"test":""}}, 244 "egress": 245 [{ 246 "toCIDR": [ 247 "10.10.10.10/32" 248 ] 249 }] 250 }] 251 ` 252 _, err := vm.PolicyRenderAndImport(policy) 253 Expect(err).To(BeNil(), "Unable to import policy: %s", err) 254 255 CIDRIdentities := vm.Exec(fmt.Sprintf(`cilium identity list -o json| %s`, jqFilter)) 256 CIDRIdentities.ExpectSuccess("Cannot get cidr identities") 257 258 for _, identityID := range CIDRIdentities.ByLines() { 259 action := helpers.CurlFail("%s/%s -X DELETE", prefix, identityID) 260 vm.Exec(action).ExpectSuccess("Key %s cannot be deleted correctly", identityID) 261 } 262 263 newCIDRIdentities := vm.Exec(fmt.Sprintf(`cilium identity list -o json| %s`, jqFilter)) 264 newCIDRIdentities.ExpectSuccess("Cannot get cidr identities") 265 266 Expect(CIDRIdentities.ByLines()).To(Equal(newCIDRIdentities.ByLines()), 267 "Identities are deleted in kvstore delete event") 268 }) 269 })