github.com/zhyoulun/cilium@v1.6.12/test/runtime/chaos.go (about)

     1  // Copyright 2017 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package RuntimeTest
    16  
    17  import (
    18  	"context"
    19  	"crypto/md5"
    20  	"fmt"
    21  	"strings"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/cilium/cilium/pkg/identity"
    26  	. "github.com/cilium/cilium/test/ginkgo-ext"
    27  	"github.com/cilium/cilium/test/helpers"
    28  	"github.com/cilium/cilium/test/helpers/constants"
    29  
    30  	. "github.com/onsi/gomega"
    31  )
    32  
    33  var _ = Describe("RuntimeChaos", func() {
    34  
    35  	var vm *helpers.SSHMeta
    36  
    37  	BeforeAll(func() {
    38  		vm = helpers.InitRuntimeHelper(helpers.Runtime, logger)
    39  		ExpectCiliumReady(vm)
    40  
    41  		vm.ContainerCreate(helpers.Client, constants.NetperfImage, helpers.CiliumDockerNetwork, "-l id.client")
    42  		vm.ContainerCreate(helpers.Server, constants.NetperfImage, helpers.CiliumDockerNetwork, "-l id.server")
    43  	})
    44  
    45  	BeforeEach(func() {
    46  		Expect(vm.WaitEndpointsReady()).Should(BeTrue(), "Endpoints are not ready after timeout")
    47  	})
    48  
    49  	AfterAll(func() {
    50  		vm.ContainerRm(helpers.Client)
    51  		vm.ContainerRm(helpers.Server)
    52  		vm.SampleContainersActions(helpers.Delete, helpers.CiliumDockerNetwork)
    53  		vm.CloseSSHClient()
    54  	})
    55  
	// AfterEach calls vm.PolicyDelAll() once a test has finished, presumably so
	// that a policy imported by one test is not left in place for the next one.
	// The result of the call is not checked here.
	AfterEach(func() {
		vm.PolicyDelAll()
	})
    59  
    60  	JustAfterEach(func() {
    61  		vm.ValidateNoErrorsInLogs(CurrentGinkgoTestDescription().Duration)
    62  		ExpectDockerContainersMatchCiliumEndpoints(vm)
    63  	})
    64  
	// AfterFailed invokes vm.ReportFailed() for a failed test (presumably to
	// collect diagnostics from the VM; see the helper for what is gathered).
	AfterFailed(func() {
		vm.ReportFailed()
	})
    68  
    69  	It("Endpoint recovery on restart", func() {
    70  		hasher := md5.New()
    71  
    72  		originalIps := vm.Exec(`
    73  		curl -s --unix-socket /var/run/cilium/cilium.sock \
    74  		http://localhost/v1beta/healthz/ | jq ".ipam.ipv4|length"`)
    75  
    76  		// List the endpoints, but skip the reserved:health endpoint
    77  		// (4) because it doesn't matter if that endpoint is different.
    78  		// Remove fields that are expected to change across restart.
    79  		//
    80  		// We don't use -o jsonpath... here due to GH-2395.
    81  		//
    82  		// jq 'map(select(.status.identity.id != 4), del(.status.controllers, ..., (.status.identity.labels | sort)))'
    83  		filterHealthEP := fmt.Sprintf("select(.status.identity.id != %d)", helpers.ReservedIdentityHealth)
    84  		nonPersistentEndpointFields := strings.Join([]string{
    85  			".status.controllers",     // Timestamps, UUIDs
    86  			".status.labels",          // Slice ordering
    87  			".status.log",             // Timestamp
    88  			".status.identity.labels", // Slice ordering
    89  			".status.policy",          // Allowed identities order
    90  		}, ", ")
    91  		// Delete fields we're not interested in
    92  		filterFields := fmt.Sprintf("del(%s)", nonPersistentEndpointFields)
    93  		// Go back and add the identity labels back into the output
    94  		getSortedLabels := "(.status.identity.labels | sort)"
    95  		jqCmd := fmt.Sprintf("jq 'map(%s) | map(%s, %s)'", filterHealthEP, filterFields, getSortedLabels)
    96  		endpointListCmd := fmt.Sprintf("cilium endpoint list -o json | %s", jqCmd)
    97  		originalEndpointList := vm.Exec(endpointListCmd)
    98  
    99  		err := vm.RestartCilium()
   100  		Expect(err).Should(BeNil(), "restarting Cilium failed")
   101  
   102  		ips := vm.Exec(`
   103  		curl -s --unix-socket /var/run/cilium/cilium.sock \
   104  		http://localhost/v1beta/healthz/ | jq ".ipam.ipv4|length"`)
   105  		Expect(originalIps.Output().String()).To(Equal(ips.Output().String()))
   106  
   107  		EndpointList := vm.Exec(endpointListCmd)
   108  		By("original: %s", originalEndpointList.Output().String())
   109  		By("new: %s", EndpointList.Output().String())
   110  		Expect(EndpointList.Output().String()).To(Equal(originalEndpointList.Output().String()))
   111  		Expect(hasher.Sum(EndpointList.Output().Bytes())).To(
   112  			Equal(hasher.Sum(originalEndpointList.Output().Bytes())))
   113  
   114  	}, 300)
   115  
   116  	It("removing leftover Cilium interfaces", func() {
   117  		originalLinks, err := vm.Exec("sudo ip link show | wc -l").IntOutput()
   118  		Expect(err).Should(BeNil())
   119  
   120  		_ = vm.Exec("sudo ip link add lxc12345 type veth peer name tmp54321")
   121  
   122  		err = vm.RestartCilium()
   123  		Expect(err).Should(BeNil(), "restarting Cilium failed")
   124  
   125  		status := vm.Exec("sudo ip link show lxc12345")
   126  		status.ExpectFail("leftover interface were not properly cleaned up")
   127  
   128  		links, err := vm.Exec("sudo ip link show | wc -l").IntOutput()
   129  		Expect(err).Should(BeNil(), "Cannot get link layer information")
   130  		Expect(links).Should(Equal(originalLinks),
   131  			"Some network interfaces were accidentally removed!")
   132  	}, 300)
   133  
   134  	It("Checking for file-descriptor leak", func() {
   135  		threshold := 5000
   136  		fds, err := vm.Exec("sudo lsof -p `pidof cilium-node-monitor` -p `pidof cilium-agent` -p `pidof cilium-docker` 2>/dev/null | wc -l").IntOutput()
   137  		Expect(err).Should(BeNil())
   138  
   139  		Expect(fds).To(BeNumerically("<", threshold),
   140  			fmt.Sprintf("%d file descriptors open from Cilium processes", fds))
   141  	}, 300)
   142  
   143  	It("Checking that during restart no traffic is dropped using Egress + Ingress Traffic", func() {
   144  		By("Installing sample containers")
   145  		vm.SampleContainersActions(helpers.Create, helpers.CiliumDockerNetwork)
   146  		vm.PolicyDelAll().ExpectSuccess("Cannot deleted all policies")
   147  
   148  		_, err := vm.PolicyImportAndWait(vm.GetFullPath(policiesL4Json), helpers.HelperTimeout)
   149  		Expect(err).Should(BeNil(), "Cannot install L4 policy")
   150  
   151  		areEndpointsReady := vm.WaitEndpointsReady()
   152  		Expect(areEndpointsReady).Should(BeTrue(), "Endpoints are not ready after timeout")
   153  
   154  		By("Starting background connection from app2 to httpd1 container")
   155  		ctx, cancel := context.WithCancel(context.Background())
   156  		defer cancel()
   157  		srvIP, err := vm.ContainerInspectNet(helpers.Httpd1)
   158  		Expect(err).Should(BeNil(), "Cannot get httpd1 server address")
   159  		type BackgroundTestAsserts struct {
   160  			res  *helpers.CmdRes
   161  			time time.Time
   162  		}
   163  		backgroundChecks := []*BackgroundTestAsserts{}
   164  		var wg sync.WaitGroup
   165  		wg.Add(1)
   166  		go func() {
   167  			for {
   168  				select {
   169  				default:
   170  					res := vm.ContainerExec(
   171  						helpers.App1,
   172  						helpers.CurlFail("http://%s/", srvIP[helpers.IPv4]))
   173  					assert := &BackgroundTestAsserts{
   174  						res:  res,
   175  						time: time.Now(),
   176  					}
   177  					backgroundChecks = append(backgroundChecks, assert)
   178  				case <-ctx.Done():
   179  					wg.Done()
   180  					return
   181  				}
   182  			}
   183  		}()
   184  		// Sleep a bit to make sure that the goroutine starts.
   185  		time.Sleep(50 * time.Millisecond)
   186  
   187  		err = vm.RestartCilium()
   188  		Expect(err).Should(BeNil(), "restarting Cilium failed")
   189  
   190  		By("Stopping background connections")
   191  		cancel()
   192  		wg.Wait()
   193  
   194  		GinkgoPrint("Made %d connections in total", len(backgroundChecks))
   195  		Expect(backgroundChecks).ShouldNot(BeEmpty(), "No background connections were made")
   196  		for _, check := range backgroundChecks {
   197  			check.res.ExpectSuccess("Curl from app2 to httpd1 should work but it failed at %s", check.time)
   198  		}
   199  	})
   200  
   201  	It("Validate that delete events on KVStore do not release in use identities", func() {
   202  		// This validates that if a kvstore delete event is send the identity
   203  		// is not release if it is in use. For more info issue #7240
   204  
   205  		prefix := "http://127.0.0.1:8500/v1/kv/cilium/state/identities/v1/id"
   206  		identities, err := vm.GetEndpointsIdentityIds()
   207  		Expect(err).To(BeNil(), "Cannot get identities")
   208  
   209  		for _, identityID := range identities {
   210  			action := helpers.CurlFail("%s/%s -X DELETE", prefix, identityID)
   211  			vm.Exec(action).ExpectSuccess("Key %s cannot be deleted correctly", identityID)
   212  		}
   213  
   214  		newidentities, err := vm.GetEndpointsIdentityIds()
   215  		Expect(err).To(BeNil(), "Cannot get identities after delete keys")
   216  
   217  		Expect(newidentities).To(Equal(identities),
   218  			"Identities are not the same after delete keys from kvstore")
   219  
   220  		for _, identityID := range newidentities {
   221  			id, err := identity.ParseNumericIdentity(identityID)
   222  			Expect(err).To(BeNil(), "Cannot parse identity")
   223  			if id.IsReservedIdentity() {
   224  				continue
   225  			}
   226  			action := helpers.CurlFail("%s/%s", prefix, identityID)
   227  			vm.Exec(action).ExpectSuccess("Key %s cannot is not restored correctly", identityID)
   228  		}
   229  	})
   230  
   231  	It("Delete event on KVStore with CIDR identities", func() {
   232  		// Validate that if when a delete event happens on kvstore the CIDR
   233  		// identity (local one) is not deleted.  This happens on the past where
   234  		// other cilium agent executes a deletion of a key that was used by
   235  		// another cilium agent, that means that on policy regeneration the
   236  		// identity was not present.
   237  		jqFilter := `jq -r '.[] | select(.labels|join("") | contains("cidr")) | .id'`
   238  		prefix := "http://127.0.0.1:8500/v1/kv/cilium/state/identities/v1/id"
   239  
   240  		By("Installing CIDR policy")
   241  		policy := `
   242  		[{
   243  			"endpointSelector": {"matchLabels":{"test":""}},
   244  			"egress":
   245  			[{
   246  				"toCIDR": [
   247  					"10.10.10.10/32"
   248  				]
   249  			}]
   250  		}]
   251  		`
   252  		_, err := vm.PolicyRenderAndImport(policy)
   253  		Expect(err).To(BeNil(), "Unable to import policy: %s", err)
   254  
   255  		CIDRIdentities := vm.Exec(fmt.Sprintf(`cilium identity list -o json| %s`, jqFilter))
   256  		CIDRIdentities.ExpectSuccess("Cannot get cidr identities")
   257  
   258  		for _, identityID := range CIDRIdentities.ByLines() {
   259  			action := helpers.CurlFail("%s/%s -X DELETE", prefix, identityID)
   260  			vm.Exec(action).ExpectSuccess("Key %s cannot be deleted correctly", identityID)
   261  		}
   262  
   263  		newCIDRIdentities := vm.Exec(fmt.Sprintf(`cilium identity list -o json| %s`, jqFilter))
   264  		newCIDRIdentities.ExpectSuccess("Cannot get cidr identities")
   265  
   266  		Expect(CIDRIdentities.ByLines()).To(Equal(newCIDRIdentities.ByLines()),
   267  			"Identities are deleted in kvstore delete event")
   268  	})
   269  })