github.com/looshlee/beatles@v0.0.0-20220727174639-742810ab631c/test/helpers/kubectl.go

// Copyright 2018-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package helpers

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"text/tabwriter"
	"time"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/pkg/annotation"
	cnpv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/test/config"
	"github.com/cilium/cilium/test/ginkgo-ext"
	"github.com/cilium/cilium/test/helpers/logutils"

	"github.com/asaskevich/govalidator"
	"github.com/sirupsen/logrus"
	"k8s.io/api/core/v1"
)

const (
	// KubectlCmd Kubernetes controller command
	KubectlCmd      = "kubectl"
	manifestsPath   = "k8sT/manifests/"
	descriptorsPath = "../examples/kubernetes"
	kubeDNSLabel    = "k8s-app=kube-dns"

	// DNSHelperTimeout is a predefined timeout value for K8s DNS commands. It
	// must be larger than 5 minutes because kubedns has a hardcoded resync
	// period of 5 minutes. We have experienced test failures because kubedns
	// needed this time to recover from a connection problem to kube-apiserver.
	// The kubedns resyncPeriod is defined at
	// https://github.com/kubernetes/dns/blob/80fdd88276adba36a87c4f424b66fdf37cd7c9a8/pkg/dns/dns.go#L53
	DNSHelperTimeout = 7 * time.Minute

	// EnableMicroscope is true when microscope should be enabled
	EnableMicroscope = false

	// CIIntegrationFlannel contains the constant to be used when flannel is
	// used in the CI.
	CIIntegrationFlannel = "flannel"
)

var (
	defaultHelmOptions = map[string]string{
		"global.registry":               "k8s1:5000/cilium",
		"agent.image":                   "cilium-dev",
		"global.tag":                    "latest",
		"operator.image":                "operator",
		"operator.tag":                  "latest",
		"managed-etcd.registry":         "docker.io/cilium",
		"global.debug.enabled":          "true",
		"global.k8s.requireIPv4PodCIDR": "true",
		"global.pprof.enabled":          "true",
		"global.logSystemLoad":          "true",
		"global.bpf.preallocateMaps":    "true",
		"global.etcd.leaseTTL":          "30s",
		"global.ipv4.enabled":           "true",
		"global.ipv6.enabled":           "true",
	}

	flannelHelmOverrides = map[string]string{
		"global.flannel.enabled": "true",
		"global.ipv6.enabled":    "false",
		"global.tunnel":          "disabled",
	}
)

// GetCurrentK8SEnv returns the value of K8S_VERSION from the OS environment.
func GetCurrentK8SEnv() string { return os.Getenv("K8S_VERSION") }

// GetCurrentIntegration returns the CI integration that is set up to run
// against Cilium.
func GetCurrentIntegration() string {
	switch strings.ToLower(os.Getenv("CNI_INTEGRATION")) {
	case CIIntegrationFlannel:
		return CIIntegrationFlannel
	default:
		return ""
	}
}

// Kubectl is a wrapper around an SSHMeta. It is used to run Kubernetes-specific
// commands on the node which is accessible via the SSH metadata stored in its
// SSHMeta.
type Kubectl struct {
	*SSHMeta
	*serviceCache
}

// CreateKubectl initializes a Kubectl helper with the provided vmName and log.
// It marks the test as failed if it cannot get the SSH meta information or
// cannot execute `ls` on the virtual machine.
func CreateKubectl(vmName string, log *logrus.Entry) *Kubectl {
	node := GetVagrantSSHMeta(vmName)
	if node == nil {
		ginkgoext.Fail(fmt.Sprintf("Cannot connect to vmName '%s'", vmName), 1)
		return nil
	}
	// This `ls` command is a sanity check: sometimes the SSH meta info is not
	// nil but new commands cannot be executed over SSH, which made tests fail
	// and was hard to debug.
	res := node.ExecShort("ls /tmp/")
	if !res.WasSuccessful() {
		ginkgoext.Fail(fmt.Sprintf(
			"Cannot execute ls command on vmName '%s'", vmName), 1)
		return nil
	}
	node.logger = log

	return &Kubectl{
		SSHMeta: node,
	}
}
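
// Example: creating a Kubectl helper for a test VM. This is a hypothetical
// usage sketch; the VM name and log field are illustrative, not fixed values.
//
//	logger := logrus.WithField("test", "K8sExample")
//	kubectl := CreateKubectl("k8s1-1.17", logger)
//	if kubectl == nil {
//		return // CreateKubectl already marked the test as failed
//	}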

// CepGet returns the endpoint model for the given pod name in the specified
// namespace. If the pod is not present it returns nil.
func (kub *Kubectl) CepGet(namespace string, pod string) *cnpv2.EndpointStatus {
	log := kub.logger.WithFields(logrus.Fields{
		"cep":       pod,
		"namespace": namespace})

	cmd := fmt.Sprintf("%s -n %s get cep %s -o json | jq '.status'", KubectlCmd, namespace, pod)
	res := kub.ExecShort(cmd)
	if !res.WasSuccessful() {
		log.Debug("cep is not present")
		return nil
	}

	var data *cnpv2.EndpointStatus
	err := res.Unmarshal(&data)
	if err != nil {
		log.WithError(err).Error("cannot unmarshal JSON")
		return nil
	}
	return data
}
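
// Example: reading the endpoint state for a pod. A hypothetical sketch; the
// namespace, pod name and the State field access are illustrative.
//
//	if status := kub.CepGet("default", "app-pod-1"); status != nil {
//		fmt.Printf("endpoint state: %s\n", status.State)
//	}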

// GetNumNodes returns the number of Kubernetes nodes running
func (kub *Kubectl) GetNumNodes() int {
	getNodesCmd := fmt.Sprintf("%s get nodes -o jsonpath='{.items.*.metadata.name}'", KubectlCmd)
	res := kub.ExecShort(getNodesCmd)
	if !res.WasSuccessful() {
		return 0
	}

	return len(strings.Split(res.SingleOut(), " "))
}

// ExecKafkaPodCmd executes shell command with arguments arg in the specified
// pod residing in the specified namespace. It returns an error if the command
// was not successful.
// The kafka producer and consumer scripts do not return an error if the
// command leads to a TopicAuthorizationException or any other error, hence
// the function also takes the stderr messages into account.
func (kub *Kubectl) ExecKafkaPodCmd(namespace string, pod string, arg string) error {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, arg)
	res := kub.Exec(command)
	if !res.WasSuccessful() {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed %s",
			res.GetCmd(), res.OutputPrettyPrint())
	}

	if strings.Contains(res.GetStdErr(), "ERROR") {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed '%s'",
			res.GetCmd(), res.OutputPrettyPrint())
	}
	return nil
}
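
// Example: listing Kafka topics from a broker pod. A hypothetical sketch; the
// pod name and script invocation are illustrative.
//
//	err := kub.ExecKafkaPodCmd("default", "kafka-0",
//		"/opt/kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181")
//	if err != nil {
//		// failure was reported either via exit code or via "ERROR" on stderr
//	}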

// ExecPodCmd executes command cmd in the specified pod residing in the specified
// namespace. It returns a pointer to CmdRes with all the output
func (kub *Kubectl) ExecPodCmd(namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.Exec(command, options...)
}

// ExecPodCmdContext synchronously executes command cmd in the specified pod residing in the
// specified namespace. It returns a pointer to CmdRes with all the output.
func (kub *Kubectl) ExecPodCmdContext(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecContext(ctx, command, options...)
}

// ExecPodCmdBackground executes command cmd in background in the specified pod residing
// in the specified namespace. It returns a pointer to CmdRes with all the
// output.
//
// To receive the output of this function, the caller must invoke either
// WaitUntilFinish() or WaitUntilMatch() on the returned CmdRes and then
// fetch the output out of the result.
func (kub *Kubectl) ExecPodCmdBackground(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecInBackground(ctx, command, options...)
}
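
// Example: tailing a log in the background and waiting for a marker. A
// hypothetical sketch; the pod, file and marker are illustrative, and
// WaitUntilMatch is assumed to block until the substring appears.
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	res := kub.ExecPodCmdBackground(ctx, "default", "app-pod-1",
//		"tail -f /var/log/app.log")
//	err := res.WaitUntilMatch("server ready")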

// Get retrieves the provided Kubernetes objects from the specified namespace.
func (kub *Kubectl) Get(namespace string, command string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s -n %s get %s -o json", KubectlCmd, namespace, command))
}

// GetFromAllNS retrieves provided Kubernetes objects from all namespaces
func (kub *Kubectl) GetFromAllNS(kind string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s get %s --all-namespaces -o json", KubectlCmd, kind))
}

// GetCNP retrieves the output of `kubectl get cnp` in the given namespace for
// the given CNP and returns a CiliumNetworkPolicy struct. If the CNP does not
// exist or the JSON output cannot be unmarshaled, it returns nil.
func (kub *Kubectl) GetCNP(namespace string, cnp string) *cnpv2.CiliumNetworkPolicy {
	log := kub.logger.WithFields(logrus.Fields{
		"fn":  "GetCNP",
		"cnp": cnp,
		"ns":  namespace,
	})
	res := kub.Get(namespace, fmt.Sprintf("cnp %s", cnp))
	if !res.WasSuccessful() {
		log.WithField("error", res.CombineOutput()).Info("cannot get CNP")
		return nil
	}
	var result cnpv2.CiliumNetworkPolicy
	err := res.Unmarshal(&result)
	if err != nil {
		log.WithError(err).Errorf("cannot unmarshal CNP output")
		return nil
	}
	return &result
}

// WaitForCRDCount waits until the number of CRDs matching the given filter is
// exactly count. It returns an error if the timeout is exceeded.
func (kub *Kubectl) WaitForCRDCount(filter string, count int, timeout time.Duration) error {
	// Set regexp flag m for multi-line matching, then add the
	// matches for beginning and end of a line, so that we count
	// at most one match per line (like "grep <filter> | wc -l")
	regex := regexp.MustCompile("(?m:^.*(?:" + filter + ").*$)")
	body := func() bool {
		res := kub.ExecShort(fmt.Sprintf("%s get crds", KubectlCmd))
		if !res.WasSuccessful() {
			log.Error(res.GetErr("kubectl get crds failed"))
			return false
		}
		return len(regex.FindAllString(res.GetStdOut(), -1)) == count
	}
	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for %d CRDs matching filter \"%s\" to be ready", count, filter),
		&TimeoutConfig{Timeout: timeout})
}
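
// Example: waiting for the Cilium CRDs to be registered. A hypothetical
// sketch; the filter, count and timeout are illustrative.
//
//	if err := kub.WaitForCRDCount("cilium.io", 3, HelperTimeout); err != nil {
//		// the expected number of CRDs never appeared
//	}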

// GetPods gets all of the pods in the given namespace that match the provided
// filter.
func (kub *Kubectl) GetPods(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get pods %s -o json", KubectlCmd, namespace, filter))
}

// GetPodsNodes returns a map with pod name as a key and node name as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly
func (kub *Kubectl) GetPodsNodes(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.spec.nodeName}{"\n"}{end}`
	res := kub.Exec(fmt.Sprintf("%s -n %s get pods %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}
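
// Example: mapping Cilium pods to the nodes they run on. A hypothetical
// sketch; the namespace and label selector are illustrative.
//
//	podsByNode, err := kub.GetPodsNodes("kube-system", "-l k8s-app=cilium")
//	if err == nil {
//		for pod, node := range podsByNode {
//			fmt.Printf("%s runs on %s\n", pod, node)
//		}
//	}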

// GetPodsIPs returns a map with pod name as a key and pod IP as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly
func (kub *Kubectl) GetPodsIPs(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.status.podIP}{"\n"}{end}`
	res := kub.ExecShort(fmt.Sprintf("%s -n %s get pods -l %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}

// GetEndpoints gets all of the endpoints in the given namespace that match the
// provided filter.
func (kub *Kubectl) GetEndpoints(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get endpoints %s -o json", KubectlCmd, namespace, filter))
}

// GetAllPods returns a slice of all pods present in Kubernetes cluster, along
// with an error if the pods could not be retrieved via `kubectl`, or if the
// pod objects cannot be unmarshaled from JSON.
func (kub *Kubectl) GetAllPods(ctx context.Context, options ...ExecOptions) ([]v1.Pod, error) {
	var ops ExecOptions
	if len(options) > 0 {
		ops = options[0]
	}

	getPodsCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
	defer cancel()

	var podsList v1.List
	err := kub.ExecContext(getPodsCtx,
		fmt.Sprintf("%s get pods --all-namespaces -o json", KubectlCmd),
		ExecOptions{SkipLog: ops.SkipLog}).Unmarshal(&podsList)
	if err != nil {
		return nil, err
	}

	// Allocate with zero length and enough capacity; appending to a slice
	// created with a non-zero length would leave zero-value pods at the front.
	pods := make([]v1.Pod, 0, len(podsList.Items))
	for _, item := range podsList.Items {
		var pod v1.Pod
		err = json.Unmarshal(item.Raw, &pod)
		if err != nil {
			return nil, err
		}
		pods = append(pods, pod)
	}

	return pods, nil
}

// GetPodNames returns the names of all of the pods that are labeled with label
// in the specified namespace, along with an error if the pod names cannot be
// retrieved.
func (kub *Kubectl) GetPodNames(namespace string, label string) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.GetPodNamesContext(ctx, namespace, label)
}

// GetPodNamesContext returns the names of all of the pods that are labeled with
// label in the specified namespace, along with an error if the pod names cannot
// be retrieved.
func (kub *Kubectl) GetPodNamesContext(ctx context.Context, namespace string, label string) ([]string, error) {
	stdout := new(bytes.Buffer)
	filter := "-o jsonpath='{.items[*].metadata.name}'"

	cmd := fmt.Sprintf("%s -n %s get pods -l %s %s", KubectlCmd, namespace, label, filter)

	// Taking more than 30 seconds to get pods means that something is wrong
	// connecting to the node.
	podNamesCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
	defer cancel()
	err := kub.ExecuteContext(podNamesCtx, cmd, stdout, nil)

	if err != nil {
		return nil, fmt.Errorf(
			"could not find pods in namespace '%v' with label '%v': %s", namespace, label, err)
	}

	out := strings.Trim(stdout.String(), "\n")
	if len(out) == 0 {
		// Small hack: strings.Split always returns at least one element (an
		// empty string), so return an empty slice explicitly here.
		return []string{}, nil
	}
	return strings.Split(out, " "), nil
}

// GetServiceHostPort returns the host and the first port for the given service name.
// It will return an error if service cannot be retrieved.
func (kub *Kubectl) GetServiceHostPort(namespace string, service string) (string, int, error) {
	var data v1.Service
	err := kub.Get(namespace, fmt.Sprintf("service %s", service)).Unmarshal(&data)
	if err != nil {
		return "", 0, err
	}
	if len(data.Spec.Ports) == 0 {
		return "", 0, fmt.Errorf("Service '%s' does not have ports defined", service)
	}
	return data.Spec.ClusterIP, int(data.Spec.Ports[0].Port), nil
}
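
// Example: resolving a service's ClusterIP and first port. A hypothetical
// sketch; the namespace and service name are illustrative.
//
//	host, port, err := kub.GetServiceHostPort("default", "app-service")
//	if err == nil {
//		fmt.Printf("service reachable at %s:%d\n", host, port)
//	}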

// Logs returns a CmdRes containing the resulting metadata from the execution
// of `kubectl logs <pod> -n <namespace>`.
func (kub *Kubectl) Logs(namespace string, pod string) *CmdRes {
	return kub.Exec(
		fmt.Sprintf("%s -n %s logs %s", KubectlCmd, namespace, pod))
}

// MicroscopeStart installs (if it is not installed) a new microscope pod,
// waits until the pod is ready, and runs microscope in the background. It
// returns an error if microscope cannot be installed or is not ready after
// a timeout. It also returns a callback function to stop the monitor and save
// the output to `helpers.monitorLogFileName` file. Takes an optional list of
// arguments to pass to microscope.
func (kub *Kubectl) MicroscopeStart(microscopeOptions ...string) (error, func() error) {
	if !EnableMicroscope {
		return nil, func() error { return nil }
	}

	microscope := "microscope"
	var microscopeCmd string
	if len(microscopeOptions) == 0 {
		microscopeCmd = "microscope"
	} else {
		microscopeCmd = fmt.Sprintf("%s %s", microscope, strings.Join(microscopeOptions, " "))
	}
	var microscopeCmdWithTimestamps = microscopeCmd + "| ts '[%Y-%m-%d %H:%M:%S]'"
	var cb = func() error { return nil }
	cmd := fmt.Sprintf("%[1]s -ti -n %[2]s exec %[3]s -- %[4]s",
		KubectlCmd, KubeSystemNamespace, microscope, microscopeCmdWithTimestamps)
	microscopePath := ManifestGet(microscopeManifest)
	_ = kub.ApplyDefault(microscopePath)

	err := kub.WaitforPods(
		KubeSystemNamespace,
		fmt.Sprintf("-l k8s-app=%s", microscope),
		HelperTimeout)
	if err != nil {
		return err, cb
	}

	ctx, cancel := context.WithCancel(context.Background())
	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})

	cb = func() error {
		cancel()
		<-ctx.Done()
		testPath, err := CreateReportDirectory()
		if err != nil {
			kub.logger.WithError(err).Errorf(
				"cannot create test results path '%s'", testPath)
			return err
		}

		err = WriteOrAppendToFile(
			filepath.Join(testPath, MonitorLogFileName),
			res.CombineOutput().Bytes(),
			LogPerm)
		if err != nil {
			log.WithError(err).Errorf("cannot create monitor log file")
			return err
		}
		res := kub.Exec(fmt.Sprintf("%s -n %s delete pod --grace-period=0 --force microscope", KubectlCmd, KubeSystemNamespace))
		if !res.WasSuccessful() {
			return fmt.Errorf("error deleting microscope pod: %s", res.OutputPrettyPrint())
		}
		return nil
	}

	return nil, cb
}
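
// Example: starting microscope for the duration of a test. A hypothetical
// sketch; it assumes EnableMicroscope is set to true.
//
//	err, stop := kub.MicroscopeStart()
//	if err == nil {
//		defer func() { _ = stop() }()
//	}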

// MonitorStart runs cilium monitor in the background and dumps the contents
// into a log file for later debugging
func (kub *Kubectl) MonitorStart(namespace, pod, filename string) func() error {
	cmd := fmt.Sprintf("%s exec -n %s %s -- cilium monitor -v", KubectlCmd, namespace, pod)
	ctx, cancel := context.WithCancel(context.Background())
	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})

	cb := func() error {
		cancel()
		<-ctx.Done()
		testPath, err := CreateReportDirectory()
		if err != nil {
			kub.logger.WithError(err).Errorf(
				"cannot create test results path '%s'", testPath)
			return err
		}

		err = WriteOrAppendToFile(
			filepath.Join(testPath, filename),
			res.CombineOutput().Bytes(),
			LogPerm)
		if err != nil {
			log.WithError(err).Errorf("cannot create monitor log file %s", filename)
			return err
		}
		return nil
	}

	return cb
}
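
// Example: capturing `cilium monitor` output during a test. A hypothetical
// sketch; the Cilium pod and file names are illustrative.
//
//	stop := kub.MonitorStart(KubeSystemNamespace, "cilium-abc12", "monitor-k8s1.log")
//	// ... run the scenario under test ...
//	if err := stop(); err != nil {
//		// monitor output could not be saved to the report directory
//	}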

// BackgroundReport dumps the result of the given commands on cilium pods every
// five seconds.
func (kub *Kubectl) BackgroundReport(commands ...string) (context.CancelFunc, error) {
	backgroundCtx, cancel := context.WithCancel(context.Background())
	pods, err := kub.GetCiliumPods(KubeSystemNamespace)
	if err != nil {
		return cancel, fmt.Errorf("Cannot retrieve cilium pods: %s", err)
	}
	retrieveInfo := func() {
		for _, pod := range pods {
			for _, cmd := range commands {
				kub.CiliumExec(pod, cmd)
			}
		}
	}
	go func(ctx context.Context) {
		ticker := time.NewTicker(5 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				retrieveInfo()
			}
		}
	}(backgroundCtx)
	return cancel, nil
}
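
// Example: periodically dumping Cilium state while a test runs. A
// hypothetical sketch; the commands are illustrative.
//
//	cancel, err := kub.BackgroundReport("cilium endpoint list", "cilium policy get")
//	if err == nil {
//		defer cancel()
//	}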

// PprofReport runs pprof on cilium nodes every 5 minutes and saves the data
// into the test folder with a pprof suffix.
func (kub *Kubectl) PprofReport() {
	PProfCadence := 5 * time.Minute
	ticker := time.NewTicker(PProfCadence)
	log := kub.logger.WithField("subsys", "pprofReport")

	retrievePProf := func(pod, testPath string) {
		res := kub.ExecPodCmd(KubeSystemNamespace, pod, "gops pprof-cpu 1")
		if !res.WasSuccessful() {
			log.Errorf("cannot execute pprof: %s", res.OutputPrettyPrint())
			return
		}
		files := kub.ExecPodCmd(KubeSystemNamespace, pod, `ls -1 /tmp/`)
		for _, file := range files.ByLines() {
			if !strings.Contains(file, "profile") {
				continue
			}

			dest := filepath.Join(
				BasePath, testPath,
				fmt.Sprintf("%s-profile-%s.pprof", pod, file))
			_ = kub.Exec(fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s %[5]s",
				KubectlCmd, KubeSystemNamespace, pod, file, dest),
				ExecOptions{SkipLog: true})

			_ = kub.ExecPodCmd(KubeSystemNamespace, pod, fmt.Sprintf(
				"rm %s", filepath.Join("/tmp/", file)))
		}
	}

	for {
		select {
		case <-ticker.C:

			testPath, err := CreateReportDirectory()
			if err != nil {
				log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
				return
			}

			pods, err := kub.GetCiliumPods(KubeSystemNamespace)
			if err != nil {
				log.WithError(err).Error("cannot get cilium pods")
			}

			for _, pod := range pods {
				retrievePProf(pod, testPath)
			}

		}
	}
}

// NodeCleanMetadata resets the annotation.V4CIDRName and annotation.V6CIDRName
// annotations on each node in the Kubernetes cluster to empty values. It
// returns an error if the nodes cannot be retrieved via the Kubernetes API.
func (kub *Kubectl) NodeCleanMetadata() error {
	metadata := []string{
		annotation.V4CIDRName,
		annotation.V6CIDRName,
	}

	data := kub.ExecShort(fmt.Sprintf("%s get nodes -o jsonpath='{.items[*].metadata.name}'", KubectlCmd))
	if !data.WasSuccessful() {
		return fmt.Errorf("could not get nodes via %s: %s", KubectlCmd, data.CombineOutput())
	}
	for _, node := range strings.Split(data.Output().String(), " ") {
		for _, label := range metadata {
			kub.ExecShort(fmt.Sprintf("%s annotate --overwrite nodes %s %s=''", KubectlCmd, node, label))
		}
	}
	return nil
}

// NamespaceCreate creates a new Kubernetes namespace with the given name
func (kub *Kubectl) NamespaceCreate(name string) *CmdRes {
	ginkgoext.By("Creating namespace %s", name)
	return kub.ExecShort(fmt.Sprintf("%s create namespace %s", KubectlCmd, name))
}

// NamespaceDelete deletes a given Kubernetes namespace
func (kub *Kubectl) NamespaceDelete(name string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s delete namespace %s", KubectlCmd, name))
}

// WaitforPods waits up until timeout seconds have elapsed for all pods in the
// specified namespace that match the provided JSONPath filter to have their
// containerStatuses equal to "ready". Returns nil if all pods achieve
// the aforementioned desired state within timeout seconds. Returns an error
// if the command failed or the timeout was exceeded.
func (kub *Kubectl) WaitforPods(namespace string, filter string, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, 0, timeout)
}

// checkPodStatusFunc returns true if the pod is in the desired state, or false
// otherwise.
type checkPodStatusFunc func(v1.Pod) bool

// checkRunning checks that the pods are running, but not necessarily ready.
func checkRunning(pod v1.Pod) bool {
	return pod.Status.Phase == v1.PodRunning && pod.ObjectMeta.DeletionTimestamp == nil
}

// checkReady determines whether the pods are running and ready.
func checkReady(pod v1.Pod) bool {
	if !checkRunning(pod) {
		return false
	}

	for _, container := range pod.Status.ContainerStatuses {
		if !container.Ready {
			return false
		}
	}
	return true
}

// WaitforNPodsRunning waits up until timeout duration has elapsed for at least
// minRequired pods in the specified namespace that match the provided JSONPath
// filter to have their containerStatuses equal to "running".
// Returns no error if minRequired pods achieve the aforementioned desired
// state within timeout seconds. Returns an error if the command failed or the
// timeout was exceeded.
// When minRequired is 0, the function will derive required pod count from number
// of pods in the cluster for every iteration.
func (kub *Kubectl) WaitforNPodsRunning(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkRunning, namespace, filter, minRequired, timeout)
}

// WaitforNPods waits up until timeout seconds have elapsed for at least
// minRequired pods in the specified namespace that match the provided JSONPath
// filter to have their containerStatuses equal to "ready".
// Returns no error if minRequired pods achieve the aforementioned desired
// state within timeout seconds. Returns an error if the command failed or the
// timeout was exceeded.
// When minRequired is 0, the function will derive required pod count from number
// of pods in the cluster for every iteration.
func (kub *Kubectl) WaitforNPods(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, minRequired, timeout)
}

func (kub *Kubectl) waitForNPods(checkStatus checkPodStatusFunc, namespace string, filter string, minRequired int, timeout time.Duration) error {
	body := func() bool {
		podList := &v1.PodList{}
		err := kub.GetPods(namespace, filter).Unmarshal(podList)
		if err != nil {
			kub.logger.Infof("Error while getting PodList: %s", err)
			return false
		}

		if len(podList.Items) == 0 {
			return false
		}

		var required int

		if minRequired == 0 {
			required = len(podList.Items)
		} else {
			required = minRequired
		}

		if len(podList.Items) < required {
			return false
		}

		// Count the pods that have reached the desired state according to
		// checkStatus. For a readiness check this means all of:
		//  - the pod is running (Phase == v1.PodRunning)
		//  - the pod is not scheduled for deletion (DeletionTimestamp is nil)
		//  - all containers in the pod are ready (containerStatuses.Ready)
		currScheduled := 0
		for _, pod := range podList.Items {
			if checkStatus(pod) {
				currScheduled++
			}
		}

		return currScheduled >= required
	}

	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for pods with filter %s to be ready", filter),
		&TimeoutConfig{Timeout: timeout})
}

// WaitForServiceEndpoints waits up until timeout seconds have elapsed for all
// endpoints in the specified namespace that match the provided JSONPath
// filter. Returns nil if the service endpoint becomes available within
// timeout seconds. Returns an error if the command failed or the timeout was
// exceeded.
func (kub *Kubectl) WaitForServiceEndpoints(namespace string, filter string, service string, timeout time.Duration) error {
	body := func() bool {
		var jsonPath = fmt.Sprintf("{.items[?(@.metadata.name == '%s')].subsets[0].ports[0].port}", service)
		data, err := kub.GetEndpoints(namespace, filter).Filter(jsonPath)

		if err != nil {
			kub.logger.WithError(err).Error("cannot get service endpoints")
			return false
		}

		if data.String() != "" {
			return true
		}

		kub.logger.WithFields(logrus.Fields{
			"namespace": namespace,
			"filter":    filter,
			"data":      data,
			"service":   service,
		}).Info("WaitForServiceEndpoints: service endpoint not ready")
		return false
	}

	return WithTimeout(body, "could not get service endpoints", &TimeoutConfig{Timeout: timeout})
}

// Action performs the specified ResourceLifeCycleAction on the Kubernetes
// manifest located at path filepath in the given namespace
func (kub *Kubectl) Action(action ResourceLifeCycleAction, filePath string, namespace ...string) *CmdRes {
	if len(namespace) == 0 {
		kub.logger.Debugf("performing '%v' on '%v'", action, filePath)
		return kub.ExecShort(fmt.Sprintf("%s %s -f %s", KubectlCmd, action, filePath))
	}

	kub.logger.Debugf("performing '%v' on '%v' in namespace '%v'", action, filePath, namespace[0])
	return kub.ExecShort(fmt.Sprintf("%s %s -f %s -n %s", KubectlCmd, action, filePath, namespace[0]))
}

// ApplyOptions stores options for kubectl apply command
type ApplyOptions struct {
	FilePath  string
	Namespace string
	Force     bool
	DryRun    bool
	Output    string
	Piped     string
}

// Apply applies the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Apply(options ApplyOptions) *CmdRes {
	var force string
	if options.Force {
		force = "--force=true"
	} else {
		force = "--force=false"
	}

	cmd := fmt.Sprintf("%s apply %s -f %s", KubectlCmd, force, options.FilePath)

	if options.DryRun {
		cmd = cmd + " --dry-run"
	}

	if len(options.Output) > 0 {
		cmd = cmd + " -o " + options.Output
	}

	if len(options.Namespace) == 0 {
		kub.logger.Debugf("applying %s", options.FilePath)
	} else {
		kub.logger.Debugf("applying %s in namespace %s", options.FilePath, options.Namespace)
		cmd = cmd + " -n " + options.Namespace
	}

	if len(options.Piped) > 0 {
		cmd = options.Piped + " | " + cmd
	}
	return kub.ExecMiddle(cmd)
}
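
// Example: applying a manifest rendered by another command through Piped. A
// hypothetical sketch; the rendering command is illustrative.
//
//	res := kub.Apply(ApplyOptions{
//		FilePath: "-",
//		Force:    true,
//		Piped:    "helm template ./chart",
//	})
//	if !res.WasSuccessful() {
//		// apply failed; res holds the combined output
//	}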

// ApplyDefault applies the given filepath with the other options set to their
// defaults.
func (kub *Kubectl) ApplyDefault(filePath string) *CmdRes {
	return kub.Apply(ApplyOptions{FilePath: filePath})
}

// Create creates the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Create(filePath string) *CmdRes {
	kub.logger.Debugf("creating %s", filePath)
	return kub.ExecShort(
		fmt.Sprintf("%s create -f %s", KubectlCmd, filePath))
}

// CreateResource is a wrapper around `kubectl create <resource>
// <resourceName>`.
func (kub *Kubectl) CreateResource(resource, resourceName string) *CmdRes {
	kub.logger.Debug(fmt.Sprintf("creating resource %s with name %s", resource, resourceName))
	return kub.ExecShort(fmt.Sprintf("kubectl create %s %s", resource, resourceName))
}

// DeleteResource is a wrapper around `kubectl delete <resource>
// <resourceName>`.
func (kub *Kubectl) DeleteResource(resource, resourceName string) *CmdRes {
	kub.logger.Debug(fmt.Sprintf("deleting resource %s with name %s", resource, resourceName))
	return kub.Exec(fmt.Sprintf("kubectl delete %s %s", resource, resourceName))
}

// Delete deletes the Kubernetes manifest at path filepath.
func (kub *Kubectl) Delete(filePath string) *CmdRes {
	kub.logger.Debugf("deleting %s", filePath)
	return kub.ExecShort(
		fmt.Sprintf("%s delete -f %s", KubectlCmd, filePath))
}

// WaitKubeDNS waits until the kubeDNS pods are ready. In case of exceeding the
// default timeout it returns an error.
func (kub *Kubectl) WaitKubeDNS() error {
	return kub.WaitforPods(KubeSystemNamespace, fmt.Sprintf("-l %s", kubeDNSLabel), DNSHelperTimeout)
}

// WaitForKubeDNSEntry waits until the given DNS entry exists in the kube-dns
// service. If the entry is not present after the timeout it returns an error.
// The query name should have the format `${name}.${namespace}`; if the
// `svc.cluster.local` suffix is not present, it is appended to the given name
// and the service's FQDN is checked instead.
func (kub *Kubectl) WaitForKubeDNSEntry(serviceName, serviceNamespace string) error {
	svcSuffix := "svc.cluster.local"
	logger := kub.logger.WithFields(logrus.Fields{"serviceName": serviceName, "serviceNamespace": serviceNamespace})

	serviceNameWithNamespace := fmt.Sprintf("%s.%s", serviceName, serviceNamespace)
	if !strings.HasSuffix(serviceNameWithNamespace, svcSuffix) {
		serviceNameWithNamespace = fmt.Sprintf("%s.%s", serviceNameWithNamespace, svcSuffix)
	}
	// https://bugs.launchpad.net/ubuntu/+source/bind9/+bug/854705
	digCMD := "dig +short %s @%s | grep -v -e '^;'"

	// If the lookup fails, we want to know whether the connection could not
	// be established or the DNS entry simply does not exist.
	digCMDFallback := "dig +tcp %s @%s"

	dnsClusterIP, _, err := kub.GetServiceHostPort(KubeSystemNamespace, "kube-dns")
	if err != nil {
		logger.WithError(err).Error("cannot get kube-dns service IP")
		return err
	}

	body := func() bool {
		serviceIP, _, err := kub.GetServiceHostPort(serviceNamespace, serviceName)
		if err != nil {
			log.WithError(err).Errorf("cannot get service IP for service %s", serviceNameWithNamespace)
			return false
		}

		// ClusterIPNone denotes that this service is headless; there is no
		// service IP for this service, and thus the IP returned by `dig` is
		// an IP of the pod itself, not ClusterIPNone, which is what Kubernetes
		// shows as the IP for the service for headless services.
		if serviceIP == v1.ClusterIPNone {
			res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
			return res.WasSuccessful()
		}
		log.Debugf("service is not headless; checking whether IP retrieved from DNS matches the IP for the service stored in Kubernetes")
		res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
		serviceIPFromDNS := res.SingleOut()
		if !govalidator.IsIP(serviceIPFromDNS) {
			logger.Debugf("output of dig (%s) did not return an IP", serviceIPFromDNS)
			return false
		}

		// Due to lag between new IPs for the same service being synced between
		// kube-apiserver and DNS, check if the IP for the service that is
		// stored in K8s matches the IP of the service cached in DNS. These
		// can be different, because some tests use the same service names.
		// Wait accordingly for the IPs to match, and for the service name to
		// resolve via DNS.
		if !strings.Contains(serviceIPFromDNS, serviceIP) {
			logger.Debugf("service IP retrieved from DNS (%s) does not match the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
			return false
		}
		logger.Debugf("service IP retrieved from DNS (%s) matches the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
		return true
	}

	return WithTimeout(
		body,
		fmt.Sprintf("DNS '%s' is not ready after timeout", serviceNameWithNamespace),
		&TimeoutConfig{Timeout: DNSHelperTimeout})
}
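
// Example: waiting for a service's DNS entry to propagate before testing
// connectivity. A hypothetical sketch; the service name and namespace are
// illustrative.
//
//	if err := kub.WaitForKubeDNSEntry("app-service", "default"); err != nil {
//		// "app-service.default.svc.cluster.local" never resolved
//	}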

// WaitCleanAllTerminatingPods waits until all pods that are in `Terminating`
// state are deleted correctly in the platform. In case of exceeding the
// given timeout it returns an error.
func (kub *Kubectl) WaitCleanAllTerminatingPods(timeout time.Duration) error {
	body := func() bool {
		res := kub.ExecShort(fmt.Sprintf(
			"%s get pods --all-namespaces -o jsonpath='{.items[*].metadata.deletionTimestamp}'",
			KubectlCmd))
		if !res.WasSuccessful() {
			return false
		}

		if res.Output().String() == "" {
			// Output is empty so no terminating containers
			return true
		}

		podsTerminating := len(strings.Split(res.Output().String(), " "))
		kub.logger.WithField("Terminating pods", podsTerminating).Info("List of pods terminating")
		if podsTerminating > 0 {
			return false
		}
		return true
	}

	err := WithTimeout(
		body,
		"Pods are still not deleted after a timeout",
		&TimeoutConfig{Timeout: timeout})
	return err
}

// DeployPatch deploys the original kubernetes descriptor with the given patch.
func (kub *Kubectl) DeployPatch(original, patch string) error {
	// debugYaml only dumps the full created yaml file to the test output if
	// the cilium manifest cannot be created correctly.
	debugYaml := func(original, patch string) {
		// dry-run is only available since k8s 1.11
		switch GetCurrentK8SEnv() {
		case "1.8", "1.9", "1.10":
			_ = kub.ExecShort(fmt.Sprintf(
				`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
				KubectlCmd, original, patch))
		default:
			_ = kub.ExecShort(fmt.Sprintf(
				`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run -o yaml`,
				KubectlCmd, original, patch))
		}
	}

	var res *CmdRes
	// validate first
	// dry-run is only available since k8s 1.11
	switch GetCurrentK8SEnv() {
	case "1.8", "1.9", "1.10":
	default:
		res = kub.ExecShort(fmt.Sprintf(
			`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run`,
			KubectlCmd, original, patch))
		if !res.WasSuccessful() {
			debugYaml(original, patch)
			return res.GetErr("Cilium patch validation failed")
		}
	}

	res = kub.Apply(ApplyOptions{
		FilePath: "-",
		Force:    true,
		Piped: fmt.Sprintf(
			`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
			KubectlCmd, original, patch),
	})
	if !res.WasSuccessful() {
		debugYaml(original, patch)
		return res.GetErr("Cilium manifest patch installation failed")
	}
	return nil
}
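
// Example: applying a patched DaemonSet manifest. A hypothetical sketch; the
// file names are illustrative.
//
//	if err := kub.DeployPatch("cilium-ds.yaml", "cilium-ds-patch.yaml"); err != nil {
//		// validation or apply of the patched manifest failed
//	}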

// ciliumInstall installs all Cilium descriptors into kubernetes.
// dsPatchName corresponds to the DaemonSet patch, found by
// getK8sDescriptorPatch, that will be applied to the original Cilium DaemonSet
// descriptor, found by getK8sDescriptor.
// cmPatchName corresponds to the ConfigMap patch, found by
// getK8sDescriptorPatch, that will be applied to the original Cilium ConfigMap
// descriptor, found by getK8sDescriptor.
// Returns an error if any patch or original descriptor file was not found.
func (kub *Kubectl) ciliumInstall(dsPatchName, cmPatchName string, getK8sDescriptor, getK8sDescriptorPatch func(filename string) string) error {
	cmPathname := getK8sDescriptor("cilium-cm.yaml")
	if cmPathname == "" {
		return fmt.Errorf("Cilium ConfigMap descriptor not found")
	}
	dsPathname := getK8sDescriptor("cilium-ds.yaml")
	if dsPathname == "" {
		return fmt.Errorf("Cilium DaemonSet descriptor not found")
	}
	rbacPathname := getK8sDescriptor("cilium-rbac.yaml")
	if rbacPathname == "" {
		return fmt.Errorf("Cilium RBAC descriptor not found")
	}

	deployOriginal := func(original string) error {
		// debugYaml only dumps the full created yaml file to the test output if
		// the cilium manifest cannot be created correctly.
		debugYaml := func(original string) {
			kub.Apply(ApplyOptions{
				FilePath: original,
				DryRun:   true,
				Output:   "yaml",
			})
		}

		// validate first
		res := kub.Apply(ApplyOptions{
			FilePath: original,
			DryRun:   true,
		})
		if !res.WasSuccessful() {
			debugYaml(original)
			return res.GetErr("Cilium manifest validation fails")
		}

		res = kub.ApplyDefault(original)
		if !res.WasSuccessful() {
			debugYaml(original)
			return res.GetErr("Cannot apply Cilium manifest")
		}
		return nil
	}

	if err := deployOriginal(rbacPathname); err != nil {
		return err
	}

	if err := kub.DeployPatch(cmPathname, getK8sDescriptorPatch(cmPatchName)); err != nil {
		return err
	}

	if err := kub.DeployPatch(dsPathname, getK8sDescriptorPatch(dsPatchName)); err != nil {
		return err
	}

	cmdRes := kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorSA))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator SA %s: %s", ciliumEtcdOperatorSA, cmdRes.OutputPrettyPrint())
	}

	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorRBAC))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator RBAC %s: %s", ciliumEtcdOperatorRBAC, cmdRes.OutputPrettyPrint())
	}

	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperator))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator %s: %s", ciliumEtcdOperator, cmdRes.OutputPrettyPrint())
	}

	_ = kub.ApplyDefault(getK8sDescriptor("cilium-operator-sa.yaml"))
	err := kub.DeployPatch(getK8sDescriptor("cilium-operator.yaml"), getK8sDescriptorPatch("cilium-operator-patch.yaml"))
	if err != nil {
		return fmt.Errorf("Unable to deploy descriptor of cilium-operators: %s", err)
	}

	return nil
}

func addIfNotOverwritten(options []string, field, value string) []string {
	for _, s := range options {
		if strings.HasPrefix(s, "--set "+field) {
			return options
		}
	}

	options = append(options, "--set "+field+"="+value)
	return options
}

func (kub *Kubectl) generateCiliumYaml(options []string, filename string) error {
	for key, value := range defaultHelmOptions {
		options = addIfNotOverwritten(options, key, value)
	}

	switch GetCurrentIntegration() {
	case CIIntegrationFlannel:
		// Appending the options will override earlier options on CLI.
		for k, v := range flannelHelmOverrides {
			options = append(options, fmt.Sprintf("--set %s=%s", k, v))
		}
	default:
	}

	// TODO GH-8753: Use helm rendering library instead of shelling out to
	// helm template
	res := kub.ExecMiddle(fmt.Sprintf("helm template %s --namespace=kube-system %s > %s",
		HelmTemplate, strings.Join(options, " "), filename))
	if !res.WasSuccessful() {
		return res.GetErr("Unable to generate YAML")
	}

	return nil
}
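
// Example: rendering a Cilium manifest with one extra override; any option
// not set explicitly is filled in from defaultHelmOptions. A hypothetical
// sketch; the override value is illustrative.
//
//	opts := []string{"--set global.tunnel=vxlan"}
//	if err := kub.generateCiliumYaml(opts, "cilium.yaml"); err != nil {
//		// helm template failed
//	}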

// ciliumInstallHelm installs Cilium with the Helm options provided.
func (kub *Kubectl) ciliumInstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Apply(ApplyOptions{FilePath: "cilium.yaml", Force: true})
	if !res.WasSuccessful() {
		return res.GetErr("Unable to apply YAML")
	}

	return nil
}

// ciliumUninstallHelm uninstalls Cilium with the Helm options provided.
func (kub *Kubectl) ciliumUninstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Delete("cilium.yaml")
	if !res.WasSuccessful() {
		return res.GetErr("Unable to delete YAML")
	}

	return nil
}

// CiliumInstall installs Cilium with the provided Helm options.
func (kub *Kubectl) CiliumInstall(options []string) error {
	return kub.ciliumInstallHelm(options)
}

// CiliumUninstall uninstalls Cilium with the provided Helm options.
func (kub *Kubectl) CiliumUninstall(options []string) error {
	return kub.ciliumUninstallHelm(options)
}

// CiliumInstallVersion installs all Cilium descriptors into kubernetes for
// a given Cilium Version tag.
// dsPatchName corresponds to the DaemonSet patch that will be applied to the
// original Cilium DaemonSet descriptor of that given Cilium Version tag.
// cmPatchName corresponds to the ConfigMap patch that will be applied to the
// original Cilium ConfigMap descriptor of that given Cilium Version tag.
// Returns an error if any patch or original descriptor file was not found.
func (kub *Kubectl) CiliumInstallVersion(dsPatchName, cmPatchName, versionTag string) error {
	getK8sDescriptorPatch := func(filename string) string {
		// try dependent Cilium, k8s and integration version patch file
		ginkgoVersionedPath := filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), GetCurrentIntegration(), filename)
		_, err := os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version and integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium and k8s version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		return filepath.Join(BasePath, manifestsPath, filename)
	}
	getK8sDescriptor := func(filename string) string {
		return fmt.Sprintf("https://raw.githubusercontent.com/cilium/cilium/%s/examples/kubernetes/%s/%s", versionTag, GetCurrentK8SEnv(), filename)
	}
	return kub.ciliumInstall(dsPatchName, cmPatchName, getK8sDescriptor, getK8sDescriptorPatch)
}

// GetCiliumPods returns a list of all Cilium pods in the specified namespace,
// and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPods(namespace string) ([]string, error) {
	return kub.GetPodNames(namespace, "k8s-app=cilium")
}

// GetCiliumPodsContext returns a list of all Cilium pods in the specified
// namespace, and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPodsContext(ctx context.Context, namespace string) ([]string, error) {
	return kub.GetPodNamesContext(ctx, namespace, "k8s-app=cilium")
}

// CiliumEndpointsList returns the result of `cilium endpoint list` from the
// specified pod.
func (kub *Kubectl) CiliumEndpointsList(ctx context.Context, pod string) *CmdRes {
	return kub.CiliumExecContext(ctx, pod, "cilium endpoint list -o json")
}

// CiliumEndpointsStatus returns a mapping of pod names to their corresponding
// endpoint's status
func (kub *Kubectl) CiliumEndpointsStatus(pod string) map[string]string {
	filter := `{range [*]}{@.status.external-identifiers.pod-name}{"="}{@.status.state}{"\n"}{end}`
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.CiliumExecContext(ctx, pod, fmt.Sprintf(
		"cilium endpoint list -o jsonpath='%s'", filter)).KVOutput()
}
  1228  // CiliumEndpointWaitReady waits until all endpoints managed by all Cilium pod
  1229  // are ready. Returns an error if the Cilium pods cannot be retrieved via
  1230  // Kubernetes, or endpoints are not ready after a specified timeout
  1231  func (kub *Kubectl) CiliumEndpointWaitReady() error {
  1232  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  1233  	if err != nil {
  1234  		kub.logger.WithError(err).Error("cannot get Cilium pods")
  1235  		return err
  1236  	}
  1237  
  1238  	body := func(ctx context.Context) (bool, error) {
  1239  		var wg sync.WaitGroup
  1240  		queue := make(chan bool, len(ciliumPods))
  1241  		endpointsReady := func(pod string) {
  1242  			valid := false
  1243  			defer func() {
  1244  				queue <- valid
  1245  				wg.Done()
  1246  			}()
  1247  			logCtx := kub.logger.WithField("pod", pod)
  1248  			status, err := kub.CiliumEndpointsList(ctx, pod).Filter(`{range [*]}{.status.state}{"="}{.status.identity.id}{"\n"}{end}`)
  1249  			if err != nil {
  1250  				logCtx.WithError(err).Errorf("cannot get endpoints states on Cilium pod")
  1251  				return
  1252  			}
  1253  			total := 0
  1254  			invalid := 0
  1255  			for _, line := range strings.Split(status.String(), "\n") {
  1256  				if line == "" {
  1257  					continue
  1258  				}
  1259  				// each line is like status=identityID.
  1260  				// IdentityID is needed because the reserved:init identity
  1261  				// means that the pod is not ready to accept traffic.
  1262  				total++
  1263  				vals := strings.Split(line, "=")
  1264  				if len(vals) != 2 {
  1265  					logCtx.Errorf("Endpoint list does not have a correct output '%s'", line)
  1266  					return
  1267  				}
  1268  				if vals[0] != "ready" {
  1269  					invalid++
  1270  				}
  1271  				// Consider an endpoint with reserved identity 5 (reserved:init) as not ready.
  1272  				if vals[1] == "5" {
  1273  					invalid++
  1274  				}
  1275  			}
  1276  			logCtx.WithFields(logrus.Fields{
  1277  				"total":   total,
  1278  				"invalid": invalid,
  1279  			}).Info("Waiting for cilium endpoints to be ready")
  1280  
  1281  			if invalid != 0 {
  1282  				return
  1283  			}
  1284  			valid = true
  1285  			return
  1286  		}
  1287  		wg.Add(len(ciliumPods))
  1288  		for _, pod := range ciliumPods {
  1289  			go endpointsReady(pod)
  1290  		}
  1291  
  1292  		wg.Wait()
  1293  		close(queue)
  1294  
  1295  		for status := range queue {
  1296  			if status == false {
  1297  				return false, nil
  1298  			}
  1299  		}
  1300  		return true, nil
  1301  	}
  1302  
  1303  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1304  	defer cancel()
  1305  	err = WithContext(ctx, body, 1*time.Second)
  1306  	if err == nil {
  1307  		return err
  1308  	}
  1309  
  1310  	callback := func() string {
  1311  		ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1312  		defer cancel()
  1313  
  1314  		var errorMessage string
  1315  		for _, pod := range ciliumPods {
  1316  			var endpoints []models.Endpoint
  1317  			cmdRes := kub.CiliumEndpointsList(ctx, pod)
  1318  			if !cmdRes.WasSuccessful() {
  1319  				errorMessage += fmt.Sprintf(
  1320  					"\tCilium Pod: %s \terror: unable to get endpoint list: %s",
  1321  					pod, cmdRes.err)
  1322  				continue
  1323  			}
  1324  			err := cmdRes.Unmarshal(&endpoints)
  1325  			if err != nil {
  1326  				errorMessage += fmt.Sprintf(
  1327  					"\tCilium Pod: %s \terror: unable to parse endpoint list: %s",
  1328  					pod, err)
  1329  				continue
  1330  			}
  1331  			for _, ep := range endpoints {
  1332  				errorMessage += fmt.Sprintf(
  1333  					"\tCilium Pod: %s \tEndpoint: %d \tIdentity: %d\t State: %s\n",
  1334  					pod, ep.ID, ep.Status.Identity.ID, ep.Status.State)
  1335  			}
  1336  		}
  1337  		return errorMessage
  1338  	}
  1339  	return NewSSHMetaError(err.Error(), callback)
  1340  }
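
// A usage sketch: tests typically block on this right after deploying
// workloads and before generating traffic. On timeout, the returned
// SSHMetaError carries a per-endpoint dump produced by the callback above.
//
//	if err := kub.CiliumEndpointWaitReady(); err != nil {
//		log.Errorf("Cilium endpoints did not become ready: %s", err)
//	}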
  1341  
  1342  // WaitForCEPIdentity waits for a particular CEP to have an identity present.
  1343  func (kub *Kubectl) WaitForCEPIdentity(ns, podName string) error {
  1344  	body := func(ctx context.Context) (bool, error) {
  1345  		ep := kub.CepGet(ns, podName)
  1346  		if ep == nil {
  1347  			return false, nil
  1348  		}
  1349  		if ep.Identity == nil {
  1350  			return false, nil
  1351  		}
  1352  		return ep.Identity.ID != 0, nil
  1353  	}
  1354  
  1355  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1356  	defer cancel()
  1357  	return WithContext(ctx, body, 1*time.Second)
  1358  }
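
// Illustrative sketch (the namespace and pod name are hypothetical):
//
//	if err := kub.WaitForCEPIdentity("default", "app-pod-1"); err != nil {
//		log.Errorf("CEP never reported a non-zero identity: %s", err)
//	}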
  1359  
  1360  // CiliumExecContext runs cmd in the specified Cilium pod with the given context.
  1361  func (kub *Kubectl) CiliumExecContext(ctx context.Context, pod string, cmd string) *CmdRes {
  1362  	limitTimes := 5
  1363  	execute := func() *CmdRes {
  1364  		command := fmt.Sprintf("%s exec -n kube-system %s -- %s", KubectlCmd, pod, cmd)
  1365  		return kub.ExecContext(ctx, command)
  1366  	}
  1367  	var res *CmdRes
  1368  	// Sometimes kubectl returns a 126 exit code. It used to happen in nightly
  1369  	// tests when a lot of execs were in flight (a cgroups issue). The upstream
  1370  	// changes did not fix the issue, so we retry here as a workaround for the
  1371  	// kubectl problem.
  1372  	// https://github.com/openshift/origin/issues/16246
  1372  	// https://github.com/openshift/origin/issues/16246
  1373  	for i := 0; i < limitTimes; i++ {
  1374  		res = execute()
  1375  		if res.GetExitCode() != 126 {
  1376  			break
  1377  		}
  1378  		time.Sleep(200 * time.Millisecond)
  1379  	}
  1380  	return res
  1381  }
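
// Usage sketch with a caller-owned context (the timeout value is
// illustrative); note the helper retries internally only on exit code 126.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//	defer cancel()
//	res := kub.CiliumExecContext(ctx, ciliumPod, "cilium endpoint list")
//	if !res.WasSuccessful() {
//		log.Errorf("exec failed: %s", res.CombineOutput())
//	}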
  1382  
  1383  // CiliumExec runs cmd in the specified Cilium pod.
  1384  // Deprecated: use CiliumExecContext instead
  1385  func (kub *Kubectl) CiliumExec(pod string, cmd string) *CmdRes {
  1386  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1387  	defer cancel()
  1388  	return kub.CiliumExecContext(ctx, pod, cmd)
  1389  }
  1390  
  1391  // CiliumExecUntilMatch executes the specified command repeatedly for the
  1392  // specified Cilium pod until the given substring is present in stdout.
  1393  // If the timeout is reached it will return an error.
  1394  func (kub *Kubectl) CiliumExecUntilMatch(pod, cmd, substr string) error {
  1395  	body := func() bool {
  1396  		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1397  		defer cancel()
  1398  		res := kub.CiliumExecContext(ctx, pod, cmd)
  1399  		return strings.Contains(res.Output().String(), substr)
  1400  	}
  1401  
  1402  	return WithTimeout(
  1403  		body,
  1404  		fmt.Sprintf("%s is not in the output after timeout", substr),
  1405  		&TimeoutConfig{Timeout: HelperTimeout})
  1406  }
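
// Sketch: poll the agent CLI until a substring shows up (the command and
// substring here are illustrative).
//
//	err := kub.CiliumExecUntilMatch(ciliumPod, "cilium endpoint list", "ready")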
  1407  
  1408  // WaitForCiliumInitContainerToFinish waits for all Cilium init containers to
  1409  // finish
  1410  func (kub *Kubectl) WaitForCiliumInitContainerToFinish() error {
  1411  	body := func() bool {
  1412  		podList := &v1.PodList{}
  1413  		err := kub.GetPods("kube-system", "-l k8s-app=cilium").Unmarshal(podList)
  1414  		if err != nil {
  1415  			kub.logger.Infof("Error while getting PodList: %s", err)
  1416  			return false
  1417  		}
  1418  		if len(podList.Items) == 0 {
  1419  			return false
  1420  		}
  1421  		for _, pod := range podList.Items {
  1422  			for _, v := range pod.Status.InitContainerStatuses {
  1423  				if v.State.Terminated != nil && (v.State.Terminated.Reason != "Completed" || v.State.Terminated.ExitCode != 0) {
  1424  					kub.logger.WithFields(logrus.Fields{
  1425  						"podName":      pod.Name,
  1426  						"currentState": v.State.String(),
  1427  					}).Infof("Cilium Init container not completed")
  1428  					return false
  1429  				}
  1430  			}
  1431  		}
  1432  		return true
  1433  	}
  1434  
  1435  	return WithTimeout(body, "Cilium init containers did not complete successfully", &TimeoutConfig{Timeout: HelperTimeout})
  1436  }
  1437  
  1438  // CiliumNodesWait waits until all nodes in the Kubernetes cluster are annotated
  1439  // with Cilium annotations. Its runtime is bounded by a maximum of `HelperTimeout`.
  1440  // When a node is annotated with said annotations, it indicates
  1441  // that the tunnels in the nodes are set up and that cross-node traffic can be
  1442  // tested. Returns an error if the timeout is exceeded for waiting for the nodes
  1443  // to be annotated.
  1444  func (kub *Kubectl) CiliumNodesWait() (bool, error) {
  1445  	body := func() bool {
  1446  		filter := `{range .items[*]}{@.metadata.name}{"="}{@.metadata.annotations.io\.cilium\.network\.ipv4-pod-cidr}{"\n"}{end}`
  1447  		data := kub.ExecShort(fmt.Sprintf(
  1448  			"%s get nodes -o jsonpath='%s'", KubectlCmd, filter))
  1449  		if !data.WasSuccessful() {
  1450  			return false
  1451  		}
  1452  		result := data.KVOutput()
  1453  		for k, v := range result {
  1454  			if v == "" {
  1455  				kub.logger.Infof("Kubernetes node '%v' does not have Cilium metadata", k)
  1456  				return false
  1457  			}
  1458  			kub.logger.Infof("Kubernetes node '%v' IPv4 address: '%v'", k, v)
  1459  		}
  1460  		return true
  1461  	}
  1462  	err := WithTimeout(body, "Kubernetes node does not have cilium metadata", &TimeoutConfig{Timeout: HelperTimeout})
  1463  	if err != nil {
  1464  		return false, err
  1465  	}
  1466  	return true, nil
  1467  }
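
// Usage sketch: the boolean is true exactly when the error is nil, so most
// callers only need to check the error.
//
//	if _, err := kub.CiliumNodesWait(); err != nil {
//		log.Errorf("nodes were not annotated by Cilium: %s", err)
//	}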
  1468  
  1469  // WaitPolicyDeleted waits for policy policyName to be deleted from the
  1470  // cilium-agent running in pod. Returns an error if policyName was unable to
  1471  // be deleted after some amount of time.
  1472  func (kub *Kubectl) WaitPolicyDeleted(pod string, policyName string) error {
  1473  	body := func() bool {
  1474  		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1475  		defer cancel()
  1476  		res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyName))
  1477  
  1478  		// `cilium policy get <policy name>` fails if the policy is not loaded,
  1479  		// which is the condition we want.
  1480  		return !res.WasSuccessful()
  1481  	}
  1482  
  1483  	return WithTimeout(body, fmt.Sprintf("Policy %s was not deleted in time", policyName), &TimeoutConfig{Timeout: HelperTimeout})
  1484  }
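
// Sketch (the policy name is hypothetical): this is typically called after a
// `kubectl delete` of the corresponding policy resource.
//
//	err := kub.WaitPolicyDeleted(ciliumPod, "l3-policy-test")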
  1485  
  1486  // CiliumIsPolicyLoaded returns true if the policy is loaded in the given
  1487  // Cilium pod, and false if the policy is not in place.
  1488  func (kub *Kubectl) CiliumIsPolicyLoaded(pod string, policyCmd string) bool {
  1489  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1490  	defer cancel()
  1491  	res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyCmd))
  1492  	return res.WasSuccessful()
  1493  }
  1494  
  1495  // CiliumPolicyRevision returns the policy revision in the specified Cilium pod.
  1496  // Returns an error if the policy revision cannot be retrieved.
  1497  func (kub *Kubectl) CiliumPolicyRevision(pod string) (int, error) {
  1498  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1499  	defer cancel()
  1500  	res := kub.CiliumExecContext(ctx, pod, "cilium policy get -o json")
  1501  	if !res.WasSuccessful() {
  1502  		return -1, fmt.Errorf("cannot get the revision %s", res.Output())
  1503  	}
  1504  
  1505  	revision, err := res.Filter("{.revision}")
  1506  	if err != nil {
  1507  		return -1, fmt.Errorf("cannot get revision from json: %s", err)
  1508  	}
  1509  
  1510  	revi, err := strconv.Atoi(strings.Trim(revision.String(), "\n"))
  1511  	if err != nil {
  1512  		kub.logger.Errorf("revision on pod '%s' is not valid '%s'", pod, res.CombineOutput())
  1513  		return -1, err
  1514  	}
  1515  	return revi, nil
  1516  }
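
// A common pattern (sketch) is to record the revision before importing a
// policy and then wait for the agent to advance past it:
//
//	before, err := kub.CiliumPolicyRevision(ciliumPod)
//	if err != nil {
//		// handle the error
//	}
//	// ...apply the policy, then poll until the revision exceeds `before`.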
  1517  
  1518  // ResourceLifeCycleAction represents an action performed upon objects in
  1519  // Kubernetes.
  1520  type ResourceLifeCycleAction string
  1521  
  1522  // CiliumPolicyAction performs the specified action in Kubernetes for the
  1523  // policy stored at the given filepath, and waits up to the given timeout
  1524  // for the policy to be applied in all Cilium endpoints. It returns an
  1525  // error if the policy is not imported before the timeout is
  1526  // exceeded.
  1527  func (kub *Kubectl) CiliumPolicyAction(namespace, filepath string, action ResourceLifeCycleAction, timeout time.Duration) (string, error) {
  1528  	numNodes := kub.GetNumNodes()
  1529  
  1530  	// Test filter: https://jqplay.org/s/EgNzc06Cgn
  1531  	jqFilter := fmt.Sprintf(
  1532  		`[.items[]|{name:.metadata.name, enforcing: (.status|if has("nodes") then .nodes |to_entries|map_values(.value.enforcing) + [(.|length >= %d)]|all else true end)|tostring, status: has("status")|tostring}]`,
  1533  		numNodes)
  1534  	npFilter := fmt.Sprintf(
  1535  		`{range .items[*]}{"%s="}{.metadata.name}{" %s="}{.metadata.namespace}{"\n"}{end}`,
  1536  		KubectlPolicyNameLabel, KubectlPolicyNameSpaceLabel)
  1537  	kub.logger.Infof("Performing %s action on resource '%s'", action, filepath)
  1538  
  1539  	if status := kub.Action(action, filepath, namespace); !status.WasSuccessful() {
  1540  		return "", status.GetErr(fmt.Sprintf("Cannot perform '%s' on resource '%s'", action, filepath))
  1541  	}
  1542  
  1543  	if action == KubectlDelete {
  1544  		// The policy is being uninstalled, so there is no need to validate that it is enforced.
  1545  		return "", nil
  1546  	}
  1547  
  1548  	body := func() bool {
  1549  		var data []map[string]string
  1550  		cmd := fmt.Sprintf("%s get cnp --all-namespaces -o json | jq '%s'",
  1551  			KubectlCmd, jqFilter)
  1552  
  1553  		res := kub.ExecShort(cmd)
  1554  		if !res.WasSuccessful() {
  1555  			kub.logger.WithError(res.GetErr("")).Error("cannot get cnp status")
  1556  			return false
  1557  		}
  1559  
  1560  		err := res.Unmarshal(&data)
  1561  		if err != nil {
  1562  			kub.logger.WithError(err).Error("Cannot unmarshal json")
  1563  			return false
  1564  		}
  1565  
  1566  		for _, item := range data {
  1567  			if item["enforcing"] != "true" || item["status"] != "true" {
  1568  				kub.logger.Errorf("Policy '%s' is not enforcing yet", item["name"])
  1569  				return false
  1570  			}
  1571  		}
  1572  		return true
  1573  	}
  1574  
  1575  	err := WithTimeout(
  1576  		body,
  1577  		"cannot change state of resource correctly; command timed out",
  1578  		&TimeoutConfig{Timeout: timeout})
  1579  
  1580  	if err != nil {
  1581  		return "", err
  1582  	}
  1583  
  1584  	knpBody := func() bool {
  1585  		knp := kub.ExecShort(fmt.Sprintf("%s get --all-namespaces netpol -o jsonpath='%s'",
  1586  			KubectlCmd, npFilter))
  1587  		result := knp.ByLines()
  1588  		if len(result) == 0 {
  1589  			return true
  1590  		}
  1591  
  1592  		pods, err := kub.GetCiliumPods(KubeSystemNamespace)
  1593  		if err != nil {
  1594  			kub.logger.WithError(err).Error("cannot retrieve cilium pods")
  1595  			return false
  1596  		}
  1597  		for _, item := range result {
  1598  			for _, ciliumPod := range pods {
  1599  				if !kub.CiliumIsPolicyLoaded(ciliumPod, item) {
  1600  					kub.logger.Infof("Policy '%s' is not ready on Cilium pod '%s'", item, ciliumPod)
  1601  					return false
  1602  				}
  1603  			}
  1604  		}
  1605  		return true
  1606  	}
  1607  
  1608  	err = WithTimeout(
  1609  		knpBody,
  1610  		"cannot change state of Kubernetes network policies correctly; command timed out",
  1611  		&TimeoutConfig{Timeout: timeout})
  1612  	return "", err
  1613  }
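
// Usage sketch, assuming the KubectlApply action constant defined alongside
// KubectlDelete in this package (the manifest path is illustrative):
//
//	_, err := kub.CiliumPolicyAction(KubeSystemNamespace,
//		"/path/to/cnp.yaml", KubectlApply, HelperTimeout)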
  1614  
  1615  // CiliumReport reports the Cilium pod status to the log and appends the
  1616  // output of the given commands.
  1617  func (kub *Kubectl) CiliumReport(namespace string, commands ...string) {
  1618  	if config.CiliumTestConfig.SkipLogGathering {
  1619  		ginkgoext.GinkgoPrint("Skipped gathering logs (-cilium.skipLogs=true)\n")
  1620  		return
  1621  	}
  1622  
  1623  	// Log gathering for Cilium should take at most 5 minutes. This ensures that
  1624  	// the CiliumReport stage doesn't cause the entire CI to hang.
  1625  
  1626  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
  1627  	defer cancel()
  1628  
  1629  	var wg sync.WaitGroup
  1630  	wg.Add(1)
  1631  
  1632  	go func() {
  1633  		defer wg.Done()
  1634  		kub.DumpCiliumCommandOutput(ctx, namespace)
  1635  		kub.GatherLogs(ctx)
  1636  	}()
  1637  
  1638  	kub.CiliumCheckReport(ctx)
  1639  
  1640  	pods, err := kub.GetCiliumPodsContext(ctx, namespace)
  1641  	if err != nil {
  1642  		kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump")
  1643  	}
  1644  	res := kub.ExecContextShort(ctx, fmt.Sprintf("%s get pods -o wide --all-namespaces", KubectlCmd))
  1645  	ginkgoext.GinkgoPrint(res.GetDebugMessage())
  1646  
  1647  	results := make([]*CmdRes, 0, len(pods)*len(commands))
  1648  	ginkgoext.GinkgoPrint("Fetching command output from pods %s", pods)
  1649  	for _, pod := range pods {
  1650  		for _, cmd := range commands {
  1651  			res = kub.ExecPodCmdBackground(ctx, namespace, pod, cmd, ExecOptions{SkipLog: true})
  1652  			results = append(results, res)
  1653  		}
  1654  	}
  1655  
  1656  	wg.Wait()
  1657  
  1658  	for _, res := range results {
  1659  		res.WaitUntilFinish()
  1660  		ginkgoext.GinkgoPrint(res.GetDebugMessage())
  1661  	}
  1662  }
  1663  
  1664  // EtcdOperatorReport dumps etcd pod data into the report directory to make it
  1665  // possible to debug the etcd operator status when a test fails.
  1666  func (kub *Kubectl) EtcdOperatorReport(ctx context.Context, reportCmds map[string]string) {
  1667  	if reportCmds == nil {
  1668  		reportCmds = make(map[string]string)
  1669  	}
  1670  
  1671  	pods, err := kub.GetPodNamesContext(ctx, KubeSystemNamespace, "etcd_cluster=cilium-etcd")
  1672  	if err != nil {
  1673  		kub.logger.WithError(err).Error("No etcd pods")
  1674  		return
  1675  	}
  1676  
  1677  	etcdctl := "etcdctl --endpoints=https://%s.cilium-etcd.kube-system.svc:2379 " +
  1678  		"--cert-file /etc/etcdtls/member/peer-tls/peer.crt " +
  1679  		"--key-file /etc/etcdtls/member/peer-tls/peer.key " +
  1680  		"--ca-file /etc/etcdtls/member/peer-tls/peer-ca.crt " +
  1681  		" %s"
  1682  
  1683  	etcdDumpCommands := map[string]string{
  1684  		"member list":    "etcd_%s_member_list",
  1685  		"cluster-health": "etcd_%s_cluster_health",
  1686  	}
  1687  
  1688  	for _, pod := range pods {
  1689  		for cmd, reportFile := range etcdDumpCommands {
  1690  			etcdCmd := fmt.Sprintf(etcdctl, pod, cmd)
  1691  			command := fmt.Sprintf("%s -n %s exec -ti %s -- %s",
  1692  				KubectlCmd, KubeSystemNamespace, pod, etcdCmd)
  1693  			reportCmds[command] = fmt.Sprintf(reportFile, pod)
  1694  		}
  1695  	}
  1696  }
  1697  
  1698  // CiliumCheckReport prints a few checks on the JUnit output to provide more
  1699  // context to users. The checks printed are the following:
  1700  // - Number of Kubernetes and Cilium policies installed.
  1701  // - Policy enforcement status by endpoint.
  1702  // - Controller, health, kvstore status.
  1703  func (kub *Kubectl) CiliumCheckReport(ctx context.Context) {
  1704  	pods, _ := kub.GetCiliumPods(KubeSystemNamespace)
  1705  	fmt.Fprintf(CheckLogs, "Cilium pods: %v\n", pods)
  1706  
  1707  	var policiesFilter = `{range .items[*]}{.metadata.namespace}{"::"}{.metadata.name}{" "}{end}`
  1708  	netpols := kub.ExecContextShort(ctx, fmt.Sprintf(
  1709  		"%s get netpol -o jsonpath='%s' --all-namespaces",
  1710  		KubectlCmd, policiesFilter))
  1711  	fmt.Fprintf(CheckLogs, "Netpols loaded: %v\n", netpols.Output())
  1712  
  1713  	cnp := kub.ExecContextShort(ctx, fmt.Sprintf(
  1714  		"%s get cnp -o jsonpath='%s' --all-namespaces",
  1715  		KubectlCmd, policiesFilter))
  1716  	fmt.Fprintf(CheckLogs, "CiliumNetworkPolicies loaded: %v\n", cnp.Output())
  1717  
  1718  	cepFilter := `{range .items[*]}{.metadata.name}{"="}{.status.policy.ingress.enforcing}{":"}{.status.policy.egress.enforcing}{"\n"}{end}`
  1719  	cepStatus := kub.ExecContextShort(ctx, fmt.Sprintf(
  1720  		"%s get cep -o jsonpath='%s' --all-namespaces",
  1721  		KubectlCmd, cepFilter))
  1722  
  1723  	fmt.Fprintf(CheckLogs, "Endpoint Policy Enforcement:\n")
  1724  
  1725  	table := tabwriter.NewWriter(CheckLogs, 5, 0, 3, ' ', 0)
  1726  	fmt.Fprintf(table, "Pod\tIngress\tEgress\n")
  1727  	for pod, policy := range cepStatus.KVOutput() {
  1728  		data := strings.SplitN(policy, ":", 2)
  1729  		if len(data) != 2 {
  1730  			// Reassigning avoids an index-out-of-range panic when the value is malformed.
  1731  			data = []string{"invalid value", "invalid value"}
  1732  		}
  1733  		fmt.Fprintf(table, "%s\t%s\t%s\n", pod, data[0], data[1])
  1734  	}
  1735  	table.Flush()
  1736  
  1737  	var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}::{.status.last-failure-msg}{"\n"}{end}`
  1738  	var failedControllers string
  1739  	for _, pod := range pods {
  1740  		var prefix = ""
  1741  		status := kub.CiliumExecContext(ctx, pod, "cilium status --all-controllers -o json")
  1742  		result, err := status.Filter(controllersFilter)
  1743  		if err != nil {
  1744  			kub.logger.WithError(err).Error("Cannot filter controller status output")
  1745  			continue
  1746  		}
  1747  		var total = 0
  1748  		var failed = 0
  1749  		for name, data := range result.KVOutput() {
  1750  			total++
  1751  			status := strings.SplitN(data, "::", 2)
  1752  			if len(status) != 2 {
  1753  				// Just make sure that the length of the output is 2 so we do
  1754  				// not hit an index error in the following lines.
  1755  				continue
  1756  			}
  1757  			if status[0] != "" {
  1758  				failed++
  1759  				prefix = "⚠️  "
  1760  				failedControllers += fmt.Sprintf("controller %s failure '%s'\n", name, status[1])
  1761  			}
  1762  		}
  1763  		statusFilter := `Status: {.cilium.state}  Health: {.cluster.ciliumHealth.state}` +
  1764  			` Nodes "{.cluster.nodes[*].name}" ContainerRuntime: {.container-runtime.state}` +
  1765  			` Kubernetes: {.kubernetes.state} KVstore: {.kvstore.state}`
  1766  		data, _ := status.Filter(statusFilter)
  1767  		fmt.Fprintf(CheckLogs, "%sCilium agent '%s': %s Controllers: Total %d Failed %d\n",
  1768  			prefix, pod, data, total, failed)
  1769  		if failedControllers != "" {
  1770  			fmt.Fprintf(CheckLogs, "Failed controllers:\n %s", failedControllers)
  1771  		}
  1772  	}
  1773  }
  1774  
  1775  // ValidateNoErrorsInLogs checks that the cilium logs from the given duration
  1776  // (by default `CurrentGinkgoTestDescription().Duration`) do not contain `panic`,
  1777  // `deadlock` or `segmentation fault` messages. If any of these messages is
  1778  // found, it marks the test as failed.
  1779  func (kub *Kubectl) ValidateNoErrorsInLogs(duration time.Duration) {
  1780  
  1781  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
  1782  	defer cancel()
  1783  
  1784  	var logs string
  1785  	cmd := fmt.Sprintf("%s -n %s logs --timestamps=true -l k8s-app=cilium --since=%vs",
  1786  		KubectlCmd, KubeSystemNamespace, duration.Seconds())
  1787  	res := kub.ExecContext(ctx, fmt.Sprintf("%s --previous", cmd), ExecOptions{SkipLog: true})
  1788  	if res.WasSuccessful() {
  1789  		logs += res.Output().String()
  1790  	}
  1791  	res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1792  	if res.WasSuccessful() {
  1793  		logs += res.Output().String()
  1794  	}
  1795  	defer func() {
  1796  		// Keep the cilium logs for the given test in a separate file.
  1797  		testPath, err := CreateReportDirectory()
  1798  		if err != nil {
  1799  			kub.logger.WithError(err).Error("Cannot create report directory")
  1800  			return
  1801  		}
  1802  		err = ioutil.WriteFile(
  1803  			fmt.Sprintf("%s/%s", testPath, CiliumTestLog),
  1804  			[]byte(logs), LogPerm)
  1805  
  1806  		if err != nil {
  1807  			kub.logger.WithError(err).Errorf("Cannot create %s", CiliumTestLog)
  1808  		}
  1809  	}()
  1810  
  1811  	failIfContainsBadLogMsg(logs)
  1812  
  1813  	fmt.Fprint(CheckLogs, logutils.LogErrorsSummary(logs))
  1814  }
  1815  
  1816  // GatherCiliumCoreDumps copies any core dumps present in the pod's /tmp folder
  1817  // into the test report folder for further analysis.
  1818  func (kub *Kubectl) GatherCiliumCoreDumps(ctx context.Context, ciliumPod string) {
  1819  	log := kub.logger.WithField("pod", ciliumPod)
  1820  
  1821  	cores := kub.CiliumExecContext(ctx, ciliumPod, "ls /tmp/ | grep core")
  1822  	if !cores.WasSuccessful() {
  1823  		log.Debug("There are no core dumps in the pod")
  1824  		return
  1825  	}
  1826  
  1827  	testPath, err := CreateReportDirectory()
  1828  	if err != nil {
  1829  		log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
  1830  		return
  1831  	}
  1832  	resultPath := filepath.Join(BasePath, testPath)
  1833  
  1834  	for _, core := range cores.ByLines() {
  1835  		dst := filepath.Join(resultPath, core)
  1836  		src := filepath.Join("/tmp/", core)
  1837  		cmd := fmt.Sprintf("%s -n %s cp %s:%s %s",
  1838  			KubectlCmd, KubeSystemNamespace,
  1839  			ciliumPod, src, dst)
  1840  		res := kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1841  		if !res.WasSuccessful() {
  1842  			log.WithField("output", res.CombineOutput()).Error("Cannot get core from pod")
  1843  		}
  1844  	}
  1845  }
  1846  
  1847  // GetCiliumHostIPv4 retrieves cilium_host IPv4 addr of the given node.
  1848  func (kub *Kubectl) GetCiliumHostIPv4(ctx context.Context, node string) (string, error) {
  1849  	pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, node)
  1850  	if err != nil {
  1851  		return "", fmt.Errorf("unable to retrieve cilium pod: %s", err)
  1852  	}
  1853  
  1854  	cmd := "ip -4 -o a show dev cilium_host | grep -o -e 'inet [0-9.]*' | cut -d' ' -f2"
  1855  	res := kub.ExecPodCmd(KubeSystemNamespace, pod, cmd)
  1856  	if !res.WasSuccessful() {
  1857  		return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr: %s", res.GetError())
  1858  	}
  1859  	addr := res.SingleOut()
  1860  	if addr == "" {
  1861  		return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr")
  1862  	}
  1863  
  1864  	return addr, nil
  1865  }
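
// Sketch (the node name "k8s1" matches the VM naming used in this suite):
//
//	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
//	defer cancel()
//	hostIP, err := kub.GetCiliumHostIPv4(ctx, "k8s1")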
  1866  
  1867  // DumpCiliumCommandOutput runs a variety of commands (ciliumKubCLICommands) and writes the results to
  1868  // TestResultsPath.
  1869  func (kub *Kubectl) DumpCiliumCommandOutput(ctx context.Context, namespace string) {
  1870  	ReportOnPod := func(pod string) {
  1871  		logger := kub.logger.WithField("CiliumPod", pod)
  1872  
  1873  		testPath, err := CreateReportDirectory()
  1874  		if err != nil {
  1875  			logger.WithError(err).Errorf("cannot create test result path '%s'", testPath)
  1876  			return
  1877  		}
  1878  
  1879  		genReportCmds := func(cliCmds map[string]string) map[string]string {
  1880  			reportCmds := map[string]string{}
  1881  			for cmd, logfile := range cliCmds {
  1882  				command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
  1883  				reportCmds[command] = fmt.Sprintf("%s_%s", pod, logfile)
  1884  			}
  1885  			return reportCmds
  1886  		}
  1887  
  1888  		reportCmds := genReportCmds(ciliumKubCLICommands)
  1889  		reportMapContext(ctx, testPath, reportCmds, kub.SSHMeta)
  1890  
  1891  		logsPath := filepath.Join(BasePath, testPath)
  1892  
  1893  		// Get bugtool output. Since bugtool output is dumped in the pod's filesystem,
  1894  		// copy it over with `kubectl cp`.
  1895  		bugtoolCmd := fmt.Sprintf("%s exec -n %s %s -- %s",
  1896  			KubectlCmd, namespace, pod, CiliumBugtool)
  1897  		res := kub.ExecContext(ctx, bugtoolCmd, ExecOptions{SkipLog: true})
  1898  		if !res.WasSuccessful() {
  1899  			logger.Errorf("%s failed: %s", bugtoolCmd, res.CombineOutput().String())
  1900  			return
  1901  		}
  1902  		// Default output directory is /tmp for bugtool.
  1903  		res = kub.ExecContext(ctx, fmt.Sprintf("%s exec -n %s %s -- ls /tmp/", KubectlCmd, namespace, pod))
  1904  		tmpList := res.ByLines()
  1905  		for _, line := range tmpList {
  1906  			// Only copy over bugtool output to directory.
  1907  			if !strings.Contains(line, CiliumBugtool) {
  1908  				continue
  1909  			}
  1910  
  1911  			res = kub.ExecContext(ctx, fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s /tmp/%[4]s",
  1912  				KubectlCmd, namespace, pod, line),
  1913  				ExecOptions{SkipLog: true})
  1914  			if !res.WasSuccessful() {
  1915  				logger.Errorf("'%s' failed: %s", res.GetCmd(), res.CombineOutput())
  1916  				continue
  1917  			}
  1918  
  1919  			archiveName := filepath.Join(logsPath, fmt.Sprintf("bugtool-%s", pod))
  1920  			res = kub.ExecContext(ctx, fmt.Sprintf("mkdir -p %s", archiveName))
  1921  			if !res.WasSuccessful() {
  1922  				logger.WithField("cmd", res.GetCmd()).Errorf(
  1923  					"cannot create bugtool archive folder: %s", res.CombineOutput())
  1924  				continue
  1925  			}
  1926  
  1927  			cmd := fmt.Sprintf("tar -xf /tmp/%s -C %s --strip-components=1", line, archiveName)
  1928  			res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1929  			if !res.WasSuccessful() {
  1930  				logger.WithField("cmd", cmd).Errorf(
  1931  					"Cannot untar bugtool output: %s", res.CombineOutput())
  1932  				continue
  1933  			}
  1934  			// Remove the bugtool artifact so it is not reused if any other test fails.
  1935  			_ = kub.ExecPodCmdBackground(ctx, KubeSystemNamespace, pod, fmt.Sprintf("rm /tmp/%s", line))
  1936  		}
  1937  
  1938  		// Finally, get kvstore output - this is best effort; we do this last
  1939  		// because if connectivity to the kvstore is broken from a cilium pod,
  1940  		// we don't want the context above to timeout and as a result, get none
  1941  		// of the other logs from the tests.
  1942  
  1943  		// Use a shorter context for kvstore-related commands to avoid having
  1944  		// further log-gathering fail as well if the first Cilium pod fails to
  1945  		// gather kvstore logs.
  1946  		kvstoreCmdCtx, cancel := context.WithTimeout(ctx, MidCommandTimeout)
  1947  		defer cancel()
  1948  		reportCmds = genReportCmds(ciliumKubCLICommandsKVStore)
  1949  		reportMapContext(kvstoreCmdCtx, testPath, reportCmds, kub.SSHMeta)
  1950  	}
  1951  
  1952  	pods, err := kub.GetCiliumPodsContext(ctx, namespace)
  1953  	if err != nil {
  1954  		kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump")
  1955  		return
  1956  	}
  1957  	for _, pod := range pods {
  1958  		ReportOnPod(pod)
  1959  		kub.GatherCiliumCoreDumps(ctx, pod)
  1960  	}
  1961  }
  1962  
  1963  // GatherLogs dumps Kubernetes pods, services, DaemonSets and other resources
  1964  // to the testResultsPath directory.
  1965  func (kub *Kubectl) GatherLogs(ctx context.Context) {
  1966  	reportCmds := map[string]string{
  1967  		"kubectl get pods --all-namespaces -o json":                  "pods.txt",
  1968  		"kubectl get services --all-namespaces -o json":              "svc.txt",
  1969  		"kubectl get nodes -o json":                                  "nodes.txt",
  1970  		"kubectl get ds --all-namespaces -o json":                    "ds.txt",
  1971  		"kubectl get cnp --all-namespaces -o json":                   "cnp.txt",
  1972  		"kubectl get cep --all-namespaces -o json":                   "cep.txt",
  1973  		"kubectl get netpol --all-namespaces -o json":                "netpol.txt",
  1974  		"kubectl describe pods --all-namespaces":                     "pods_status.txt",
  1975  		"kubectl get replicationcontroller --all-namespaces -o json": "replicationcontroller.txt",
  1976  		"kubectl get deployment --all-namespaces -o json":            "deployment.txt",
  1977  	}
  1978  
  1979  	kub.GeneratePodLogGatheringCommands(ctx, reportCmds)
  1980  	kub.EtcdOperatorReport(ctx, reportCmds)
  1981  
  1982  	res := kub.ExecContext(ctx, fmt.Sprintf(`%s api-resources | grep -v "^NAME" | awk '{print $1}'`, KubectlCmd))
  1983  	if res.WasSuccessful() {
  1984  		for _, line := range res.ByLines() {
  1985  			key := fmt.Sprintf("%s get %s --all-namespaces -o wide", KubectlCmd, line)
  1986  			reportCmds[key] = fmt.Sprintf("api-resource-%s.txt", line)
  1987  		}
  1988  	} else {
  1989  		kub.logger.Errorf("Cannot get api-resources: %s", res.GetDebugMessage())
  1990  	}
  1991  
  1992  	testPath, err := CreateReportDirectory()
  1993  	if err != nil {
  1994  		kub.logger.WithError(err).Errorf(
  1995  			"cannot create test results path '%s'", testPath)
  1996  		return
  1997  	}
  1998  	reportMap(testPath, reportCmds, kub.SSHMeta)
  1999  
  2000  	for _, node := range []string{K8s1VMName(), K8s2VMName()} {
  2001  		vm := GetVagrantSSHMeta(node)
  2002  		reportCmds := map[string]string{
  2003  			"journalctl --no-pager -au kubelet": fmt.Sprintf("kubelet-%s.log", node),
  2004  			"sudo top -n 1 -b":                  fmt.Sprintf("top-%s.log", node),
  2005  			"sudo ps aux":                       fmt.Sprintf("ps-%s.log", node),
  2006  		}
  2007  		reportMapContext(ctx, testPath, reportCmds, vm)
  2008  	}
  2009  }
  2010  
  2011  // GeneratePodLogGatheringCommands generates the commands to gather logs for
  2012  // all pods in the Kubernetes cluster, and maps the commands to the filename
  2013  // in which they will be stored in reportCmds.
  2014  func (kub *Kubectl) GeneratePodLogGatheringCommands(ctx context.Context, reportCmds map[string]string) {
  2015  	if reportCmds == nil {
  2016  		reportCmds = make(map[string]string)
  2017  	}
  2018  	pods, err := kub.GetAllPods(ctx, ExecOptions{SkipLog: true})
  2019  	if err != nil {
  2020  		kub.logger.WithError(err).Error("Unable to get pods from Kubernetes via kubectl")
  2021  	}
  2022  
  2023  	for _, pod := range pods {
  2024  		for _, containerStatus := range pod.Status.ContainerStatuses {
  2025  			logCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
  2026  			logfileName := fmt.Sprintf("pod-%s-%s-%s.log", pod.Namespace, pod.Name, containerStatus.Name)
  2027  			reportCmds[logCmd] = logfileName
  2028  
  2029  			if containerStatus.RestartCount > 0 {
  2030  				previousLogCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s --previous", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
  2031  				previousLogfileName := fmt.Sprintf("pod-%s-%s-%s-previous.log", pod.Namespace, pod.Name, containerStatus.Name)
  2032  				reportCmds[previousLogCmd] = previousLogfileName
  2033  			}
  2034  		}
  2035  	}
  2036  }
  2037  
  2038  // GetCiliumPodOnNode returns the name of the Cilium pod that is running on
  2039  // the specified node in the given namespace.
  2040  func (kub *Kubectl) GetCiliumPodOnNode(namespace string, node string) (string, error) {
  2041  	filter := fmt.Sprintf(
  2042  		"-o jsonpath='{.items[?(@.spec.nodeName == \"%s\")].metadata.name}'", node)
  2043  
  2044  	res := kub.ExecShort(fmt.Sprintf(
  2045  		"%s -n %s get pods -l k8s-app=cilium %s", KubectlCmd, namespace, filter))
  2046  	if !res.WasSuccessful() {
  2047  		return "", fmt.Errorf("Cilium pod not found on node '%s'", node)
  2048  	}
  2049  
  2050  	return res.Output().String(), nil
  2051  }
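
// Sketch: resolve the agent pod on a node before exec'ing into it (the node
// name is illustrative).
//
//	pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, "k8s1")
//	if err == nil {
//		res := kub.CiliumExec(pod, "cilium status")
//		log.Info(res.OutputPrettyPrint())
//	}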
  2052  
  2053  func (kub *Kubectl) ciliumPreFlightCheck() error {
  2054  	err := kub.ciliumStatusPreFlightCheck()
  2055  	if err != nil {
  2056  		return fmt.Errorf("status is unhealthy: %s", err)
  2057  	}
  2058  
  2059  	err = kub.ciliumControllersPreFlightCheck()
  2060  	if err != nil {
  2061  		return fmt.Errorf("controllers are failing: %s", err)
  2062  	}
  2063  
  2064  	switch GetCurrentIntegration() {
  2065  	case CIIntegrationFlannel:
  2066  	default:
  2067  		err = kub.ciliumHealthPreFlightCheck()
  2068  		if err != nil {
  2069  			return fmt.Errorf("connectivity health is failing: %s", err)
  2070  		}
  2071  	}
  2072  	err = kub.fillServiceCache()
  2073  	if err != nil {
  2074  		return fmt.Errorf("unable to fill service cache: %s", err)
  2075  	}
  2076  	err = kub.ciliumServicePreFlightCheck()
  2077  	if err != nil {
  2078  		return fmt.Errorf("cilium services are not set up correctly: %s", err)
  2079  	}
  2080  	err = kub.servicePreFlightCheck("kubernetes", "default")
  2081  	if err != nil {
  2082  		return fmt.Errorf("kubernetes service is not ready: %s", err)
  2083  	}
  2084  
  2085  	return nil
  2086  }
  2087  
  2088  // CiliumPreFlightCheck checks that various subsystems within Cilium are in a
  2089  // good state. If any one of the multiple preflight checks fails, it returns
  2090  // an error.
  2091  func (kub *Kubectl) CiliumPreFlightCheck() error {
  2092  	ginkgoext.By("Performing Cilium preflight check")
  2093  	// Run this with a timeout because the status can be ready while the other
  2094  	// nodes have not shown up yet, making cilium-health fail as a false positive.
  2095  	var (
  2096  		lastError           string
  2097  		consecutiveFailures int
  2098  	)
  2099  
  2100  	body := func() bool {
  2101  		if err := kub.ciliumPreFlightCheck(); err != nil {
  2102  			newError := err.Error()
  2103  			if lastError != newError || consecutiveFailures >= 5 {
  2104  				ginkgoext.GinkgoPrint("Cilium is not ready yet: %s", newError)
  2105  				lastError = newError
  2106  				consecutiveFailures = 0
  2107  			} else {
  2108  				consecutiveFailures++
  2109  			}
  2110  			return false
  2111  		}
  2112  		return true
  2113  	}
  2115  	timeoutErr := WithTimeout(body, "PreflightCheck failed", &TimeoutConfig{Timeout: HelperTimeout})
  2116  	if timeoutErr != nil {
  2117  		return fmt.Errorf("CiliumPreFlightCheck error: %s: Last polled error: %s", timeoutErr, lastError)
  2118  	}
  2119  	return nil
  2120  }
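
// Typical call site (sketch): run after (re)deploying Cilium and before any
// test traffic, e.g. with the gomega assertions used throughout this suite:
//
//	err := kub.CiliumPreFlightCheck()
//	Expect(err).To(BeNil(), "Cilium preflight check failed")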
  2121  
  2122  func (kub *Kubectl) ciliumStatusPreFlightCheck() error {
  2123  	ginkgoext.By("Performing Cilium status preflight check")
  2124  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2125  	if err != nil {
  2126  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2127  	}
  2128  	for _, pod := range ciliumPods {
  2129  		status := kub.CiliumExec(pod, "cilium status --all-health --all-nodes")
  2130  		if !status.WasSuccessful() {
  2131  			return fmt.Errorf("cilium-agent '%s' is unhealthy: %s", pod, status.OutputPrettyPrint())
  2132  		}
  2133  		noQuorum, err := regexp.Match(`^.*KVStore:.*has-quorum=false.*$`, status.Output().Bytes())
  2134  		if err != nil {
  2135  			return fmt.Errorf("Failed to check for kvstore quorum: %s", err.Error())
  2136  		}
  2137  		if noQuorum {
  2138  			return fmt.Errorf("KVStore doesn't have quorum: %s", status.OutputPrettyPrint())
  2139  		}
  2140  	}
  2141  
  2142  	return nil
  2143  }
  2144  
  2145  func (kub *Kubectl) ciliumControllersPreFlightCheck() error {
  2146  	ginkgoext.By("Performing Cilium controllers preflight check")
  2147  	var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}{"\n"}{end}`
  2148  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2149  	if err != nil {
  2150  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2151  	}
  2152  	for _, pod := range ciliumPods {
  2153  		status := kub.CiliumExec(pod, fmt.Sprintf(
  2154  			"cilium status --all-controllers -o jsonpath='%s'", controllersFilter))
  2155  		if !status.WasSuccessful() {
  2156  			return fmt.Errorf("cilium-agent '%s': Cannot run cilium status: %s",
  2157  				pod, status.OutputPrettyPrint())
  2158  		}
  2159  		for controller, status := range status.KVOutput() {
  2160  			if status != "0" {
  2161  				failmsg := kub.CiliumExec(pod, "cilium status --all-controllers")
  2162  				return fmt.Errorf("cilium-agent '%s': controller %s is failing: %s",
  2163  					pod, controller, failmsg.OutputPrettyPrint())
  2164  			}
  2165  		}
  2166  	}
  2167  
  2168  	return nil
  2169  }
  2170  
  2171  func (kub *Kubectl) ciliumHealthPreFlightCheck() error {
  2172  	ginkgoext.By("Performing Cilium health check")
  2173  	var nodesFilter = `{.nodes[*].name}`
  2174  	var statusFilter = `{range .nodes[*]}{.name}{"="}{.host.primary-address.http.status}{"\n"}{end}`
  2175  
  2176  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2177  	if err != nil {
  2178  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2179  	}
  2180  	for _, pod := range ciliumPods {
  2181  		status := kub.CiliumExec(pod, "cilium-health status -o json --probe")
  2182  		if !status.WasSuccessful() {
  2183  			return fmt.Errorf(
  2184  				"Cluster connectivity is unhealthy on '%s': %s",
  2185  				pod, status.OutputPrettyPrint())
  2186  		}
  2187  
  2188  		// Check that the health node list matches the number of Cilium pods.
  2189  		nodes, err := status.Filter(nodesFilter)
  2190  		if err != nil {
  2191  			return fmt.Errorf("Cannot unmarshal health status: %s", err)
  2192  		}
  2193  
  2194  		nodeCount := strings.Split(nodes.String(), " ")
  2195  		if len(ciliumPods) != len(nodeCount) {
  2196  			return fmt.Errorf(
  2197  				"cilium-agent '%s': Only %d/%d nodes appeared in cilium-health status. nodes = '%+v'",
  2198  				pod, len(nodeCount), len(ciliumPods), nodeCount)
  2199  		}
  2200  
  2201  		healthStatus, err := status.Filter(statusFilter)
  2202  		if err != nil {
  2203  			return fmt.Errorf("Cannot unmarshal health status: %s", err)
  2204  		}
  2205  
  2206  		for node, status := range healthStatus.KVOutput() {
  2207  			if status != "" {
  2208  				return fmt.Errorf("cilium-agent '%s': connectivity to node '%s' is unhealthy: '%s'",
  2209  					pod, node, status)
  2210  			}
  2211  		}
  2212  	}
  2213  	return nil
  2214  }
  2215  
  2216  // serviceCache keeps service information from k8s, the Cilium services, and
  2217  // the Cilium bpf load balancer map.
  2218  type serviceCache struct {
  2219  	services  v1.ServiceList
  2220  	endpoints v1.EndpointsList
  2221  	pods      []ciliumPodServiceCache
  2222  }
  2223  
  2224  // ciliumPodServiceCache keeps the services and BPF load-balancer entries reported by a single Cilium pod.
  2225  type ciliumPodServiceCache struct {
  2226  	name          string
  2227  	services      []models.Service
  2228  	loadBalancers map[string][]string
  2229  }
  2230  
  2231  func (kub *Kubectl) fillServiceCache() error {
  2232  	cache := serviceCache{}
  2233  
  2234  	svcRes := kub.GetFromAllNS("service")
  2235  	err := svcRes.GetErr("Unable to get k8s services")
  2236  	if err != nil {
  2237  		return err
  2238  	}
  2239  	err = svcRes.Unmarshal(&cache.services)
  2240  
  2241  	if err != nil {
  2242  		return fmt.Errorf("Unable to unmarshal K8s services: %s", err.Error())
  2243  	}
  2244  
  2245  	epRes := kub.GetFromAllNS("endpoints")
  2246  	err = epRes.GetErr("Unable to get k8s endpoints")
  2247  	if err != nil {
  2248  		return err
  2249  	}
  2250  	err = epRes.Unmarshal(&cache.endpoints)
  2251  	if err != nil {
  2252  		return fmt.Errorf("Unable to unmarshal K8s endpoints: %s", err.Error())
  2253  	}
  2254  
  2255  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2256  	if err != nil {
  2257  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2258  	}
  2259  	ciliumSvcCmd := "cilium service list -o json"
  2260  	ciliumBpfLbCmd := "cilium bpf lb list -o json"
  2261  
  2262  	cache.pods = make([]ciliumPodServiceCache, 0, len(ciliumPods))
  2263  	for _, pod := range ciliumPods {
  2264  		podCache := ciliumPodServiceCache{name: pod}
  2265  
  2266  		ciliumServicesRes := kub.CiliumExec(pod, ciliumSvcCmd)
  2267  		err := ciliumServicesRes.GetErr(
  2268  			fmt.Sprintf("Unable to retrieve Cilium services on %s", pod))
  2269  		if err != nil {
  2270  			return err
  2271  		}
  2272  
  2273  		err = ciliumServicesRes.Unmarshal(&podCache.services)
  2274  		if err != nil {
  2275  			return fmt.Errorf("Unable to unmarshal Cilium services: %s", err.Error())
  2276  		}
  2277  
  2278  		ciliumLbRes := kub.CiliumExec(pod, ciliumBpfLbCmd)
  2279  		err = ciliumLbRes.GetErr(
  2280  			fmt.Sprintf("Unable to retrieve Cilium bpf lb list on %s", pod))
  2281  		if err != nil {
  2282  			return err
  2283  		}
  2284  
  2285  		err = ciliumLbRes.Unmarshal(&podCache.loadBalancers)
  2286  		if err != nil {
  2287  			return fmt.Errorf("Unable to unmarshal Cilium bpf lb list: %s", err.Error())
  2288  		}
  2289  		cache.pods = append(cache.pods, podCache)
  2290  	}
  2291  	kub.serviceCache = &cache
  2292  	return nil
  2293  }
  2294  
  2295  // KubeDNSPreFlightCheck makes sure that kube-dns is plumbed into Cilium.
  2296  func (kub *Kubectl) KubeDNSPreFlightCheck() error {
  2297  	err := kub.fillServiceCache()
  2298  	if err != nil {
  2299  		return err
  2300  	}
  2301  	return kub.servicePreFlightCheck("kube-dns", "kube-system")
  2302  }
  2303  
  2304  // servicePreFlightCheck makes sure that the k8s service with the given name
  2305  // and namespace is properly plumbed in Cilium.
  2306  func (kub *Kubectl) servicePreFlightCheck(serviceName, serviceNamespace string) error {
  2307  	ginkgoext.By("Performing K8s service preflight check")
  2308  	var service *v1.Service
  2309  	for _, s := range kub.serviceCache.services.Items {
  2310  		if s.Name == serviceName && s.Namespace == serviceNamespace {
  2311  			service = &s
  2312  			break
  2313  		}
  2314  	}
  2315  
  2316  	if service == nil {
  2317  		return fmt.Errorf("%s/%s service not found in service cache", serviceName, serviceNamespace)
  2318  	}
  2319  
  2320  	for _, pod := range kub.serviceCache.pods {
  2321  
  2322  		err := validateK8sService(*service, kub.serviceCache.endpoints.Items, pod.services, pod.loadBalancers)
  2323  		if err != nil {
  2324  			return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2325  		}
  2326  	}
  2327  	return nil
  2328  }
  2329  
  2330  func validateK8sService(k8sService v1.Service, k8sEndpoints []v1.Endpoints, ciliumSvcs []models.Service, ciliumLB map[string][]string) error {
  2331  	var ciliumService *models.Service
  2332  CILIUM_SERVICES:
  2333  	for _, cSvc := range ciliumSvcs {
  2334  		if cSvc.Status.Realized.FrontendAddress.IP == k8sService.Spec.ClusterIP {
  2335  			for _, port := range k8sService.Spec.Ports {
  2336  				if int32(cSvc.Status.Realized.FrontendAddress.Port) == port.Port {
  2337  					ciliumService = &cSvc
  2338  					break CILIUM_SERVICES
  2339  				}
  2340  			}
  2341  		}
  2342  	}
  2343  
  2344  	if ciliumService == nil {
  2345  		return fmt.Errorf("Failed to find Cilium service corresponding to %s/%s k8s service", k8sService.Namespace, k8sService.Name)
  2346  	}
  2347  
  2348  	temp := map[string]bool{}
  2349  	err := validateCiliumSvc(*ciliumService, []v1.Service{k8sService}, k8sEndpoints, temp)
  2350  	if err != nil {
  2351  		return err
  2352  	}
  2353  	return validateCiliumSvcLB(*ciliumService, ciliumLB)
  2354  }
  2355  
  2356  // ciliumServicePreFlightCheck checks that the k8s services are plumbed correctly in each Cilium pod.
  2357  func (kub *Kubectl) ciliumServicePreFlightCheck() error {
  2358  	ginkgoext.By("Performing Cilium service preflight check")
  2359  	for _, pod := range kub.serviceCache.pods {
  2360  		k8sServicesFound := map[string]bool{}
  2361  
  2362  		for _, cSvc := range pod.services {
  2363  			err := validateCiliumSvc(cSvc, kub.serviceCache.services.Items, kub.serviceCache.endpoints.Items, k8sServicesFound)
  2364  			if err != nil {
  2365  				return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2366  			}
  2367  		}
  2368  
  2369  		notFoundServices := make([]string, 0, len(kub.serviceCache.services.Items))
  2370  		for _, k8sSvc := range kub.serviceCache.services.Items {
  2371  			key := serviceKey(k8sSvc)
  2372  			// ignore headless services
  2373  			if k8sSvc.Spec.Type == v1.ServiceTypeClusterIP &&
  2374  				k8sSvc.Spec.ClusterIP == v1.ClusterIPNone {
  2375  				continue
  2376  			}
  2377  			// TODO(brb) check NodePort services
  2378  			if k8sSvc.Spec.Type == v1.ServiceTypeNodePort {
  2379  				continue
  2380  			}
  2381  			if _, ok := k8sServicesFound[key]; !ok {
  2382  				notFoundServices = append(notFoundServices, key)
  2383  			}
  2384  		}
  2385  
  2386  		if len(notFoundServices) > 0 {
  2387  			return fmt.Errorf("Failed to find Cilium service corresponding to k8s services %s on pod %v",
  2388  				strings.Join(notFoundServices, ", "), pod)
  2389  		}
  2390  
  2391  		for _, cSvc := range pod.services {
  2392  			err := validateCiliumSvcLB(cSvc, pod.loadBalancers)
  2393  			if err != nil {
  2394  				return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2395  			}
  2396  		}
  2397  		if len(pod.services) != len(pod.loadBalancers) {
  2398  			return fmt.Errorf("Length of Cilium services doesn't match length of bpf LB map on pod %v", pod)
  2399  		}
  2400  	}
  2401  	return nil
  2402  }
  2403  
  2404  // DeleteETCDOperator deletes the etcd-operator from the cluster pointed to by kub.
  2405  func (kub *Kubectl) DeleteETCDOperator() {
  2406  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete crd etcdclusters.etcd.database.coreos.com", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2407  		log.Warningf("Unable to delete etcdclusters.etcd.database.coreos.com CRD: %s", res.OutputPrettyPrint())
  2408  	}
  2409  
  2410  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete deployment cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2411  		log.Warningf("Unable to delete cilium-etcd-operator Deployment: %s", res.OutputPrettyPrint())
  2412  	}
  2413  
  2414  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2415  		log.Warningf("Unable to delete cilium-etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint())
  2416  	}
  2417  
  2418  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2419  		log.Warningf("Unable to delete cilium-etcd-operator ClusterRole: %s", res.OutputPrettyPrint())
  2420  	}
  2421  
  2422  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2423  		log.Warningf("Unable to delete cilium-etcd-operator ServiceAccount: %s", res.OutputPrettyPrint())
  2424  	}
  2425  
  2426  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2427  		log.Warningf("Unable to delete etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint())
  2428  	}
  2429  
  2430  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2431  		log.Warningf("Unable to delete etcd-operator ClusterRole: %s", res.OutputPrettyPrint())
  2432  	}
  2433  
  2434  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-sa", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2435  		log.Warningf("Unable to delete cilium-etcd-sa ServiceAccount: %s", res.OutputPrettyPrint())
  2436  	}
  2437  }
  2438  
  2439  func serviceKey(s v1.Service) string {
  2440  	return s.Namespace + "/" + s.Name
  2441  }
  2442  
  2443  // validateCiliumSvc checks whether the given Cilium service has corresponding k8s services and endpoints in the given slices.
  2444  func validateCiliumSvc(cSvc models.Service, k8sSvcs []v1.Service, k8sEps []v1.Endpoints, k8sServicesFound map[string]bool) error {
  2445  	var k8sService *v1.Service
  2446  
  2447  	// TODO(brb) validate NodePort services
  2448  	if cSvc.Status.Realized.Flags != nil && cSvc.Status.Realized.Flags.NodePort {
  2449  		return nil
  2450  	}
  2451  
  2452  	for _, k8sSvc := range k8sSvcs {
  2453  		if k8sSvc.Spec.ClusterIP == cSvc.Status.Realized.FrontendAddress.IP {
  2454  			k8sService = &k8sSvc
  2455  			break
  2456  		}
  2457  	}
  2458  	if k8sService == nil {
  2459  		return fmt.Errorf("Could not find Cilium service with ip %s in k8s", cSvc.Spec.FrontendAddress.IP)
  2460  	}
  2461  
  2462  	var k8sServicePort *v1.ServicePort
  2463  	for _, k8sPort := range k8sService.Spec.Ports {
  2464  		if k8sPort.Port == int32(cSvc.Status.Realized.FrontendAddress.Port) {
  2465  			k8sServicePort = &k8sPort
  2466  			k8sServicesFound[serviceKey(*k8sService)] = true
  2467  			break
  2468  		}
  2469  	}
  2470  	if k8sServicePort == nil {
  2471  		return fmt.Errorf("Could not find Cilium service with address %s:%d in k8s", cSvc.Spec.FrontendAddress.IP, cSvc.Spec.FrontendAddress.Port)
  2472  	}
  2473  
  2474  	for _, backAddr := range cSvc.Status.Realized.BackendAddresses {
  2475  		foundEp := false
  2476  		for _, k8sEp := range k8sEps {
  2477  			for _, epAddr := range getK8sEndpointAddresses(k8sEp) {
  2478  				if addrsEqual(backAddr, epAddr) {
  2479  					foundEp = true
  2480  				}
  2481  			}
  2482  		}
  2483  		if !foundEp {
  2484  			return fmt.Errorf(
  2485  				"Could not match cilium service backend address %s:%d with k8s endpoint",
  2486  				*backAddr.IP, backAddr.Port)
  2487  		}
  2488  	}
  2489  	return nil
  2490  }
  2491  
  2492  func validateCiliumSvcLB(cSvc models.Service, lbMap map[string][]string) error {
  2493  	frontendAddress := cSvc.Status.Realized.FrontendAddress.IP + ":" + strconv.Itoa(int(cSvc.Status.Realized.FrontendAddress.Port))
  2494  	bpfBackends, ok := lbMap[frontendAddress]
  2495  	if !ok {
  2496  		return fmt.Errorf("%s bpf lb map entry not found", frontendAddress)
  2497  	}
  2498  
  2499  BACKENDS:
  2500  	for _, addr := range cSvc.Status.Realized.BackendAddresses {
  2501  		backend := *addr.IP + ":" + strconv.Itoa(int(addr.Port))
  2502  		for _, bpfAddr := range bpfBackends {
  2503  			if strings.Contains(bpfAddr, backend) {
  2504  				continue BACKENDS
  2505  			}
  2506  		}
  2507  		return fmt.Errorf("%s not found in bpf map", backend)
  2508  	}
  2509  	return nil
  2510  }
  2511  
  2512  func getK8sEndpointAddresses(ep v1.Endpoints) []*models.BackendAddress {
  2513  	result := []*models.BackendAddress{}
  2514  	for _, subset := range ep.Subsets {
  2515  		for _, addr := range subset.Addresses {
  2516  			ip := addr.IP
  2517  			for _, port := range subset.Ports {
  2518  				ba := &models.BackendAddress{
  2519  					IP:   &ip,
  2520  					Port: uint16(port.Port),
  2521  				}
  2522  				result = append(result, ba)
  2523  			}
  2524  		}
  2525  	}
  2526  	return result
  2527  }
  2528  
  2529  func addrsEqual(addr1, addr2 *models.BackendAddress) bool {
  2530  	return *addr1.IP == *addr2.IP && addr1.Port == addr2.Port
  2531  }
  2532  
  2533  // GenerateNamespaceForTest generates a namespace based on the name of the
  2534  // currently-running test.
  2535  func GenerateNamespaceForTest() string {
  2536  	lowered := strings.ToLower(ginkgoext.CurrentGinkgoTestDescription().FullTestText)
  2537  	// K8s namespaces cannot have spaces.
  2538  	replaced := strings.Replace(lowered, " ", "", -1)
  2539  	return replaced
  2540  }
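
// Sketch of the intended use: derive a namespace from the running test and
// create it (the create command shown here is illustrative).
//
//	namespace := GenerateNamespaceForTest()
//	kub.ExecShort(fmt.Sprintf("%s create namespace %s", KubectlCmd, namespace))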