github.phpd.cn/cilium/cilium@v1.6.12/test/helpers/kubectl.go

     1  // Copyright 2018-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package helpers
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"os"
    24  	"path/filepath"
    25  	"regexp"
    26  	"strconv"
    27  	"strings"
    28  	"sync"
    29  	"text/tabwriter"
    30  	"time"
    31  
    32  	"github.com/cilium/cilium/api/v1/models"
    33  	"github.com/cilium/cilium/pkg/annotation"
    34  	cnpv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    35  	"github.com/cilium/cilium/test/config"
    36  	"github.com/cilium/cilium/test/ginkgo-ext"
    37  	"github.com/cilium/cilium/test/helpers/logutils"
    38  
    39  	"github.com/asaskevich/govalidator"
    40  	"github.com/sirupsen/logrus"
    41  	"k8s.io/api/core/v1"
    42  )
    43  
    44  const (
    45  	// KubectlCmd Kubernetes controller command
    46  	KubectlCmd      = "kubectl"
    47  	manifestsPath   = "k8sT/manifests/"
    48  	descriptorsPath = "../examples/kubernetes"
    49  	kubeDNSLabel    = "k8s-app=kube-dns"
    50  
    51  	// DNSHelperTimeout is a predefined timeout value for K8s DNS commands. It
    52  	// must be larger than 5 minutes because kubedns has a hardcoded resync
    53  	// period of 5 minutes. We have experienced test failures because kubedns
    54  	// needed this time to recover from a connection problem to kube-apiserver.
    55  	// The kubedns resyncPeriod is defined at
    56  	// https://github.com/kubernetes/dns/blob/80fdd88276adba36a87c4f424b66fdf37cd7c9a8/pkg/dns/dns.go#L53
    57  	DNSHelperTimeout = 7 * time.Minute
    58  
    59  	// EnableMicroscope is true when microscope should be enabled
    60  	EnableMicroscope = false
    61  
    62  	// CIIntegrationFlannel contains the constant to be used when flannel is
    63  	// used in the CI.
    64  	CIIntegrationFlannel = "flannel"
    65  )
    66  
    67  var (
    68  	defaultHelmOptions = map[string]string{
    69  		"global.registry":               "k8s1:5000/cilium",
    70  		"agent.image":                   "cilium-dev",
    71  		"global.tag":                    "latest",
    72  		"operator.image":                "operator",
    73  		"operator.tag":                  "latest",
    74  		"managed-etcd.registry":         "docker.io/cilium",
    75  		"global.debug.enabled":          "true",
    76  		"global.k8s.requireIPv4PodCIDR": "true",
    77  		"global.pprof.enabled":          "true",
    78  		"global.logSystemLoad":          "true",
    79  		"global.bpf.preallocateMaps":    "true",
    80  		"global.etcd.leaseTTL":          "30s",
    81  		"global.ipv4.enabled":           "true",
    82  		"global.ipv6.enabled":           "true",
    83  	}
    84  
    85  	flannelHelmOverrides = map[string]string{
    86  		"global.flannel.enabled": "true",
    87  		"global.ipv6.enabled":    "false",
    88  		"global.tunnel":          "disabled",
    89  	}
    90  )
    91  
    92  // GetCurrentK8SEnv returns the value of K8S_VERSION from the OS environment.
    93  func GetCurrentK8SEnv() string { return os.Getenv("K8S_VERSION") }
    94  
     95  // GetCurrentIntegration returns the CI integration set up to run against Cilium.
    96  func GetCurrentIntegration() string {
    97  	switch strings.ToLower(os.Getenv("CNI_INTEGRATION")) {
    98  	case CIIntegrationFlannel:
    99  		return CIIntegrationFlannel
   100  	default:
   101  		return ""
   102  	}
   103  }
   104  
   105  // Kubectl is a wrapper around an SSHMeta. It is used to run Kubernetes-specific
   106  // commands on the node which is accessible via the SSH metadata stored in its
   107  // SSHMeta.
   108  type Kubectl struct {
   109  	*SSHMeta
   110  	*serviceCache
   111  }
   112  
    113  // CreateKubectl initializes a Kubectl helper with the provided vmName and
    114  // logger. It marks the test as failed if it cannot get the SSH meta
    115  // information or cannot execute `ls` on the virtual machine.
   116  func CreateKubectl(vmName string, log *logrus.Entry) *Kubectl {
   117  	node := GetVagrantSSHMeta(vmName)
   118  	if node == nil {
   119  		ginkgoext.Fail(fmt.Sprintf("Cannot connect to vmName  '%s'", vmName), 1)
   120  		return nil
   121  	}
    122  	// This `ls` command is a sanity check: sometimes the SSH meta info is not
    123  	// nil but new commands cannot be executed over SSH, which made tests fail
    124  	// in ways that were hard to debug.
   125  	res := node.ExecShort("ls /tmp/")
   126  	if !res.WasSuccessful() {
   127  		ginkgoext.Fail(fmt.Sprintf(
   128  			"Cannot execute ls command on vmName '%s'", vmName), 1)
   129  		return nil
   130  	}
   131  	node.logger = log
   132  
   133  	return &Kubectl{
   134  		SSHMeta: node,
   135  	}
   136  }
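
         // Illustrative sketch (not part of the original helpers): typical set-up of
         // a Kubectl helper at the start of a test suite. The VM name used here is a
         // placeholder.
         func exampleCreateKubectl(log *logrus.Entry) *Kubectl {
         	kub := CreateKubectl("k8s1", log)
         	if kub == nil {
         		// CreateKubectl has already marked the test as failed.
         		return nil
         	}
         	return kub
         }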
   137  
    138  // CepGet returns the endpoint model for the given pod name in the specified
    139  // namespace. If the pod is not present, it returns nil.
   140  func (kub *Kubectl) CepGet(namespace string, pod string) *cnpv2.EndpointStatus {
   141  	log := kub.logger.WithFields(logrus.Fields{
   142  		"cep":       pod,
   143  		"namespace": namespace})
   144  
   145  	cmd := fmt.Sprintf("%s -n %s get cep %s -o json | jq '.status'", KubectlCmd, namespace, pod)
   146  	res := kub.ExecShort(cmd)
   147  	if !res.WasSuccessful() {
   148  		log.Debug("cep is not present")
   149  		return nil
   150  	}
   151  
   152  	var data *cnpv2.EndpointStatus
   153  	err := res.Unmarshal(&data)
   154  	if err != nil {
   155  		log.WithError(err).Error("cannot Unmarshal json")
   156  		return nil
   157  	}
   158  	return data
   159  }
   160  
   161  // GetNumNodes returns the number of Kubernetes nodes running
   162  func (kub *Kubectl) GetNumNodes() int {
   163  	getNodesCmd := fmt.Sprintf("%s get nodes -o jsonpath='{.items.*.metadata.name}'", KubectlCmd)
   164  	res := kub.ExecShort(getNodesCmd)
   165  	if !res.WasSuccessful() {
   166  		return 0
   167  	}
   168  
   169  	return len(strings.Split(res.SingleOut(), " "))
   170  }
   171  
    172  // ExecKafkaPodCmd executes the shell command with arguments arg in the specified pod residing in the
    173  // specified namespace. It returns the stdout of the command that was executed.
    174  // The kafka producer and consumer scripts do not return an error if the command
    175  // leads to a TopicAuthorizationException or any other error. Hence the
    176  // function also needs to take into account the stderr messages returned.
   177  func (kub *Kubectl) ExecKafkaPodCmd(namespace string, pod string, arg string) error {
   178  	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, arg)
   179  	res := kub.Exec(command)
   180  	if !res.WasSuccessful() {
   181  		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed %s",
   182  			res.GetCmd(), res.OutputPrettyPrint())
   183  	}
   184  
   185  	if strings.Contains(res.GetStdErr(), "ERROR") {
   186  		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed '%s'",
   187  			res.GetCmd(), res.OutputPrettyPrint())
   188  	}
   189  	return nil
   190  }
   191  
   192  // ExecPodCmd executes command cmd in the specified pod residing in the specified
   193  // namespace. It returns a pointer to CmdRes with all the output
   194  func (kub *Kubectl) ExecPodCmd(namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
   195  	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
   196  	return kub.Exec(command, options...)
   197  }
   198  
   199  // ExecPodCmdContext synchronously executes command cmd in the specified pod residing in the
   200  // specified namespace. It returns a pointer to CmdRes with all the output.
   201  func (kub *Kubectl) ExecPodCmdContext(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
   202  	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
   203  	return kub.ExecContext(ctx, command, options...)
   204  }
   205  
   206  // ExecPodCmdBackground executes command cmd in background in the specified pod residing
   207  // in the specified namespace. It returns a pointer to CmdRes with all the
   208  // output
   209  //
   210  // To receive the output of this function, the caller must invoke either
   211  // kub.WaitUntilFinish() or kub.WaitUntilMatch() then subsequently fetch the
   212  // output out of the result.
   213  func (kub *Kubectl) ExecPodCmdBackground(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
   214  	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
   215  	return kub.ExecInBackground(ctx, command, options...)
   216  }
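
         // Illustrative sketch (not part of the original helpers): running a command
         // in the background inside a pod and collecting its output after cancelling
         // the context. Namespace, pod and command are placeholders.
         func exampleExecPodCmdBackground(kub *Kubectl) string {
         	ctx, cancel := context.WithCancel(context.Background())
         	res := kub.ExecPodCmdBackground(ctx, "default", "testpod", "ping 10.0.0.1")
         	// ... perform the test steps that should run while the command executes ...
         	cancel()
         	// CombineOutput gathers whatever the background command wrote so far.
         	return res.CombineOutput().String()
         }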
   217  
   218  // Get retrieves the provided Kubernetes objects from the specified namespace.
   219  func (kub *Kubectl) Get(namespace string, command string) *CmdRes {
   220  	return kub.ExecShort(fmt.Sprintf(
   221  		"%s -n %s get %s -o json", KubectlCmd, namespace, command))
   222  }
   223  
   224  // GetFromAllNS retrieves provided Kubernetes objects from all namespaces
   225  func (kub *Kubectl) GetFromAllNS(kind string) *CmdRes {
   226  	return kub.ExecShort(fmt.Sprintf(
   227  		"%s get %s --all-namespaces -o json", KubectlCmd, kind))
   228  }
   229  
    230  // GetCNP retrieves the output of `kubectl get cnp` in the given namespace for
    231  // the given CNP and returns a CNP struct. If the CNP does not exist or the
    232  // JSON output cannot be unmarshaled, it returns nil.
   233  func (kub *Kubectl) GetCNP(namespace string, cnp string) *cnpv2.CiliumNetworkPolicy {
   234  	log := kub.logger.WithFields(logrus.Fields{
   235  		"fn":  "GetCNP",
   236  		"cnp": cnp,
   237  		"ns":  namespace,
   238  	})
   239  	res := kub.Get(namespace, fmt.Sprintf("cnp %s", cnp))
   240  	if !res.WasSuccessful() {
   241  		log.WithField("error", res.CombineOutput()).Info("cannot get CNP")
   242  		return nil
   243  	}
   244  	var result cnpv2.CiliumNetworkPolicy
   245  	err := res.Unmarshal(&result)
   246  	if err != nil {
   247  		log.WithError(err).Errorf("cannot unmarshal CNP output")
   248  		return nil
   249  	}
   250  	return &result
   251  }
   252  
         // WaitForCRDCount waits until the number of CRD names matching filter in the
         // output of `kubectl get crds` equals count, or returns an error if the
         // timeout is exceeded.
    253  func (kub *Kubectl) WaitForCRDCount(filter string, count int, timeout time.Duration) error {
   254  	// Set regexp flag m for multi-line matching, then add the
   255  	// matches for beginning and end of a line, so that we count
   256  	// at most one match per line (like "grep <filter> | wc -l")
   257  	regex := regexp.MustCompile("(?m:^.*(?:" + filter + ").*$)")
   258  	body := func() bool {
   259  		res := kub.ExecShort(fmt.Sprintf("%s get crds", KubectlCmd))
   260  		if !res.WasSuccessful() {
   261  			log.Error(res.GetErr("kubectl get crds failed"))
   262  			return false
   263  		}
   264  		return len(regex.FindAllString(res.GetStdOut(), -1)) == count
   265  	}
   266  	return WithTimeout(
   267  		body,
   268  		fmt.Sprintf("timed out waiting for %d CRDs matching filter \"%s\" to be ready", count, filter),
   269  		&TimeoutConfig{Timeout: timeout})
   270  }
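
         // Illustrative sketch (not part of the original helpers): waiting until the
         // Cilium CRDs are registered. The filter and expected count are placeholders;
         // callers pass whatever pattern they need to match in `kubectl get crds`.
         func exampleWaitForCiliumCRDs(kub *Kubectl) error {
         	return kub.WaitForCRDCount("cilium.io", 2, HelperTimeout)
         }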
   271  
   272  // GetPods gets all of the pods in the given namespace that match the provided
   273  // filter.
   274  func (kub *Kubectl) GetPods(namespace string, filter string) *CmdRes {
   275  	return kub.ExecShort(fmt.Sprintf("%s -n %s get pods %s -o json", KubectlCmd, namespace, filter))
   276  }
   277  
   278  // GetPodsNodes returns a map with pod name as a key and node name as value. It
   279  // only gets pods in the given namespace that match the provided filter. It
   280  // returns an error if pods cannot be retrieved correctly
   281  func (kub *Kubectl) GetPodsNodes(namespace string, filter string) (map[string]string, error) {
   282  	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.spec.nodeName}{"\n"}{end}`
   283  	res := kub.Exec(fmt.Sprintf("%s -n %s get pods %s -o jsonpath='%s'",
   284  		KubectlCmd, namespace, filter, jsonFilter))
   285  	if !res.WasSuccessful() {
   286  		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
   287  	}
   288  	return res.KVOutput(), nil
   289  }
   290  
    291  // GetPodsIPs returns a map with pod name as a key and pod IP as value. It
   292  // only gets pods in the given namespace that match the provided filter. It
   293  // returns an error if pods cannot be retrieved correctly
   294  func (kub *Kubectl) GetPodsIPs(namespace string, filter string) (map[string]string, error) {
   295  	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.status.podIP}{"\n"}{end}`
   296  	res := kub.ExecShort(fmt.Sprintf("%s -n %s get pods -l %s -o jsonpath='%s'",
   297  		KubectlCmd, namespace, filter, jsonFilter))
   298  	if !res.WasSuccessful() {
   299  		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
   300  	}
   301  	return res.KVOutput(), nil
   302  }
   303  
   304  // GetEndpoints gets all of the endpoints in the given namespace that match the
   305  // provided filter.
   306  func (kub *Kubectl) GetEndpoints(namespace string, filter string) *CmdRes {
   307  	return kub.ExecShort(fmt.Sprintf("%s -n %s get endpoints %s -o json", KubectlCmd, namespace, filter))
   308  }
   309  
    310  // GetAllPods returns a slice of all pods present in the Kubernetes cluster,
    311  // along with an error if the pods could not be retrieved via `kubectl`, or if
    312  // the pod objects could not be unmarshaled from JSON.
   313  func (kub *Kubectl) GetAllPods(ctx context.Context, options ...ExecOptions) ([]v1.Pod, error) {
   314  	var ops ExecOptions
   315  	if len(options) > 0 {
   316  		ops = options[0]
   317  	}
   318  
   319  	getPodsCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
   320  	defer cancel()
   321  
   322  	var podsList v1.List
   323  	err := kub.ExecContext(getPodsCtx,
   324  		fmt.Sprintf("%s get pods --all-namespaces -o json", KubectlCmd),
   325  		ExecOptions{SkipLog: ops.SkipLog}).Unmarshal(&podsList)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  
         	// Allocate capacity only; a non-zero length here would leave zero-valued
         	// pods in front of the appended ones.
    330  	pods := make([]v1.Pod, 0, len(podsList.Items))
   331  	for _, item := range podsList.Items {
   332  		var pod v1.Pod
   333  		err = json.Unmarshal(item.Raw, &pod)
   334  		if err != nil {
   335  			return nil, err
   336  		}
   337  		pods = append(pods, pod)
   338  	}
   339  
   340  	return pods, nil
   341  }
   342  
   343  // GetPodNames returns the names of all of the pods that are labeled with label
   344  // in the specified namespace, along with an error if the pod names cannot be
   345  // retrieved.
   346  func (kub *Kubectl) GetPodNames(namespace string, label string) ([]string, error) {
   347  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
   348  	defer cancel()
   349  	return kub.GetPodNamesContext(ctx, namespace, label)
   350  }
   351  
   352  // GetPodNamesContext returns the names of all of the pods that are labeled with
   353  // label in the specified namespace, along with an error if the pod names cannot
   354  // be retrieved.
   355  func (kub *Kubectl) GetPodNamesContext(ctx context.Context, namespace string, label string) ([]string, error) {
   356  	stdout := new(bytes.Buffer)
   357  	filter := "-o jsonpath='{.items[*].metadata.name}'"
   358  
   359  	cmd := fmt.Sprintf("%s -n %s get pods -l %s %s", KubectlCmd, namespace, label, filter)
   360  
   361  	// Taking more than 30 seconds to get pods means that something is wrong
   362  	// connecting to the node.
   363  	podNamesCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
   364  	defer cancel()
   365  	err := kub.ExecuteContext(podNamesCtx, cmd, stdout, nil)
   366  
   367  	if err != nil {
   368  		return nil, fmt.Errorf(
   369  			"could not find pods in namespace '%v' with label '%v': %s", namespace, label, err)
   370  	}
   371  
   372  	out := strings.Trim(stdout.String(), "\n")
   373  	if len(out) == 0 {
    374  		// Small hack: strings.Split always returns a slice containing an empty string
   375  		return []string{}, nil
   376  	}
   377  	return strings.Split(out, " "), nil
   378  }
   379  
   380  // GetServiceHostPort returns the host and the first port for the given service name.
    381  // It returns an error if the service cannot be retrieved.
   382  func (kub *Kubectl) GetServiceHostPort(namespace string, service string) (string, int, error) {
   383  	var data v1.Service
   384  	err := kub.Get(namespace, fmt.Sprintf("service %s", service)).Unmarshal(&data)
   385  	if err != nil {
   386  		return "", 0, err
   387  	}
   388  	if len(data.Spec.Ports) == 0 {
   389  		return "", 0, fmt.Errorf("Service '%s' does not have ports defined", service)
   390  	}
   391  	return data.Spec.ClusterIP, int(data.Spec.Ports[0].Port), nil
   392  }
   393  
    394  // Logs returns a CmdRes containing the resulting metadata from the
    395  // execution of `kubectl logs <pod> -n <namespace>`.
   396  func (kub *Kubectl) Logs(namespace string, pod string) *CmdRes {
   397  	return kub.Exec(
   398  		fmt.Sprintf("%s -n %s logs %s", KubectlCmd, namespace, pod))
   399  }
   400  
    401  // MicroscopeStart installs (if it is not installed) a new microscope pod,
    402  // waits until the pod is ready, and runs microscope in the background. It
    403  // returns an error if microscope cannot be installed or is not ready after
    404  // a timeout. It also returns a callback function to stop the monitor and save
    405  // the output to the `helpers.monitorLogFileName` file. It takes an optional
    406  // list of arguments to pass to microscope.
   407  func (kub *Kubectl) MicroscopeStart(microscopeOptions ...string) (error, func() error) {
   408  	if !EnableMicroscope {
   409  		return nil, func() error { return nil }
   410  	}
   411  
   412  	microscope := "microscope"
   413  	var microscopeCmd string
   414  	if len(microscopeOptions) == 0 {
   415  		microscopeCmd = "microscope"
   416  	} else {
   417  		microscopeCmd = fmt.Sprintf("%s %s", microscope, strings.Join(microscopeOptions, " "))
   418  	}
   419  	var microscopeCmdWithTimestamps = microscopeCmd + "| ts '[%Y-%m-%d %H:%M:%S]'"
   420  	var cb = func() error { return nil }
   421  	cmd := fmt.Sprintf("%[1]s -ti -n %[2]s exec %[3]s -- %[4]s",
   422  		KubectlCmd, KubeSystemNamespace, microscope, microscopeCmdWithTimestamps)
   423  	microscopePath := ManifestGet(microscopeManifest)
   424  	_ = kub.ApplyDefault(microscopePath)
   425  
   426  	err := kub.WaitforPods(
   427  		KubeSystemNamespace,
   428  		fmt.Sprintf("-l k8s-app=%s", microscope),
   429  		HelperTimeout)
   430  	if err != nil {
   431  		return err, cb
   432  	}
   433  
   434  	ctx, cancel := context.WithCancel(context.Background())
   435  	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})
   436  
   437  	cb = func() error {
   438  		cancel()
   439  		<-ctx.Done()
   440  		testPath, err := CreateReportDirectory()
   441  		if err != nil {
   442  			kub.logger.WithError(err).Errorf(
   443  				"cannot create test results path '%s'", testPath)
   444  			return err
   445  		}
   446  
   447  		err = WriteOrAppendToFile(
   448  			filepath.Join(testPath, MonitorLogFileName),
   449  			res.CombineOutput().Bytes(),
   450  			LogPerm)
   451  		if err != nil {
   452  			log.WithError(err).Errorf("cannot create monitor log file")
   453  			return err
   454  		}
   455  		res := kub.Exec(fmt.Sprintf("%s -n %s delete pod --grace-period=0 --force microscope", KubectlCmd, KubeSystemNamespace))
   456  		if !res.WasSuccessful() {
   457  			return fmt.Errorf("error deleting microscope pod: %s", res.OutputPrettyPrint())
   458  		}
   459  		return nil
   460  	}
   461  
   462  	return nil, cb
   463  }
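
         // Illustrative sketch (not part of the original helpers): MicroscopeStart
         // returns the error first and the stop callback second, so the callback must
         // be captured even when only the error is checked.
         func exampleMicroscope(kub *Kubectl) error {
         	err, stopMicroscope := kub.MicroscopeStart()
         	if err != nil {
         		return err
         	}
         	defer func() {
         		// Flushes the captured output to MonitorLogFileName and deletes the pod.
         		_ = stopMicroscope()
         	}()
         	// ... run the test steps to be traced by microscope ...
         	return nil
         }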
   464  
   465  // MonitorStart runs cilium monitor in the background and dumps the contents
   466  // into a log file for later debugging
   467  func (kub *Kubectl) MonitorStart(namespace, pod, filename string) func() error {
   468  	cmd := fmt.Sprintf("%s exec -n %s %s -- cilium monitor -v", KubectlCmd, namespace, pod)
   469  	ctx, cancel := context.WithCancel(context.Background())
   470  	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})
   471  
   472  	cb := func() error {
   473  		cancel()
   474  		<-ctx.Done()
   475  		testPath, err := CreateReportDirectory()
   476  		if err != nil {
   477  			kub.logger.WithError(err).Errorf(
   478  				"cannot create test results path '%s'", testPath)
   479  			return err
   480  		}
   481  
   482  		err = WriteOrAppendToFile(
   483  			filepath.Join(testPath, filename),
   484  			res.CombineOutput().Bytes(),
   485  			LogPerm)
   486  		if err != nil {
   487  			log.WithError(err).Errorf("cannot create monitor log file %s", filename)
   488  			return err
   489  		}
   490  		return nil
   491  	}
   492  
   493  	return cb
   494  }
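
         // Illustrative sketch (not part of the original helpers): capturing `cilium
         // monitor` output for the duration of a test. The pod name and log file name
         // are placeholders.
         func exampleMonitor(kub *Kubectl) error {
         	stopMonitor := kub.MonitorStart(KubeSystemNamespace, "cilium-abc12", "monitor-test.log")
         	// ... exercise the datapath while the monitor is running ...
         	// The callback cancels the background command and writes the collected
         	// output into the report directory.
         	return stopMonitor()
         }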
   495  
    496  // BackgroundReport dumps the result of the given commands on the cilium pods
    497  // every five seconds.
   498  func (kub *Kubectl) BackgroundReport(commands ...string) (context.CancelFunc, error) {
   499  	backgroundCtx, cancel := context.WithCancel(context.Background())
   500  	pods, err := kub.GetCiliumPods(KubeSystemNamespace)
   501  	if err != nil {
   502  		return cancel, fmt.Errorf("Cannot retrieve cilium pods: %s", err)
   503  	}
   504  	retrieveInfo := func() {
   505  		for _, pod := range pods {
   506  			for _, cmd := range commands {
   507  				kub.CiliumExec(pod, cmd)
   508  			}
   509  		}
   510  	}
   511  	go func(ctx context.Context) {
   512  		ticker := time.NewTicker(5 * time.Second)
   513  		defer ticker.Stop()
   514  		for {
   515  			select {
   516  			case <-ctx.Done():
   517  				return
   518  			case <-ticker.C:
   519  				retrieveInfo()
   520  			}
   521  		}
   522  	}(backgroundCtx)
   523  	return cancel, nil
   524  }
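
         // Illustrative sketch (not part of the original helpers): periodically
         // dumping Cilium state while a long-running test executes. The command is a
         // placeholder.
         func exampleBackgroundReport(kub *Kubectl) error {
         	cancel, err := kub.BackgroundReport("cilium endpoint list")
         	if err != nil {
         		return err
         	}
         	// Stop the five-second reporting loop once the test is done.
         	defer cancel()
         	// ... run the long test ...
         	return nil
         }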
   525  
    526  // PprofReport runs pprof on the cilium nodes every 5 minutes and saves the
    527  // data into the test folder with a pprof suffix.
   528  func (kub *Kubectl) PprofReport() {
   529  	PProfCadence := 5 * time.Minute
   530  	ticker := time.NewTicker(PProfCadence)
   531  	log := kub.logger.WithField("subsys", "pprofReport")
   532  
   533  	retrievePProf := func(pod, testPath string) {
   534  		res := kub.ExecPodCmd(KubeSystemNamespace, pod, "gops pprof-cpu 1")
   535  		if !res.WasSuccessful() {
   536  			log.Errorf("cannot execute pprof: %s", res.OutputPrettyPrint())
   537  			return
   538  		}
   539  		files := kub.ExecPodCmd(KubeSystemNamespace, pod, `ls -1 /tmp/`)
   540  		for _, file := range files.ByLines() {
   541  			if !strings.Contains(file, "profile") {
   542  				continue
   543  			}
   544  
   545  			dest := filepath.Join(
   546  				BasePath, testPath,
   547  				fmt.Sprintf("%s-profile-%s.pprof", pod, file))
   548  			_ = kub.Exec(fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s %[5]s",
   549  				KubectlCmd, KubeSystemNamespace, pod, file, dest),
   550  				ExecOptions{SkipLog: true})
   551  
   552  			_ = kub.ExecPodCmd(KubeSystemNamespace, pod, fmt.Sprintf(
   553  				"rm %s", filepath.Join("/tmp/", file)))
   554  		}
   555  	}
   556  
   557  	for {
   558  		select {
   559  		case <-ticker.C:
   560  
   561  			testPath, err := CreateReportDirectory()
   562  			if err != nil {
   563  				log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
   564  				return
   565  			}
   566  
   567  			pods, err := kub.GetCiliumPods(KubeSystemNamespace)
   568  			if err != nil {
   569  				log.Errorf("cannot get cilium pods")
   570  			}
   571  
   572  			for _, pod := range pods {
   573  				retrievePProf(pod, testPath)
   574  			}
   575  
   576  		}
   577  	}
   578  }
   579  
    580  // NodeCleanMetadata clears the annotation.V4CIDRName and annotation.V6CIDRName
    581  // annotations on each node in the Kubernetes cluster. It returns an
    582  // error if the nodes cannot be retrieved via the Kubernetes API.
   583  func (kub *Kubectl) NodeCleanMetadata() error {
   584  	metadata := []string{
   585  		annotation.V4CIDRName,
   586  		annotation.V6CIDRName,
   587  	}
   588  
   589  	data := kub.ExecShort(fmt.Sprintf("%s get nodes -o jsonpath='{.items[*].metadata.name}'", KubectlCmd))
   590  	if !data.WasSuccessful() {
   591  		return fmt.Errorf("could not get nodes via %s: %s", KubectlCmd, data.CombineOutput())
   592  	}
   593  	for _, node := range strings.Split(data.Output().String(), " ") {
   594  		for _, label := range metadata {
   595  			kub.ExecShort(fmt.Sprintf("%s annotate --overwrite nodes %s %s=''", KubectlCmd, node, label))
   596  		}
   597  	}
   598  	return nil
   599  }
   600  
   601  // NamespaceCreate creates a new Kubernetes namespace with the given name
   602  func (kub *Kubectl) NamespaceCreate(name string) *CmdRes {
   603  	ginkgoext.By("Creating namespace %s", name)
   604  	return kub.ExecShort(fmt.Sprintf("%s create namespace %s", KubectlCmd, name))
   605  }
   606  
   607  // NamespaceDelete deletes a given Kubernetes namespace
   608  func (kub *Kubectl) NamespaceDelete(name string) *CmdRes {
   609  	return kub.ExecShort(fmt.Sprintf("%s delete namespace %s", KubectlCmd, name))
   610  }
   611  
   612  // NamespaceLabel sets a label in a Kubernetes namespace
   613  func (kub *Kubectl) NamespaceLabel(namespace string, label string) *CmdRes {
   614  	ginkgoext.By("Setting label %s in namespace %s", label, namespace)
   615  	return kub.ExecShort(fmt.Sprintf("%s label --overwrite namespace %s %s", KubectlCmd, namespace, label))
   616  }
   617  
    618  // WaitforPods waits up until the timeout has elapsed for all pods in the
    619  // specified namespace that match the provided JSONPath filter to have their
    620  // containerStatuses equal to "ready". Returns nil if all pods achieve
    621  // the aforementioned desired state within the timeout. Returns an error if
    622  // the command failed or the timeout was exceeded.
   623  func (kub *Kubectl) WaitforPods(namespace string, filter string, timeout time.Duration) error {
   624  	return kub.waitForNPods(checkReady, namespace, filter, 0, timeout)
   625  }
   626  
   627  // checkPodStatusFunc returns true if the pod is in the desired state, or false
   628  // otherwise.
   629  type checkPodStatusFunc func(v1.Pod) bool
   630  
   631  // checkRunning checks that the pods are running, but not necessarily ready.
   632  func checkRunning(pod v1.Pod) bool {
   633  	if pod.Status.Phase != v1.PodRunning || pod.ObjectMeta.DeletionTimestamp != nil {
   634  		return false
   635  	}
   636  	return true
   637  }
   638  
   639  // checkReady determines whether the pods are running and ready.
   640  func checkReady(pod v1.Pod) bool {
   641  	if !checkRunning(pod) {
   642  		return false
   643  	}
   644  
   645  	for _, container := range pod.Status.ContainerStatuses {
   646  		if !container.Ready {
   647  			return false
   648  		}
   649  	}
   650  	return true
   651  }
   652  
    653  // WaitforNPodsRunning waits up until the timeout duration has elapsed for at
    654  // least minRequired pods in the specified namespace that match the provided
    655  // JSONPath filter to have their containerStatuses equal to "running".
    656  // Returns no error if minRequired pods achieve the aforementioned desired
    657  // state within the timeout. Returns an error if the command failed or the
    658  // timeout was exceeded.
    659  // When minRequired is 0, the function derives the required pod count from the
    660  // number of matching pods for every iteration.
   661  func (kub *Kubectl) WaitforNPodsRunning(namespace string, filter string, minRequired int, timeout time.Duration) error {
   662  	return kub.waitForNPods(checkRunning, namespace, filter, minRequired, timeout)
   663  }
   664  
    665  // WaitforNPods waits up until the timeout has elapsed for at least
    666  // minRequired pods in the specified namespace that match the provided JSONPath
    667  // filter to have their containerStatuses equal to "ready".
    668  // Returns no error if minRequired pods achieve the aforementioned desired
    669  // state within the timeout. Returns an error if the command failed or the
    670  // timeout was exceeded.
    671  // When minRequired is 0, the function derives the required pod count from the
    672  // number of matching pods for every iteration.
   673  func (kub *Kubectl) WaitforNPods(namespace string, filter string, minRequired int, timeout time.Duration) error {
   674  	return kub.waitForNPods(checkReady, namespace, filter, minRequired, timeout)
   675  }
   676  
   677  func (kub *Kubectl) waitForNPods(checkStatus checkPodStatusFunc, namespace string, filter string, minRequired int, timeout time.Duration) error {
   678  	body := func() bool {
   679  		podList := &v1.PodList{}
   680  		err := kub.GetPods(namespace, filter).Unmarshal(podList)
   681  		if err != nil {
   682  			kub.logger.Infof("Error while getting PodList: %s", err)
   683  			return false
   684  		}
   685  
   686  		if len(podList.Items) == 0 {
   687  			return false
   688  		}
   689  
   690  		var required int
   691  
   692  		if minRequired == 0 {
   693  			required = len(podList.Items)
   694  		} else {
   695  			required = minRequired
   696  		}
   697  
   698  		if len(podList.Items) < required {
   699  			return false
   700  		}
   701  
    702  		// For each pod, count it as running when all conditions are true:
    703  		//  - It is scheduled via Phase == v1.PodRunning
    704  		//  - It is not scheduled for deletion (DeletionTimestamp is not set)
    705  		//  - For the "ready" check, all containers in the pod have passed the
    706  		//    readiness check via containerStatuses.Ready
   707  		currScheduled := 0
   708  		for _, pod := range podList.Items {
   709  			if checkStatus(pod) {
   710  				currScheduled++
   711  			}
   712  		}
   713  
   714  		return currScheduled >= required
   715  	}
   716  
   717  	return WithTimeout(
   718  		body,
   719  		fmt.Sprintf("timed out waiting for pods with filter %s to be ready", filter),
   720  		&TimeoutConfig{Timeout: timeout})
   721  }
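
         // Illustrative sketch (not part of the original helpers): the public
         // wrappers above are normally called with a label filter, e.g. waiting for
         // all pods of a deployment to become ready. Namespace and label are
         // placeholders.
         func exampleWaitForPods(kub *Kubectl) error {
         	if err := kub.WaitforPods("default", "-l app=testapp", HelperTimeout); err != nil {
         		return err
         	}
         	// Or require only a minimum number of matching pods to be running.
         	return kub.WaitforNPodsRunning("default", "-l app=testapp", 2, HelperTimeout)
         }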
   722  
    723  // WaitForServiceEndpoints waits up until the timeout has elapsed for the named
    724  // service, among the endpoints in the specified namespace that match the
    725  // provided JSONPath filter, to expose a port. Returns nil once an endpoint
    726  // port is reported. Returns an error if the command failed or the timeout was
    727  // exceeded.
   728  func (kub *Kubectl) WaitForServiceEndpoints(namespace string, filter string, service string, timeout time.Duration) error {
   729  	body := func() bool {
   730  		var jsonPath = fmt.Sprintf("{.items[?(@.metadata.name == '%s')].subsets[0].ports[0].port}", service)
   731  		data, err := kub.GetEndpoints(namespace, filter).Filter(jsonPath)
   732  
   733  		if err != nil {
    734  			kub.logger.WithError(err).Warn("cannot filter service endpoints")
   735  			return false
   736  		}
   737  
   738  		if data.String() != "" {
   739  			return true
   740  		}
   741  
   742  		kub.logger.WithFields(logrus.Fields{
   743  			"namespace": namespace,
   744  			"filter":    filter,
   745  			"data":      data,
   746  			"service":   service,
   747  		}).Info("WaitForServiceEndpoints: service endpoint not ready")
   748  		return false
   749  	}
   750  
   751  	return WithTimeout(body, "could not get service endpoints", &TimeoutConfig{Timeout: timeout})
   752  }
   753  
   754  // Action performs the specified ResourceLifeCycleAction on the Kubernetes
   755  // manifest located at path filepath in the given namespace
   756  func (kub *Kubectl) Action(action ResourceLifeCycleAction, filePath string, namespace ...string) *CmdRes {
   757  	if len(namespace) == 0 {
   758  		kub.logger.Debugf("performing '%v' on '%v'", action, filePath)
   759  		return kub.ExecShort(fmt.Sprintf("%s %s -f %s", KubectlCmd, action, filePath))
   760  	}
   761  
   762  	kub.logger.Debugf("performing '%v' on '%v' in namespace '%v'", action, filePath, namespace[0])
   763  	return kub.ExecShort(fmt.Sprintf("%s %s -f %s -n %s", KubectlCmd, action, filePath, namespace[0]))
   764  }
   765  
   766  // ApplyOptions stores options for kubectl apply command
   767  type ApplyOptions struct {
   768  	FilePath  string
   769  	Namespace string
   770  	Force     bool
   771  	DryRun    bool
   772  	Output    string
   773  	Piped     string
   774  }
   775  
   776  // Apply applies the Kubernetes manifest located at path filepath.
   777  func (kub *Kubectl) Apply(options ApplyOptions) *CmdRes {
   778  	var force string
   779  	if options.Force {
   780  		force = "--force=true"
   781  	} else {
   782  		force = "--force=false"
   783  	}
   784  
   785  	cmd := fmt.Sprintf("%s apply %s -f %s", KubectlCmd, force, options.FilePath)
   786  
   787  	if options.DryRun {
   788  		cmd = cmd + " --dry-run"
   789  	}
   790  
   791  	if len(options.Output) > 0 {
   792  		cmd = cmd + " -o " + options.Output
   793  	}
   794  
   795  	if len(options.Namespace) == 0 {
   796  		kub.logger.Debugf("applying %s", options.FilePath)
   797  	} else {
   798  		kub.logger.Debugf("applying %s in namespace %s", options.FilePath, options.Namespace)
   799  		cmd = cmd + " -n " + options.Namespace
   800  	}
   801  
   802  	if len(options.Piped) > 0 {
   803  		cmd = options.Piped + " | " + cmd
   804  	}
   805  	return kub.ExecMiddle(cmd)
   806  }
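
         // Illustrative sketch (not part of the original helpers): ApplyOptions.Piped
         // prepends a command whose stdout is fed into `kubectl apply -f -`, which is
         // how DeployPatch below applies a locally rendered manifest. The file name is
         // a placeholder.
         func exampleApplyPiped(kub *Kubectl) error {
         	res := kub.Apply(ApplyOptions{
         		FilePath: "-",
         		Force:    true,
         		Piped:    "cat rendered.yaml",
         	})
         	if !res.WasSuccessful() {
         		return res.GetErr("unable to apply piped manifest")
         	}
         	return nil
         }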
   807  
    808  // ApplyDefault applies the given filepath with all other options set to their defaults.
   809  func (kub *Kubectl) ApplyDefault(filePath string) *CmdRes {
   810  	return kub.Apply(ApplyOptions{FilePath: filePath})
   811  }
   812  
    813  // Create creates the Kubernetes manifest located at path filepath.
   814  func (kub *Kubectl) Create(filePath string) *CmdRes {
   815  	kub.logger.Debugf("creating %s", filePath)
   816  	return kub.ExecShort(
   817  		fmt.Sprintf("%s create -f  %s", KubectlCmd, filePath))
   818  }
   819  
    820  // CreateResource is a wrapper around `kubectl create <resource>
    821  // <resourceName>`.
   822  func (kub *Kubectl) CreateResource(resource, resourceName string) *CmdRes {
   823  	kub.logger.Debug(fmt.Sprintf("creating resource %s with name %s", resource, resourceName))
   824  	return kub.ExecShort(fmt.Sprintf("kubectl create %s %s", resource, resourceName))
   825  }
   826  
    827  // DeleteResource is a wrapper around `kubectl delete <resource>
    828  // <resourceName>`.
   829  func (kub *Kubectl) DeleteResource(resource, resourceName string) *CmdRes {
   830  	kub.logger.Debug(fmt.Sprintf("deleting resource %s with name %s", resource, resourceName))
   831  	return kub.Exec(fmt.Sprintf("kubectl delete %s %s", resource, resourceName))
   832  }
   833  
   834  // Delete deletes the Kubernetes manifest at path filepath.
   835  func (kub *Kubectl) Delete(filePath string) *CmdRes {
   836  	kub.logger.Debugf("deleting %s", filePath)
   837  	return kub.ExecShort(
   838  		fmt.Sprintf("%s delete -f  %s", KubectlCmd, filePath))
   839  }
   840  
   841  // WaitKubeDNS waits until the kubeDNS pods are ready. In case of exceeding the
   842  // default timeout it returns an error.
   843  func (kub *Kubectl) WaitKubeDNS() error {
   844  	return kub.WaitforPods(KubeSystemNamespace, fmt.Sprintf("-l %s", kubeDNSLabel), DNSHelperTimeout)
   845  }
   846  
    847  // WaitForKubeDNSEntry waits until the given DNS entry exists in the kube-dns
    848  // service. If the entry is not resolvable after the timeout it returns an error.
    849  // The query name's format should be `${name}.${namespace}`. If `svc.cluster.local`
    850  // is not present, it is appended to the given name and the service's FQDN is checked.
   851  func (kub *Kubectl) WaitForKubeDNSEntry(serviceName, serviceNamespace string) error {
   852  	svcSuffix := "svc.cluster.local"
   853  	logger := kub.logger.WithFields(logrus.Fields{"serviceName": serviceName, "serviceNamespace": serviceNamespace})
   854  
   855  	serviceNameWithNamespace := fmt.Sprintf("%s.%s", serviceName, serviceNamespace)
   856  	if !strings.HasSuffix(serviceNameWithNamespace, svcSuffix) {
   857  		serviceNameWithNamespace = fmt.Sprintf("%s.%s", serviceNameWithNamespace, svcSuffix)
   858  	}
   859  	// https://bugs.launchpad.net/ubuntu/+source/bind9/+bug/854705
   860  	digCMD := "dig +short %s @%s | grep -v -e '^;'"
   861  
    862  	// If it fails we want to know whether it is because the connection cannot
    863  	// be established or because the DNS entry does not exist.
   864  	digCMDFallback := "dig +tcp %s @%s"
   865  
   866  	dnsClusterIP, _, err := kub.GetServiceHostPort(KubeSystemNamespace, "kube-dns")
   867  	if err != nil {
   868  		logger.WithError(err).Error("cannot get kube-dns service IP")
   869  		return err
   870  	}
   871  
   872  	body := func() bool {
   873  		serviceIP, _, err := kub.GetServiceHostPort(serviceNamespace, serviceName)
   874  		if err != nil {
   875  			log.WithError(err).Errorf("cannot get service IP for service %s", serviceNameWithNamespace)
   876  			return false
   877  		}
   878  
   879  		// ClusterIPNone denotes that this service is headless; there is no
   880  		// service IP for this service, and thus the IP returned by `dig` is
   881  		// an IP of the pod itself, not ClusterIPNone, which is what Kubernetes
   882  		// shows as the IP for the service for headless services.
   883  		if serviceIP == v1.ClusterIPNone {
   884  			res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
   885  			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
   886  			return res.WasSuccessful()
   887  		}
   888  		log.Debugf("service is not headless; checking whether IP retrieved from DNS matches the IP for the service stored in Kubernetes")
   889  		res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
   890  		serviceIPFromDNS := res.SingleOut()
   891  		if !govalidator.IsIP(serviceIPFromDNS) {
   892  			logger.Debugf("output of dig (%s) did not return an IP", serviceIPFromDNS)
   893  			return false
   894  		}
   895  
   896  		// Due to lag between new IPs for the same service being synced between
   897  		// kube-apiserver and DNS, check if the IP for the service that is
   898  		// stored in K8s matches the IP of the service cached in DNS. These
   899  		// can be different, because some tests use the same service names.
   900  		// Wait accordingly for services to match, and for resolving the service
   901  		// name to resolve via DNS.
   902  		if !strings.Contains(serviceIPFromDNS, serviceIP) {
   903  			logger.Debugf("service IP retrieved from DNS (%s) does not match the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
   904  			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
   905  			return false
   906  		}
   907  		logger.Debugf("service IP retrieved from DNS (%s) matches the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
   908  		return true
   909  	}
   910  
   911  	return WithTimeout(
   912  		body,
   913  		fmt.Sprintf("DNS '%s' is not ready after timeout", serviceNameWithNamespace),
   914  		&TimeoutConfig{Timeout: DNSHelperTimeout})
   915  }
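
         // Illustrative sketch (not part of the original helpers): waiting for DNS to
         // be functional before exercising a service. Service name and namespace are
         // placeholders.
         func exampleWaitForDNS(kub *Kubectl) error {
         	if err := kub.WaitKubeDNS(); err != nil {
         		return err
         	}
         	// Resolves app1-service.default.svc.cluster.local against kube-dns.
         	return kub.WaitForKubeDNSEntry("app1-service", "default")
         }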
   916  
    917  // WaitCleanAllTerminatingPods waits until all pods that are in `Terminating`
    918  // state are deleted correctly in the platform. In case of exceeding the
    919  // given timeout it returns an error.
   920  func (kub *Kubectl) WaitCleanAllTerminatingPods(timeout time.Duration) error {
   921  	body := func() bool {
   922  		res := kub.ExecShort(fmt.Sprintf(
   923  			"%s get pods --all-namespaces -o jsonpath='{.items[*].metadata.deletionTimestamp}'",
   924  			KubectlCmd))
   925  		if !res.WasSuccessful() {
   926  			return false
   927  		}
   928  
   929  		if res.Output().String() == "" {
   930  			// Output is empty so no terminating containers
   931  			return true
   932  		}
   933  
   934  		podsTerminating := len(strings.Split(res.Output().String(), " "))
   935  		kub.logger.WithField("Terminating pods", podsTerminating).Info("List of pods terminating")
   936  		if podsTerminating > 0 {
   937  			return false
   938  		}
   939  		return true
   940  	}
   941  
   942  	err := WithTimeout(
   943  		body,
   944  		"Pods are still not deleted after a timeout",
   945  		&TimeoutConfig{Timeout: timeout})
   946  	return err
   947  }
   948  
   949  // DeployPatch deploys the original kubernetes descriptor with the given patch.
   950  func (kub *Kubectl) DeployPatch(original, patch string) error {
   951  	// debugYaml only dumps the full created yaml file to the test output if
   952  	// the cilium manifest can not be created correctly.
   953  	debugYaml := func(original, patch string) {
   954  		// dry-run is only available since k8s 1.11
   955  		switch GetCurrentK8SEnv() {
   956  		case "1.8", "1.9", "1.10":
   957  			_ = kub.ExecShort(fmt.Sprintf(
   958  				`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
   959  				KubectlCmd, original, patch))
   960  		default:
   961  			_ = kub.ExecShort(fmt.Sprintf(
   962  				`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run -o yaml`,
   963  				KubectlCmd, original, patch))
   964  		}
   965  	}
   966  
   967  	var res *CmdRes
   968  	// validation 1st
   969  	// dry-run is only available since k8s 1.11
   970  	switch GetCurrentK8SEnv() {
   971  	case "1.8", "1.9", "1.10":
   972  	default:
   973  		res = kub.ExecShort(fmt.Sprintf(
   974  			`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run`,
   975  			KubectlCmd, original, patch))
   976  		if !res.WasSuccessful() {
   977  			debugYaml(original, patch)
   978  			return res.GetErr("Cilium patch validation failed")
   979  		}
   980  	}
   981  
   982  	res = kub.Apply(ApplyOptions{
   983  		FilePath: "-",
   984  		Force:    true,
   985  		Piped: fmt.Sprintf(
   986  			`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
   987  			KubectlCmd, original, patch),
   988  	})
   989  	if !res.WasSuccessful() {
   990  		debugYaml(original, patch)
    991  		return res.GetErr("Cilium manifest patch installation failed")
   992  	}
   993  	return nil
   994  }
   995  
   996  // ciliumInstall installs all Cilium descriptors into kubernetes.
   997  // dsPatchName corresponds to the DaemonSet patch, found by
   998  // getK8sDescriptorPatch, that will be applied to the original Cilium DaemonSet
   999  // descriptor, found by getK8sDescriptor.
  1000  // cmPatchName corresponds to the ConfigMap patch, found by
  1001  // getK8sDescriptorPatch, that will be applied to the original Cilium ConfigMap
  1002  // descriptor, found by getK8sDescriptor.
   1003  // Returns an error if any patch or any original descriptor file was not
   1004  // found.
  1005  func (kub *Kubectl) ciliumInstall(dsPatchName, cmPatchName string, getK8sDescriptor, getK8sDescriptorPatch func(filename string) string) error {
  1006  	cmPathname := getK8sDescriptor("cilium-cm.yaml")
  1007  	if cmPathname == "" {
  1008  		return fmt.Errorf("Cilium ConfigMap descriptor not found")
  1009  	}
  1010  	dsPathname := getK8sDescriptor("cilium-ds.yaml")
  1011  	if dsPathname == "" {
  1012  		return fmt.Errorf("Cilium DaemonSet descriptor not found")
  1013  	}
  1014  	rbacPathname := getK8sDescriptor("cilium-rbac.yaml")
  1015  	if rbacPathname == "" {
  1016  		return fmt.Errorf("Cilium RBAC descriptor not found")
  1017  	}
  1018  
  1019  	deployOriginal := func(original string) error {
  1020  		// debugYaml only dumps the full created yaml file to the test output if
  1021  		// the cilium manifest can not be created correctly.
  1022  		debugYaml := func(original string) {
  1023  			kub.Apply(ApplyOptions{
  1024  				FilePath: original,
  1025  				DryRun:   true,
  1026  				Output:   "yaml",
  1027  			})
  1028  		}
  1029  
  1030  		// validation 1st
  1031  		res := kub.Apply(ApplyOptions{
  1032  			FilePath: original,
  1033  			DryRun:   true,
  1034  		})
  1035  		if !res.WasSuccessful() {
  1036  			debugYaml(original)
  1037  			return res.GetErr("Cilium manifest validation fails")
  1038  		}
  1039  
  1040  		res = kub.ApplyDefault(original)
  1041  		if !res.WasSuccessful() {
  1042  			debugYaml(original)
  1043  			return res.GetErr("Cannot apply Cilium manifest")
  1044  		}
  1045  		return nil
  1046  	}
  1047  
  1048  	if err := deployOriginal(rbacPathname); err != nil {
  1049  		return err
  1050  	}
  1051  
  1052  	if err := kub.DeployPatch(cmPathname, getK8sDescriptorPatch(cmPatchName)); err != nil {
  1053  		return err
  1054  	}
  1055  
  1056  	if err := kub.DeployPatch(dsPathname, getK8sDescriptorPatch(dsPatchName)); err != nil {
  1057  		return err
  1058  	}
  1059  
  1060  	cmdRes := kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorSA))
  1061  	if !cmdRes.WasSuccessful() {
  1062  		return fmt.Errorf("Unable to deploy descriptor of etcd-operator SA %s: %s", ciliumEtcdOperatorSA, cmdRes.OutputPrettyPrint())
  1063  	}
  1064  
  1065  	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorRBAC))
  1066  	if !cmdRes.WasSuccessful() {
  1067  		return fmt.Errorf("Unable to deploy descriptor of etcd-operator RBAC %s: %s", ciliumEtcdOperatorRBAC, cmdRes.OutputPrettyPrint())
  1068  	}
  1069  
  1070  	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperator))
  1071  	if !cmdRes.WasSuccessful() {
  1072  		return fmt.Errorf("Unable to deploy descriptor of etcd-operator %s: %s", ciliumEtcdOperator, cmdRes.OutputPrettyPrint())
  1073  	}
  1074  
  1075  	_ = kub.ApplyDefault(getK8sDescriptor("cilium-operator-sa.yaml"))
  1076  	err := kub.DeployPatch(getK8sDescriptor("cilium-operator.yaml"), getK8sDescriptorPatch("cilium-operator-patch.yaml"))
  1077  	if err != nil {
  1078  		return fmt.Errorf("Unable to deploy descriptor of cilium-operators: %s", err)
  1079  	}
  1080  
  1081  	return nil
  1082  }
  1083  
  1084  func addIfNotOverwritten(options []string, field, value string) []string {
  1085  	for _, s := range options {
  1086  		if strings.HasPrefix(s, "--set "+field) {
  1087  			return options
  1088  		}
  1089  	}
  1090  
  1091  	options = append(options, "--set "+field+"="+value)
  1092  	return options
  1093  }
  1094  
  1095  func (kub *Kubectl) generateCiliumYaml(options []string, filename string) error {
  1096  	for key, value := range defaultHelmOptions {
  1097  		options = addIfNotOverwritten(options, key, value)
  1098  	}
  1099  
  1100  	switch GetCurrentIntegration() {
  1101  	case CIIntegrationFlannel:
  1102  		// Appending the options will override earlier options on CLI.
  1103  		for k, v := range flannelHelmOverrides {
  1104  			options = append(options, fmt.Sprintf("--set %s=%s", k, v))
  1105  		}
  1106  	default:
  1107  	}
  1108  
  1109  	// TODO GH-8753: Use helm rendering library instead of shelling out to
  1110  	// helm template
  1111  	res := kub.ExecMiddle(fmt.Sprintf("helm template %s --namespace=kube-system %s > %s",
  1112  		HelmTemplate, strings.Join(options, " "), filename))
  1113  	if !res.WasSuccessful() {
  1114  		return res.GetErr("Unable to generate YAML")
  1115  	}
  1116  
  1117  	return nil
  1118  }
  1119  
  1120  // ciliumInstallHelm installs Cilium with the Helm options provided.
  1121  func (kub *Kubectl) ciliumInstallHelm(options []string) error {
  1122  	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
  1123  		return err
  1124  	}
  1125  
  1126  	res := kub.Apply(ApplyOptions{FilePath: "cilium.yaml", Force: true})
  1127  	if !res.WasSuccessful() {
  1128  		return res.GetErr("Unable to apply YAML")
  1129  	}
  1130  
  1131  	return nil
  1132  }
  1133  
  1134  // ciliumUninstallHelm uninstalls Cilium with the Helm options provided.
  1135  func (kub *Kubectl) ciliumUninstallHelm(options []string) error {
  1136  	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
  1137  		return err
  1138  	}
  1139  
  1140  	res := kub.Delete("cilium.yaml")
  1141  	if !res.WasSuccessful() {
  1142  		return res.GetErr("Unable to delete YAML")
  1143  	}
  1144  
  1145  	return nil
  1146  }
  1147  
  1148  // CiliumInstall installs Cilium with the provided Helm options.
  1149  func (kub *Kubectl) CiliumInstall(options []string) error {
  1150  	return kub.ciliumInstallHelm(options)
  1151  }
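
         // Illustrative sketch (not part of the original helpers): installing Cilium
         // with an extra Helm value. Options passed here override the defaults in
         // defaultHelmOptions via addIfNotOverwritten; the value shown is a
         // placeholder.
         func exampleCiliumInstall(kub *Kubectl) error {
         	return kub.CiliumInstall([]string{
         		"--set global.debug.enabled=false",
         	})
         }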
  1152  
  1153  // CiliumUninstall uninstalls Cilium with the provided Helm options.
  1154  func (kub *Kubectl) CiliumUninstall(options []string) error {
  1155  	return kub.ciliumUninstallHelm(options)
  1156  }
  1157  
  1158  // CiliumInstallVersion installs all Cilium descriptors into kubernetes for
  1159  // a given Cilium Version tag.
  1160  // dsPatchName corresponds to the DaemonSet patch that will be applied to the
  1161  // original Cilium DaemonSet descriptor of that given Cilium Version tag.
  1162  // cmPatchName corresponds to the ConfigMap patch that will be applied to the
  1163  // original Cilium ConfigMap descriptor of that given Cilium Version tag.
   1164  // Returns an error if any patch or any original descriptor file was not
   1165  // found.
  1166  func (kub *Kubectl) CiliumInstallVersion(dsPatchName, cmPatchName, versionTag string) error {
  1167  	getK8sDescriptorPatch := func(filename string) string {
  1168  		// try dependent Cilium, k8s and integration version patch file
  1169  		ginkgoVersionedPath := filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), GetCurrentIntegration(), filename)
  1170  		_, err := os.Stat(ginkgoVersionedPath)
  1171  		if err == nil {
  1172  			return filepath.Join(BasePath, ginkgoVersionedPath)
  1173  		}
  1174  		// try dependent Cilium version and integration patch file
  1175  		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentIntegration(), filename)
  1176  		_, err = os.Stat(ginkgoVersionedPath)
  1177  		if err == nil {
  1178  			return filepath.Join(BasePath, ginkgoVersionedPath)
  1179  		}
  1180  		// try dependent Cilium and k8s version patch file
  1181  		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), filename)
  1182  		_, err = os.Stat(ginkgoVersionedPath)
  1183  		if err == nil {
  1184  			return filepath.Join(BasePath, ginkgoVersionedPath)
  1185  		}
  1186  		// try dependent Cilium version patch file
  1187  		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, filename)
  1188  		_, err = os.Stat(ginkgoVersionedPath)
  1189  		if err == nil {
  1190  			return filepath.Join(BasePath, ginkgoVersionedPath)
  1191  		}
  1192  		// try dependent integration patch file
  1193  		ginkgoVersionedPath = filepath.Join(manifestsPath, GetCurrentIntegration(), filename)
  1194  		_, err = os.Stat(ginkgoVersionedPath)
  1195  		if err == nil {
  1196  			return filepath.Join(BasePath, ginkgoVersionedPath)
  1197  		}
  1198  		return filepath.Join(BasePath, manifestsPath, filename)
  1199  	}
  1200  	getK8sDescriptor := func(filename string) string {
  1201  		return fmt.Sprintf("https://raw.githubusercontent.com/cilium/cilium/%s/examples/kubernetes/%s/%s", versionTag, GetCurrentK8SEnv(), filename)
  1202  	}
  1203  	return kub.ciliumInstall(dsPatchName, cmPatchName, getK8sDescriptor, getK8sDescriptorPatch)
  1204  }
  1205  
  1206  // GetCiliumPods returns a list of all Cilium pods in the specified namespace,
  1207  // and an error if the Cilium pods were not able to be retrieved.
  1208  func (kub *Kubectl) GetCiliumPods(namespace string) ([]string, error) {
  1209  	return kub.GetPodNames(namespace, "k8s-app=cilium")
  1210  }
  1211  
  1212  // GetCiliumPodsContext returns a list of all Cilium pods in the specified
  1213  // namespace, and an error if the Cilium pods were not able to be retrieved.
  1214  func (kub *Kubectl) GetCiliumPodsContext(ctx context.Context, namespace string) ([]string, error) {
  1215  	return kub.GetPodNamesContext(ctx, namespace, "k8s-app=cilium")
  1216  }
  1217  
  1218  // CiliumEndpointsList returns the result of `cilium endpoint list` from the
  1219  // specified pod.
  1220  func (kub *Kubectl) CiliumEndpointsList(ctx context.Context, pod string) *CmdRes {
  1221  	return kub.CiliumExecContext(ctx, pod, "cilium endpoint list -o json")
  1222  }
  1223  
   1224  // CiliumEndpointsStatus returns a mapping of a pod name to its corresponding
   1225  // endpoint's status.
  1226  func (kub *Kubectl) CiliumEndpointsStatus(pod string) map[string]string {
  1227  	filter := `{range [*]}{@.status.external-identifiers.pod-name}{"="}{@.status.state}{"\n"}{end}`
  1228  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1229  	defer cancel()
  1230  	return kub.CiliumExecContext(ctx, pod, fmt.Sprintf(
  1231  		"cilium endpoint list -o jsonpath='%s'", filter)).KVOutput()
  1232  }
  1233  
   1234  // CiliumEndpointWaitReady waits until all endpoints managed by all Cilium pods
   1235  // are ready. Returns an error if the Cilium pods cannot be retrieved via
   1236  // Kubernetes, or if the endpoints are not ready after the specified timeout.
  1237  func (kub *Kubectl) CiliumEndpointWaitReady() error {
  1238  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  1239  	if err != nil {
  1240  		kub.logger.WithError(err).Error("cannot get Cilium pods")
  1241  		return err
  1242  	}
  1243  
  1244  	body := func(ctx context.Context) (bool, error) {
  1245  		var wg sync.WaitGroup
  1246  		queue := make(chan bool, len(ciliumPods))
  1247  		endpointsReady := func(pod string) {
  1248  			valid := false
  1249  			defer func() {
  1250  				queue <- valid
  1251  				wg.Done()
  1252  			}()
  1253  			logCtx := kub.logger.WithField("pod", pod)
  1254  			status, err := kub.CiliumEndpointsList(ctx, pod).Filter(`{range [*]}{.status.state}{"="}{.status.identity.id}{"\n"}{end}`)
  1255  			if err != nil {
  1256  				logCtx.WithError(err).Errorf("cannot get endpoints states on Cilium pod")
  1257  				return
  1258  			}
  1259  			total := 0
  1260  			invalid := 0
  1261  			for _, line := range strings.Split(status.String(), "\n") {
  1262  				if line == "" {
  1263  					continue
  1264  				}
  1265  				// each line is like status=identityID.
  1266  				// IdentityID is needed because the reserved:init identity
  1267  				// means that the pod is not ready to accept traffic.
  1268  				total++
  1269  				vals := strings.Split(line, "=")
  1270  				if len(vals) != 2 {
  1271  					logCtx.Errorf("Endpoint list does not have a correct output '%s'", line)
  1272  					return
  1273  				}
  1274  				if vals[0] != "ready" {
  1275  					invalid++
  1276  				}
  1277  				// Consider an endpoint with reserved identity 5 (reserved:init) as not ready.
  1278  				if vals[1] == "5" {
  1279  					invalid++
  1280  				}
  1281  			}
  1282  			logCtx.WithFields(logrus.Fields{
  1283  				"total":   total,
  1284  				"invalid": invalid,
  1285  			}).Info("Waiting for cilium endpoints to be ready")
  1286  
  1287  			if invalid != 0 {
  1288  				return
  1289  			}
  1290  			valid = true
  1291  			return
  1292  		}
  1293  		wg.Add(len(ciliumPods))
  1294  		for _, pod := range ciliumPods {
  1295  			go endpointsReady(pod)
  1296  		}
  1297  
  1298  		wg.Wait()
  1299  		close(queue)
  1300  
  1301  		for status := range queue {
   1302  			if !status {
  1303  				return false, nil
  1304  			}
  1305  		}
  1306  		return true, nil
  1307  	}
  1308  
  1309  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1310  	defer cancel()
  1311  	err = WithContext(ctx, body, 1*time.Second)
  1312  	if err == nil {
  1313  		return err
  1314  	}
  1315  
  1316  	callback := func() string {
  1317  		ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1318  		defer cancel()
  1319  
  1320  		var errorMessage string
  1321  		for _, pod := range ciliumPods {
  1322  			var endpoints []models.Endpoint
  1323  			cmdRes := kub.CiliumEndpointsList(ctx, pod)
  1324  			if !cmdRes.WasSuccessful() {
  1325  				errorMessage += fmt.Sprintf(
  1326  					"\tCilium Pod: %s \terror: unable to get endpoint list: %s",
  1327  					pod, cmdRes.err)
  1328  				continue
  1329  			}
  1330  			err := cmdRes.Unmarshal(&endpoints)
  1331  			if err != nil {
  1332  				errorMessage += fmt.Sprintf(
  1333  					"\tCilium Pod: %s \terror: unable to parse endpoint list: %s",
  1334  					pod, err)
  1335  				continue
  1336  			}
  1337  			for _, ep := range endpoints {
  1338  				errorMessage += fmt.Sprintf(
  1339  					"\tCilium Pod: %s \tEndpoint: %d \tIdentity: %d\t State: %s\n",
  1340  					pod, ep.ID, ep.Status.Identity.ID, ep.Status.State)
  1341  			}
  1342  		}
  1343  		return errorMessage
  1344  	}
  1345  	return NewSSHMetaError(err.Error(), callback)
  1346  }
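
// Example (illustrative sketch only): a typical call site after deploying test
// workloads, assuming a configured *Kubectl named kub:
//
//	if err := kub.CiliumEndpointWaitReady(); err != nil {
//		// NewSSHMetaError attaches a per-pod endpoint dump callback for debugging.
//		return fmt.Errorf("Cilium endpoints never became ready: %s", err)
//	}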
  1347  
  1348  // WaitForCEPIdentity waits for a particular CEP to have an identity present.
  1349  func (kub *Kubectl) WaitForCEPIdentity(ns, podName string) error {
  1350  	body := func(ctx context.Context) (bool, error) {
  1351  		ep := kub.CepGet(ns, podName)
  1352  		if ep == nil {
  1353  			return false, nil
  1354  		}
  1355  		if ep.Identity == nil {
  1356  			return false, nil
  1357  		}
  1358  		return ep.Identity.ID != 0, nil
  1359  	}
  1360  
  1361  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1362  	defer cancel()
  1363  	return WithContext(ctx, body, 1*time.Second)
  1364  }
  1365  
  1366  // CiliumExecContext runs cmd in the specified Cilium pod with the given context.
  1367  func (kub *Kubectl) CiliumExecContext(ctx context.Context, pod string, cmd string) *CmdRes {
  1368  	limitTimes := 5
  1369  	execute := func() *CmdRes {
  1370  		command := fmt.Sprintf("%s exec -n kube-system %s -- %s", KubectlCmd, pod, cmd)
  1371  		return kub.ExecContext(ctx, command)
  1372  	}
  1373  	var res *CmdRes
  1374  	// Sometimes kubectl returns exit code 126. This used to happen in nightly
  1375  	// tests when many execs were in flight (a cgroups issue). The upstream
  1376  	// changes did not fix the issue, so this workaround retries the command to
  1377  	// avoid the kubectl failure.
  1378  	// https://github.com/openshift/origin/issues/16246
  1379  	for i := 0; i < limitTimes; i++ {
  1380  		res = execute()
  1381  		if res.GetExitCode() != 126 {
  1382  			break
  1383  		}
  1384  		time.Sleep(200 * time.Millisecond)
  1385  	}
  1386  	return res
  1387  }
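
// Example (illustrative sketch only): running a cilium CLI command inside a
// Cilium pod with an explicit timeout, assuming a configured *Kubectl named kub
// and a Cilium pod name in ciliumPod:
//
//	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
//	defer cancel()
//	res := kub.CiliumExecContext(ctx, ciliumPod, "cilium endpoint list -o json")
//	if !res.WasSuccessful() {
//		return fmt.Errorf("cilium endpoint list failed: %s", res.OutputPrettyPrint())
//	}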
  1388  
  1389  // CiliumExec runs cmd in the specified Cilium pod.
  1390  // Deprecated: use CiliumExecContext instead
  1391  func (kub *Kubectl) CiliumExec(pod string, cmd string) *CmdRes {
  1392  	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
  1393  	defer cancel()
  1394  	return kub.CiliumExecContext(ctx, pod, cmd)
  1395  }
  1396  
  1397  // CiliumExecUntilMatch executes the specified command repeatedly for the
  1398  // specified Cilium pod until the given substring is present in stdout.
  1399  // If the timeout is reached it will return an error.
  1400  func (kub *Kubectl) CiliumExecUntilMatch(pod, cmd, substr string) error {
  1401  	body := func() bool {
  1402  		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1403  		defer cancel()
  1404  		res := kub.CiliumExecContext(ctx, pod, cmd)
  1405  		return strings.Contains(res.Output().String(), substr)
  1406  	}
  1407  
  1408  	return WithTimeout(
  1409  		body,
  1410  		fmt.Sprintf("%s is not in the output after timeout", substr),
  1411  		&TimeoutConfig{Timeout: HelperTimeout})
  1412  }
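
// Example (illustrative sketch only; the command and substring are arbitrary
// placeholders): polling a cilium CLI command until its output contains a
// given marker, assuming a configured *Kubectl named kub:
//
//	if err := kub.CiliumExecUntilMatch(ciliumPod, "cilium endpoint list", "ready"); err != nil {
//		return err
//	}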
  1413  
  1414  // WaitForCiliumInitContainerToFinish waits for all Cilium init containers to
  1415  // finish
  1416  func (kub *Kubectl) WaitForCiliumInitContainerToFinish() error {
  1417  	body := func() bool {
  1418  		podList := &v1.PodList{}
  1419  		err := kub.GetPods("kube-system", "-l k8s-app=cilium").Unmarshal(podList)
  1420  		if err != nil {
  1421  			kub.logger.Infof("Error while getting PodList: %s", err)
  1422  			return false
  1423  		}
  1424  		if len(podList.Items) == 0 {
  1425  			return false
  1426  		}
  1427  		for _, pod := range podList.Items {
  1428  			for _, v := range pod.Status.InitContainerStatuses {
  1429  				if v.State.Terminated != nil && (v.State.Terminated.Reason != "Completed" || v.State.Terminated.ExitCode != 0) {
  1430  					kub.logger.WithFields(logrus.Fields{
  1431  						"podName":      pod.Name,
  1432  						"currentState": v.State.String(),
  1433  					}).Infof("Cilium Init container not completed")
  1434  					return false
  1435  				}
  1436  			}
  1437  		}
  1438  		return true
  1439  	}
  1440  
  1441  	return WithTimeout(body, "Cilium Init Container was not able to initialize or did not complete successfully", &TimeoutConfig{Timeout: HelperTimeout})
  1442  }
  1443  
  1444  // CiliumNodesWait waits until all nodes in the Kubernetes cluster are annotated
  1445  // with Cilium annotations. Its runtime is bounded by a maximum of `HelperTimeout`.
  1446  // When a node is annotated with said annotations, it indicates
  1447  // that the tunnels in the nodes are set up and that cross-node traffic can be
  1448  // tested. Returns an error if the timeout is exceeded for waiting for the nodes
  1449  // to be annotated.
  1450  func (kub *Kubectl) CiliumNodesWait() (bool, error) {
  1451  	body := func() bool {
  1452  		filter := `{range .items[*]}{@.metadata.name}{"="}{@.metadata.annotations.io\.cilium\.network\.ipv4-pod-cidr}{"\n"}{end}`
  1453  		data := kub.ExecShort(fmt.Sprintf(
  1454  			"%s get nodes -o jsonpath='%s'", KubectlCmd, filter))
  1455  		if !data.WasSuccessful() {
  1456  			return false
  1457  		}
  1458  		result := data.KVOutput()
  1459  		for k, v := range result {
  1460  			if v == "" {
  1461  				kub.logger.Infof("Kubernetes node '%v' does not have Cilium metadata", k)
  1462  				return false
  1463  			}
  1464  			kub.logger.Infof("Kubernetes node '%v' IPv4 address: '%v'", k, v)
  1465  		}
  1466  		return true
  1467  	}
  1468  	err := WithTimeout(body, "Kubernetes node does not have cilium metadata", &TimeoutConfig{Timeout: HelperTimeout})
  1469  	if err != nil {
  1470  		return false, err
  1471  	}
  1472  	return true, nil
  1473  }
  1474  
  1475  // WaitPolicyDeleted waits for policy policyName to be deleted from the
  1476  // cilium-agent running in pod. Returns an error if policyName could not be
  1477  // deleted within the timeout.
  1478  func (kub *Kubectl) WaitPolicyDeleted(pod string, policyName string) error {
  1479  	body := func() bool {
  1480  		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1481  		defer cancel()
  1482  		res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyName))
  1483  
  1484  		// `cilium policy get <policy name>` fails if the policy is not loaded,
  1485  		// which is the condition we want.
  1486  		return !res.WasSuccessful()
  1487  	}
  1488  
  1489  	return WithTimeout(body, fmt.Sprintf("Policy %s was not deleted in time", policyName), &TimeoutConfig{Timeout: HelperTimeout})
  1490  }
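
// Example (illustrative sketch only; the policy name is a placeholder):
// confirming that a policy has been removed from a specific agent, assuming a
// configured *Kubectl named kub:
//
//	if err := kub.WaitPolicyDeleted(ciliumPod, "sample-policy"); err != nil {
//		return err
//	}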
  1491  
  1492  // CiliumIsPolicyLoaded returns true if the policy is loaded in the given
  1493  // Cilium pod. It returns false if the policy is not in place.
  1494  func (kub *Kubectl) CiliumIsPolicyLoaded(pod string, policyCmd string) bool {
  1495  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1496  	defer cancel()
  1497  	res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyCmd))
  1498  	return res.WasSuccessful()
  1499  }
  1500  
  1501  // CiliumPolicyRevision returns the policy revision in the specified Cilium pod.
  1502  // Returns an error if the policy revision cannot be retrieved.
  1503  func (kub *Kubectl) CiliumPolicyRevision(pod string) (int, error) {
  1504  	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
  1505  	defer cancel()
  1506  	res := kub.CiliumExecContext(ctx, pod, "cilium policy get -o json")
  1507  	if !res.WasSuccessful() {
  1508  		return -1, fmt.Errorf("cannot get the revision %s", res.Output())
  1509  	}
  1510  
  1511  	revision, err := res.Filter("{.revision}")
  1512  	if err != nil {
  1513  		return -1, fmt.Errorf("cannot get revision from json: %s", err)
  1514  	}
  1515  
  1516  	revi, err := strconv.Atoi(strings.Trim(revision.String(), "\n"))
  1517  	if err != nil {
  1518  		kub.logger.Errorf("revision on pod '%s' is not valid '%s'", pod, res.CombineOutput())
  1519  		return -1, err
  1520  	}
  1521  	return revi, nil
  1522  }
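
// Example (illustrative sketch only): recording the policy revision before a
// change so a test can later verify that it advanced, assuming a configured
// *Kubectl named kub:
//
//	before, err := kub.CiliumPolicyRevision(ciliumPod)
//	if err != nil {
//		return err
//	}
//	// ... import or delete a policy here ...
//	after, err := kub.CiliumPolicyRevision(ciliumPod)
//	if err != nil {
//		return err
//	}
//	if after <= before {
//		return fmt.Errorf("policy revision did not advance (%d -> %d)", before, after)
//	}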
  1523  
  1524  // ResourceLifeCycleAction represents an action performed upon objects in
  1525  // Kubernetes.
  1526  type ResourceLifeCycleAction string
  1527  
  1528  // CiliumPolicyAction performs the specified action in Kubernetes for the policy
  1529  // stored in path filepath and waits up to the given timeout for the policy
  1530  // to be applied in all Cilium endpoints. Returns an error if the policy is not
  1531  // imported before the timeout is exceeded.
  1533  func (kub *Kubectl) CiliumPolicyAction(namespace, filepath string, action ResourceLifeCycleAction, timeout time.Duration) (string, error) {
  1534  	numNodes := kub.GetNumNodes()
  1535  
  1536  	// Test filter: https://jqplay.org/s/EgNzc06Cgn
  1537  	jqFilter := fmt.Sprintf(
  1538  		`[.items[]|{name:.metadata.name, enforcing: (.status|if has("nodes") then .nodes |to_entries|map_values(.value.enforcing) + [(.|length >= %d)]|all else true end)|tostring, status: has("status")|tostring}]`,
  1539  		numNodes)
  1540  	npFilter := fmt.Sprintf(
  1541  		`{range .items[*]}{"%s="}{.metadata.name}{" %s="}{.metadata.namespace}{"\n"}{end}`,
  1542  		KubectlPolicyNameLabel, KubectlPolicyNameSpaceLabel)
  1543  	kub.logger.Infof("Performing %s action on resource '%s'", action, filepath)
  1544  
  1545  	if status := kub.Action(action, filepath, namespace); !status.WasSuccessful() {
  1546  		return "", status.GetErr(fmt.Sprintf("Cannot perform '%s' on resource '%s'", action, filepath))
  1547  	}
  1548  
  1549  	if action == KubectlDelete {
  1550  		// The policy is being uninstalled, so there is no need to validate that it is enforced.
  1551  		return "", nil
  1552  	}
  1553  
  1554  	body := func() bool {
  1555  		var data []map[string]string
  1556  		cmd := fmt.Sprintf("%s get cnp --all-namespaces -o json | jq '%s'",
  1557  			KubectlCmd, jqFilter)
  1558  
  1559  		res := kub.ExecShort(cmd)
  1560  		if !res.WasSuccessful() {
  1561  			kub.logger.WithError(res.GetErr("")).Error("cannot get cnp status")
  1562  			return false
  1563  
  1564  		}
  1565  
  1566  		err := res.Unmarshal(&data)
  1567  		if err != nil {
  1568  			kub.logger.WithError(err).Error("Cannot unmarshal json")
  1569  			return false
  1570  		}
  1571  
  1572  		for _, item := range data {
  1573  			if item["enforcing"] != "true" || item["status"] != "true" {
  1574  				kub.logger.Errorf("Policy '%s' is not enforcing yet", item["name"])
  1575  				return false
  1576  			}
  1577  		}
  1578  		return true
  1579  	}
  1580  
  1581  	err := WithTimeout(
  1582  		body,
  1583  		"cannot change state of resource correctly; command timed out",
  1584  		&TimeoutConfig{Timeout: timeout})
  1585  
  1586  	if err != nil {
  1587  		return "", err
  1588  	}
  1589  
  1590  	knpBody := func() bool {
  1591  		knp := kub.ExecShort(fmt.Sprintf("%s get --all-namespaces netpol -o jsonpath='%s'",
  1592  			KubectlCmd, npFilter))
  1593  		result := knp.ByLines()
  1594  		if len(result) == 0 {
  1595  			return true
  1596  		}
  1597  
  1598  		pods, err := kub.GetCiliumPods(KubeSystemNamespace)
  1599  		if err != nil {
  1600  			kub.logger.WithError(err).Error("cannot retrieve cilium pods")
  1601  			return false
  1602  		}
  1603  		for _, item := range result {
  1604  			for _, ciliumPod := range pods {
  1605  				if !kub.CiliumIsPolicyLoaded(ciliumPod, item) {
  1606  					kub.logger.Infof("Policy '%s' is not ready on Cilium pod '%s'", item, ciliumPod)
  1607  					return false
  1608  				}
  1609  			}
  1610  		}
  1611  		return true
  1612  	}
  1613  
  1614  	err = WithTimeout(
  1615  		knpBody,
  1616  		"cannot change state of Kubernetes network policies correctly; command timed out",
  1617  		&TimeoutConfig{Timeout: timeout})
  1618  	return "", err
  1619  }
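
// Example (illustrative sketch only; the manifest path is a placeholder and
// KubectlApply is assumed to be the suite's apply action constant): importing a
// CiliumNetworkPolicy and waiting for it to be enforced, assuming a configured
// *Kubectl named kub:
//
//	_, err := kub.CiliumPolicyAction(
//		"default", "/path/to/policy.yaml", KubectlApply, HelperTimeout)
//	if err != nil {
//		return fmt.Errorf("policy was not imported in time: %s", err)
//	}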
  1620  
  1621  // CiliumReport reports the state of the Cilium pods to the log and appends the
  1622  // output of the given commands.
  1623  func (kub *Kubectl) CiliumReport(namespace string, commands ...string) {
  1624  	if config.CiliumTestConfig.SkipLogGathering {
  1625  		ginkgoext.GinkgoPrint("Skipped gathering logs (-cilium.skipLogs=true)\n")
  1626  		return
  1627  	}
  1628  
  1629  	// Log gathering for Cilium should take at most 5 minutes. This ensures that
  1630  	// the CiliumReport stage doesn't cause the entire CI to hang.
  1631  
  1632  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
  1633  	defer cancel()
  1634  
  1635  	var wg sync.WaitGroup
  1636  	wg.Add(1)
  1637  
  1638  	go func() {
  1639  		defer wg.Done()
  1640  		kub.DumpCiliumCommandOutput(ctx, namespace)
  1641  		kub.GatherLogs(ctx)
  1642  	}()
  1643  
  1644  	kub.CiliumCheckReport(ctx)
  1645  
  1646  	pods, err := kub.GetCiliumPodsContext(ctx, namespace)
  1647  	if err != nil {
  1648  		kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump")
  1649  	}
  1650  	res := kub.ExecContextShort(ctx, fmt.Sprintf("%s get pods -o wide --all-namespaces", KubectlCmd))
  1651  	ginkgoext.GinkgoPrint(res.GetDebugMessage())
  1652  
  1653  	results := make([]*CmdRes, 0, len(pods)*len(commands))
  1654  	ginkgoext.GinkgoPrint("Fetching command output from pods %s", pods)
  1655  	for _, pod := range pods {
  1656  		for _, cmd := range commands {
  1657  			res = kub.ExecPodCmdBackground(ctx, namespace, pod, cmd, ExecOptions{SkipLog: true})
  1658  			results = append(results, res)
  1659  		}
  1660  	}
  1661  
  1662  	wg.Wait()
  1663  
  1664  	for _, res := range results {
  1665  		res.WaitUntilFinish()
  1666  		ginkgoext.GinkgoPrint(res.GetDebugMessage())
  1667  	}
  1668  }
  1669  
  1670  // EtcdOperatorReport dumps etcd pod data into the report directory to make it
  1671  // possible to debug the etcd operator status when a test fails.
  1672  func (kub *Kubectl) EtcdOperatorReport(ctx context.Context, reportCmds map[string]string) {
  1673  	if reportCmds == nil {
  1674  		reportCmds = make(map[string]string)
  1675  	}
  1676  
  1677  	pods, err := kub.GetPodNamesContext(ctx, KubeSystemNamespace, "etcd_cluster=cilium-etcd")
  1678  	if err != nil {
  1679  		kub.logger.WithError(err).Error("No etcd pods")
  1680  		return
  1681  	}
  1682  
  1683  	etcdctl := "etcdctl --endpoints=https://%s.cilium-etcd.kube-system.svc:2379 " +
  1684  		"--cert-file /etc/etcdtls/member/peer-tls/peer.crt " +
  1685  		"--key-file /etc/etcdtls/member/peer-tls/peer.key " +
  1686  		"--ca-file /etc/etcdtls/member/peer-tls/peer-ca.crt " +
  1687  		" %s"
  1688  
  1689  	etcdDumpCommands := map[string]string{
  1690  		"member list":    "etcd_%s_member_list",
  1691  		"cluster-health": "etcd_%s_cluster_health",
  1692  	}
  1693  
  1694  	for _, pod := range pods {
  1695  		for cmd, reportFile := range etcdDumpCommands {
  1696  			etcdCmd := fmt.Sprintf(etcdctl, pod, cmd)
  1697  			command := fmt.Sprintf("%s -n %s exec -ti %s -- %s",
  1698  				KubectlCmd, KubeSystemNamespace, pod, etcdCmd)
  1699  			reportCmds[command] = fmt.Sprintf(reportFile, pod)
  1700  		}
  1701  	}
  1702  }
  1703  
  1704  // CiliumCheckReport prints a few checks on the Junit output to provide more
  1705  // context to users. The checks printed are the following:
  1706  // - Number of Kubernetes and Cilium policies installed.
  1707  // - Policy enforcement status by endpoint.
  1708  // - Controller, health, kvstore status.
  1709  func (kub *Kubectl) CiliumCheckReport(ctx context.Context) {
  1710  	pods, _ := kub.GetCiliumPods(KubeSystemNamespace)
  1711  	fmt.Fprintf(CheckLogs, "Cilium pods: %v\n", pods)
  1712  
  1713  	var policiesFilter = `{range .items[*]}{.metadata.namespace}{"::"}{.metadata.name}{" "}{end}`
  1714  	netpols := kub.ExecContextShort(ctx, fmt.Sprintf(
  1715  		"%s get netpol -o jsonpath='%s' --all-namespaces",
  1716  		KubectlCmd, policiesFilter))
  1717  	fmt.Fprintf(CheckLogs, "Netpols loaded: %v\n", netpols.Output())
  1718  
  1719  	cnp := kub.ExecContextShort(ctx, fmt.Sprintf(
  1720  		"%s get cnp -o jsonpath='%s' --all-namespaces",
  1721  		KubectlCmd, policiesFilter))
  1722  	fmt.Fprintf(CheckLogs, "CiliumNetworkPolicies loaded: %v\n", cnp.Output())
  1723  
  1724  	cepFilter := `{range .items[*]}{.metadata.name}{"="}{.status.policy.ingress.enforcing}{":"}{.status.policy.egress.enforcing}{"\n"}{end}`
  1725  	cepStatus := kub.ExecContextShort(ctx, fmt.Sprintf(
  1726  		"%s get cep -o jsonpath='%s' --all-namespaces",
  1727  		KubectlCmd, cepFilter))
  1728  
  1729  	fmt.Fprintf(CheckLogs, "Endpoint Policy Enforcement:\n")
  1730  
  1731  	table := tabwriter.NewWriter(CheckLogs, 5, 0, 3, ' ', 0)
  1732  	fmt.Fprintf(table, "Pod\tIngress\tEgress\n")
  1733  	for pod, policy := range cepStatus.KVOutput() {
  1734  		data := strings.SplitN(policy, ":", 2)
  1735  		if len(data) != 2 {
  1736  			// Avoid indexing past the end of data when the value is malformed.
  1737  			data = []string{"invalid value", "invalid value"}
  1738  		}
  1739  		fmt.Fprintf(table, "%s\t%s\t%s\n", pod, data[0], data[1])
  1740  	}
  1741  	table.Flush()
  1742  
  1743  	var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}::{.status.last-failure-msg}{"\n"}{end}`
  1744  	var failedControllers string
  1745  	for _, pod := range pods {
  1746  		var prefix = ""
  1747  		status := kub.CiliumExecContext(ctx, pod, "cilium status --all-controllers -o json")
  1748  		result, err := status.Filter(controllersFilter)
  1749  		if err != nil {
  1750  			kub.logger.WithError(err).Error("Cannot filter controller status output")
  1751  			continue
  1752  		}
  1753  		var total = 0
  1754  		var failed = 0
  1755  		for name, data := range result.KVOutput() {
  1756  			total++
  1757  			status := strings.SplitN(data, "::", 2)
  1758  			if len(status) != 2 {
  1759  				// Just make sure that the length of the output is 2 so that
  1760  				// the following lines do not fail with an index error.
  1761  				continue
  1762  			}
  1763  			if status[0] != "" {
  1764  				failed++
  1765  				prefix = "⚠️  "
  1766  				failedControllers += fmt.Sprintf("controller %s failure '%s'\n", name, status[1])
  1767  			}
  1768  		}
  1769  		statusFilter := `Status: {.cilium.state}  Health: {.cluster.ciliumHealth.state}` +
  1770  			` Nodes "{.cluster.nodes[*].name}" ContainerRuntime: {.container-runtime.state}` +
  1771  			` Kubernetes: {.kubernetes.state} KVstore: {.kvstore.state}`
  1772  		data, _ := status.Filter(statusFilter)
  1773  		fmt.Fprintf(CheckLogs, "%sCilium agent '%s': %s Controllers: Total %d Failed %d\n",
  1774  			prefix, pod, data, total, failed)
  1775  		if failedControllers != "" {
  1776  			fmt.Fprintf(CheckLogs, "Failed controllers:\n %s", failedControllers)
  1777  		}
  1778  	}
  1779  }
  1780  
  1781  // ValidateNoErrorsInLogs checks that the Cilium logs since the given duration
  1782  // (by default `CurrentGinkgoTestDescription().Duration`) do not contain `panic`,
  1783  // `deadlock` or `segmentation fault` messages. If any of these messages are
  1784  // found, it marks the test as failed.
  1785  func (kub *Kubectl) ValidateNoErrorsInLogs(duration time.Duration) {
  1786  
  1787  	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
  1788  	defer cancel()
  1789  
  1790  	var logs string
  1791  	cmd := fmt.Sprintf("%s -n %s logs --timestamps=true -l k8s-app=cilium --since=%vs",
  1792  		KubectlCmd, KubeSystemNamespace, duration.Seconds())
  1793  	res := kub.ExecContext(ctx, fmt.Sprintf("%s --previous", cmd), ExecOptions{SkipLog: true})
  1794  	if res.WasSuccessful() {
  1795  		logs += res.Output().String()
  1796  	}
  1797  	res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1798  	if res.WasSuccessful() {
  1799  		logs += res.Output().String()
  1800  	}
  1801  	defer func() {
  1802  		// Keep the cilium logs for the given test in a separate file.
  1803  		testPath, err := CreateReportDirectory()
  1804  		if err != nil {
  1805  			kub.logger.WithError(err).Error("Cannot create report directory")
  1806  			return
  1807  		}
  1808  		err = ioutil.WriteFile(
  1809  			fmt.Sprintf("%s/%s", testPath, CiliumTestLog),
  1810  			[]byte(logs), LogPerm)
  1811  
  1812  		if err != nil {
  1813  			kub.logger.WithError(err).Errorf("Cannot create %s", CiliumTestLog)
  1814  		}
  1815  	}()
  1816  
  1817  	failIfContainsBadLogMsg(logs)
  1818  
  1819  	fmt.Fprintf(CheckLogs, logutils.LogErrorsSummary(logs))
  1820  }
  1821  
  1822  // GatherCiliumCoreDumps copies core dumps, if present in the /tmp folder,
  1823  // into the test report folder for further analysis.
  1824  func (kub *Kubectl) GatherCiliumCoreDumps(ctx context.Context, ciliumPod string) {
  1825  	log := kub.logger.WithField("pod", ciliumPod)
  1826  
  1827  	cores := kub.CiliumExecContext(ctx, ciliumPod, "ls /tmp/ | grep core")
  1828  	if !cores.WasSuccessful() {
  1829  		log.Debug("There are no core dumps in the pod")
  1830  		return
  1831  	}
  1832  
  1833  	testPath, err := CreateReportDirectory()
  1834  	if err != nil {
  1835  		log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
  1836  		return
  1837  	}
  1838  	resultPath := filepath.Join(BasePath, testPath)
  1839  
  1840  	for _, core := range cores.ByLines() {
  1841  		dst := filepath.Join(resultPath, core)
  1842  		src := filepath.Join("/tmp/", core)
  1843  		cmd := fmt.Sprintf("%s -n %s cp %s:%s %s",
  1844  			KubectlCmd, KubeSystemNamespace,
  1845  			ciliumPod, src, dst)
  1846  		res := kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1847  		if !res.WasSuccessful() {
  1848  			log.WithField("output", res.CombineOutput()).Error("Cannot get core from pod")
  1849  		}
  1850  	}
  1851  }
  1852  
  1853  // GetCiliumHostIPv4 retrieves cilium_host IPv4 addr of the given node.
  1854  func (kub *Kubectl) GetCiliumHostIPv4(ctx context.Context, node string) (string, error) {
  1855  	pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, node)
  1856  	if err != nil {
  1857  		return "", fmt.Errorf("unable to retrieve cilium pod: %s", err)
  1858  	}
  1859  
  1860  	cmd := "ip -4 -o a show dev cilium_host | grep -o -e 'inet [0-9.]*' | cut -d' ' -f2"
  1861  	res := kub.ExecPodCmd(KubeSystemNamespace, pod, cmd)
  1862  	if !res.WasSuccessful() {
  1863  		return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr: %s", res.GetError())
  1864  	}
  1865  	addr := res.SingleOut()
  1866  	if addr == "" {
  1867  		return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr")
  1868  	}
  1869  
  1870  	return addr, nil
  1871  }
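
// Example (illustrative sketch only; the node name "k8s1" is a placeholder):
// looking up the cilium_host address of a node, assuming a configured *Kubectl
// named kub:
//
//	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
//	defer cancel()
//	hostIP, err := kub.GetCiliumHostIPv4(ctx, "k8s1")
//	if err != nil {
//		return err
//	}
//	ginkgoext.GinkgoPrint("cilium_host on k8s1 has address %s", hostIP)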
  1872  
  1873  // DumpCiliumCommandOutput runs a variety of commands (ciliumKubCLICommands) and
  1874  // writes the results to TestResultsPath.
  1875  func (kub *Kubectl) DumpCiliumCommandOutput(ctx context.Context, namespace string) {
  1876  	ReportOnPod := func(pod string) {
  1877  		logger := kub.logger.WithField("CiliumPod", pod)
  1878  
  1879  		testPath, err := CreateReportDirectory()
  1880  		if err != nil {
  1881  			logger.WithError(err).Errorf("cannot create test result path '%s'", testPath)
  1882  			return
  1883  		}
  1884  
  1885  		genReportCmds := func(cliCmds map[string]string) map[string]string {
  1886  			reportCmds := map[string]string{}
  1887  			for cmd, logfile := range cliCmds {
  1888  				command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
  1889  				reportCmds[command] = fmt.Sprintf("%s_%s", pod, logfile)
  1890  			}
  1891  			return reportCmds
  1892  		}
  1893  
  1894  		reportCmds := genReportCmds(ciliumKubCLICommands)
  1895  		reportMapContext(ctx, testPath, reportCmds, kub.SSHMeta)
  1896  
  1897  		logsPath := filepath.Join(BasePath, testPath)
  1898  
  1899  		// Get bugtool output. Since bugtool output is dumped in the pod's filesystem,
  1900  		// copy it over with `kubectl cp`.
  1901  		bugtoolCmd := fmt.Sprintf("%s exec -n %s %s -- %s",
  1902  			KubectlCmd, namespace, pod, CiliumBugtool)
  1903  		res := kub.ExecContext(ctx, bugtoolCmd, ExecOptions{SkipLog: true})
  1904  		if !res.WasSuccessful() {
  1905  			logger.Errorf("%s failed: %s", bugtoolCmd, res.CombineOutput().String())
  1906  			return
  1907  		}
  1908  		// Default output directory is /tmp for bugtool.
  1909  		res = kub.ExecContext(ctx, fmt.Sprintf("%s exec -n %s %s -- ls /tmp/", KubectlCmd, namespace, pod))
  1910  		tmpList := res.ByLines()
  1911  		for _, line := range tmpList {
  1912  			// Only copy over bugtool output to directory.
  1913  			if !strings.Contains(line, CiliumBugtool) {
  1914  				continue
  1915  			}
  1916  
  1917  			res = kub.ExecContext(ctx, fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s /tmp/%[4]s",
  1918  				KubectlCmd, namespace, pod, line),
  1919  				ExecOptions{SkipLog: true})
  1920  			if !res.WasSuccessful() {
  1921  				logger.Errorf("'%s' failed: %s", res.GetCmd(), res.CombineOutput())
  1922  				continue
  1923  			}
  1924  
  1925  			archiveName := filepath.Join(logsPath, fmt.Sprintf("bugtool-%s", pod))
  1926  			res = kub.ExecContext(ctx, fmt.Sprintf("mkdir -p %s", archiveName))
  1927  			if !res.WasSuccessful() {
  1928  				logger.WithField("cmd", res.GetCmd()).Errorf(
  1929  					"cannot create bugtool archive folder: %s", res.CombineOutput())
  1930  				continue
  1931  			}
  1932  
  1933  			cmd := fmt.Sprintf("tar -xf /tmp/%s -C %s --strip-components=1", line, archiveName)
  1934  			res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
  1935  			if !res.WasSuccessful() {
  1936  				logger.WithField("cmd", cmd).Errorf(
  1937  					"Cannot untar bugtool output: %s", res.CombineOutput())
  1938  				continue
  1939  			}
  1940  			// Remove the bugtool artifact so that it is not reused if any other test fails.
  1941  			_ = kub.ExecPodCmdBackground(ctx, KubeSystemNamespace, pod, fmt.Sprintf("rm /tmp/%s", line))
  1942  		}
  1943  
  1944  		// Finally, get kvstore output - this is best effort; we do this last
  1945  		// because if connectivity to the kvstore is broken from a cilium pod,
  1946  		// we don't want the context above to timeout and as a result, get none
  1947  		// of the other logs from the tests.
  1948  
  1949  		// Use a shorter context for kvstore-related commands to avoid having
  1950  		// further log-gathering fail as well if the first Cilium pod fails to
  1951  		// gather kvstore logs.
  1952  		kvstoreCmdCtx, cancel := context.WithTimeout(ctx, MidCommandTimeout)
  1953  		defer cancel()
  1954  		reportCmds = genReportCmds(ciliumKubCLICommandsKVStore)
  1955  		reportMapContext(kvstoreCmdCtx, testPath, reportCmds, kub.SSHMeta)
  1956  	}
  1957  
  1958  	pods, err := kub.GetCiliumPodsContext(ctx, namespace)
  1959  	if err != nil {
  1960  		kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump")
  1961  		return
  1962  	}
  1963  	for _, pod := range pods {
  1964  		ReportOnPod(pod)
  1965  		kub.GatherCiliumCoreDumps(ctx, pod)
  1966  	}
  1967  }
  1968  
  1969  // GatherLogs dumps kubernetes pods, services, DaemonSet to the testResultsPath
  1970  // directory
  1971  func (kub *Kubectl) GatherLogs(ctx context.Context) {
  1972  	reportCmds := map[string]string{
  1973  		"kubectl get pods --all-namespaces -o json":                  "pods.txt",
  1974  		"kubectl get services --all-namespaces -o json":              "svc.txt",
  1975  		"kubectl get nodes -o json":                                  "nodes.txt",
  1976  		"kubectl get ds --all-namespaces -o json":                    "ds.txt",
  1977  		"kubectl get cnp --all-namespaces -o json":                   "cnp.txt",
  1978  		"kubectl get cep --all-namespaces -o json":                   "cep.txt",
  1979  		"kubectl get netpol --all-namespaces -o json":                "netpol.txt",
  1980  		"kubectl describe pods --all-namespaces":                     "pods_status.txt",
  1981  		"kubectl get replicationcontroller --all-namespaces -o json": "replicationcontroller.txt",
  1982  		"kubectl get deployment --all-namespaces -o json":            "deployment.txt",
  1983  	}
  1984  
  1985  	kub.GeneratePodLogGatheringCommands(ctx, reportCmds)
  1986  	kub.EtcdOperatorReport(ctx, reportCmds)
  1987  
  1988  	res := kub.ExecContext(ctx, fmt.Sprintf(`%s api-resources | grep -v "^NAME" | awk '{print $1}'`, KubectlCmd))
  1989  	if res.WasSuccessful() {
  1990  		for _, line := range res.ByLines() {
  1991  			key := fmt.Sprintf("%s get %s --all-namespaces -o wide", KubectlCmd, line)
  1992  			reportCmds[key] = fmt.Sprintf("api-resource-%s.txt", line)
  1993  		}
  1994  	} else {
  1995  		kub.logger.Errorf("Cannot get api-resources: %s", res.GetDebugMessage())
  1996  	}
  1997  
  1998  	testPath, err := CreateReportDirectory()
  1999  	if err != nil {
  2000  		kub.logger.WithError(err).Errorf(
  2001  			"cannot create test results path '%s'", testPath)
  2002  		return
  2003  	}
  2004  	reportMap(testPath, reportCmds, kub.SSHMeta)
  2005  
  2006  	for _, node := range []string{K8s1VMName(), K8s2VMName()} {
  2007  		vm := GetVagrantSSHMeta(node)
  2008  		reportCmds := map[string]string{
  2009  			"journalctl --no-pager -au kubelet": fmt.Sprintf("kubelet-%s.log", node),
  2010  			"sudo top -n 1 -b":                  fmt.Sprintf("top-%s.log", node),
  2011  			"sudo ps aux":                       fmt.Sprintf("ps-%s.log", node),
  2012  		}
  2013  		reportMapContext(ctx, testPath, reportCmds, vm)
  2014  	}
  2015  }
  2016  
  2017  // GeneratePodLogGatheringCommands generates the commands to gather logs for
  2018  // all pods in the Kubernetes cluster, and maps the commands to the filename
  2019  // in which they will be stored in reportCmds.
  2020  func (kub *Kubectl) GeneratePodLogGatheringCommands(ctx context.Context, reportCmds map[string]string) {
  2021  	if reportCmds == nil {
  2022  		reportCmds = make(map[string]string)
  2023  	}
  2024  	pods, err := kub.GetAllPods(ctx, ExecOptions{SkipLog: true})
  2025  	if err != nil {
  2026  		kub.logger.WithError(err).Error("Unable to get pods from Kubernetes via kubectl")
  2027  	}
  2028  
  2029  	for _, pod := range pods {
  2030  		for _, containerStatus := range pod.Status.ContainerStatuses {
  2031  			logCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
  2032  			logfileName := fmt.Sprintf("pod-%s-%s-%s.log", pod.Namespace, pod.Name, containerStatus.Name)
  2033  			reportCmds[logCmd] = logfileName
  2034  
  2035  			if containerStatus.RestartCount > 0 {
  2036  				previousLogCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s --previous", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
  2037  				previousLogfileName := fmt.Sprintf("pod-%s-%s-%s-previous.log", pod.Namespace, pod.Name, containerStatus.Name)
  2038  				reportCmds[previousLogCmd] = previousLogfileName
  2039  			}
  2040  		}
  2041  	}
  2042  }
  2043  
  2044  // GetCiliumPodOnNode returns the name of the Cilium pod that is running on the
  2045  // specified node in the given namespace.
  2046  func (kub *Kubectl) GetCiliumPodOnNode(namespace string, node string) (string, error) {
  2047  	filter := fmt.Sprintf(
  2048  		"-o jsonpath='{.items[?(@.spec.nodeName == \"%s\")].metadata.name}'", node)
  2049  
  2050  	res := kub.ExecShort(fmt.Sprintf(
  2051  		"%s -n %s get pods -l k8s-app=cilium %s", KubectlCmd, namespace, filter))
  2052  	if !res.WasSuccessful() {
  2053  		return "", fmt.Errorf("Cilium pod not found on node '%s'", node)
  2054  	}
  2055  
  2056  	return res.Output().String(), nil
  2057  }
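
// Example (illustrative sketch only; the node name "k8s1" is a placeholder):
// resolving the Cilium pod for a node and running a command in it, assuming a
// configured *Kubectl named kub:
//
//	pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, "k8s1")
//	if err != nil {
//		return err
//	}
//	res := kub.CiliumExec(pod, "cilium status")
//	if !res.WasSuccessful() {
//		return fmt.Errorf("cilium status failed on %s: %s", pod, res.OutputPrettyPrint())
//	}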
  2058  
  2059  func (kub *Kubectl) ciliumPreFlightCheck() error {
  2060  	err := kub.ciliumStatusPreFlightCheck()
  2061  	if err != nil {
  2062  		return fmt.Errorf("status is unhealthy: %s", err)
  2063  	}
  2064  
  2065  	err = kub.ciliumControllersPreFlightCheck()
  2066  	if err != nil {
  2067  		return fmt.Errorf("controllers are failing: %s", err)
  2068  	}
  2069  
  2070  	switch GetCurrentIntegration() {
  2071  	case CIIntegrationFlannel:
  2072  	default:
  2073  		err = kub.ciliumHealthPreFlightCheck()
  2074  		if err != nil {
  2075  			return fmt.Errorf("connectivity health is failing: %s", err)
  2076  		}
  2077  	}
  2078  	err = kub.fillServiceCache()
  2079  	if err != nil {
  2080  		return fmt.Errorf("unable to fill service cache: %s", err)
  2081  	}
  2082  	err = kub.ciliumServicePreFlightCheck()
  2083  	if err != nil {
  2084  		return fmt.Errorf("cilium services are not set up correctly: %s", err)
  2085  	}
  2086  	err = kub.servicePreFlightCheck("kubernetes", "default")
  2087  	if err != nil {
  2088  		return fmt.Errorf("kubernetes service is not ready: %s", err)
  2089  	}
  2090  
  2091  	return nil
  2092  }
  2093  
  2094  // CiliumPreFlightCheck checks that various subsystems within Cilium are in a
  2095  // good state. If any of the preflight checks fails, it returns an error.
  2097  func (kub *Kubectl) CiliumPreFlightCheck() error {
  2098  	ginkgoext.By("Performing Cilium preflight check")
  2099  	// Doing this with a timeout because the status can be ready while the other
  2100  	// nodes have not shown up yet, in which case cilium-health can fail as a false positive.
  2101  	var (
  2102  		lastError           string
  2103  		consecutiveFailures int
  2104  	)
  2105  
  2106  	body := func() bool {
  2107  		if err := kub.ciliumPreFlightCheck(); err != nil {
  2108  			newError := err.Error()
  2109  			if lastError != newError || consecutiveFailures >= 5 {
  2110  				ginkgoext.GinkgoPrint("Cilium is not ready yet: %s", newError)
  2111  				lastError = newError
  2112  				consecutiveFailures = 0
  2113  			} else {
  2114  				consecutiveFailures++
  2115  			}
  2116  			return false
  2117  		}
  2118  		return true
  2119  
  2120  	}
  2121  	timeoutErr := WithTimeout(body, "PreflightCheck failed", &TimeoutConfig{Timeout: HelperTimeout})
  2122  	if timeoutErr != nil {
  2123  		return fmt.Errorf("CiliumPreFlightCheck error: %s: Last polled error: %s", timeoutErr, lastError)
  2124  	}
  2125  	return nil
  2126  }
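
// Example (illustrative sketch only): a typical call at the start of a test
// suite run, assuming a configured *Kubectl named kub:
//
//	if err := kub.CiliumPreFlightCheck(); err != nil {
//		return fmt.Errorf("cluster is not ready for testing: %s", err)
//	}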
  2127  
  2128  func (kub *Kubectl) ciliumStatusPreFlightCheck() error {
  2129  	ginkgoext.By("Performing Cilium status preflight check")
  2130  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2131  	if err != nil {
  2132  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2133  	}
  2134  	for _, pod := range ciliumPods {
  2135  		status := kub.CiliumExec(pod, "cilium status --all-health --all-nodes")
  2136  		if !status.WasSuccessful() {
  2137  			return fmt.Errorf("cilium-agent '%s' is unhealthy: %s", pod, status.OutputPrettyPrint())
  2138  		}
  2139  		noQuorum, err := regexp.Match(`^.*KVStore:.*has-quorum=false.*$`, status.Output().Bytes())
  2140  		if err != nil {
  2141  			return fmt.Errorf("Failed to check for kvstore quorum: %s", err.Error())
  2142  		}
  2143  		if noQuorum {
  2144  			return fmt.Errorf("KVStore doesn't have quorum: %s", status.OutputPrettyPrint())
  2145  		}
  2146  	}
  2147  
  2148  	return nil
  2149  }
  2150  
  2151  func (kub *Kubectl) ciliumControllersPreFlightCheck() error {
  2152  	ginkgoext.By("Performing Cilium controllers preflight check")
  2153  	var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}{"\n"}{end}`
  2154  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2155  	if err != nil {
  2156  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2157  	}
  2158  	for _, pod := range ciliumPods {
  2159  		status := kub.CiliumExec(pod, fmt.Sprintf(
  2160  			"cilium status --all-controllers -o jsonpath='%s'", controllersFilter))
  2161  		if !status.WasSuccessful() {
  2162  			return fmt.Errorf("cilium-agent '%s': Cannot run cilium status: %s",
  2163  				pod, status.OutputPrettyPrint())
  2164  		}
  2165  		for controller, status := range status.KVOutput() {
  2166  			if status != "0" {
  2167  				failmsg := kub.CiliumExec(pod, "cilium status --all-controllers")
  2168  				return fmt.Errorf("cilium-agent '%s': controller %s is failing: %s",
  2169  					pod, controller, failmsg.OutputPrettyPrint())
  2170  			}
  2171  		}
  2172  	}
  2173  
  2174  	return nil
  2175  }
  2176  
  2177  func (kub *Kubectl) ciliumHealthPreFlightCheck() error {
  2178  	ginkgoext.By("Performing Cilium health check")
  2179  	var nodesFilter = `{.nodes[*].name}`
  2180  	var statusFilter = `{range .nodes[*]}{.name}{"="}{.host.primary-address.http.status}{"\n"}{end}`
  2181  
  2182  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2183  	if err != nil {
  2184  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2185  	}
  2186  	for _, pod := range ciliumPods {
  2187  		status := kub.CiliumExec(pod, "cilium-health status -o json --probe")
  2188  		if !status.WasSuccessful() {
  2189  			return fmt.Errorf(
  2190  				"Cluster connectivity is unhealthy on '%s': %s",
  2191  				pod, status.OutputPrettyPrint())
  2192  		}
  2193  
  2194  		// Check that the node list reported by cilium-health covers all Cilium agents.
  2195  		nodes, err := status.Filter(nodesFilter)
  2196  		if err != nil {
  2197  			return fmt.Errorf("Cannot unmarshal health status: %s", err)
  2198  		}
  2199  
  2200  		nodeCount := strings.Split(nodes.String(), " ")
  2201  		if len(ciliumPods) != len(nodeCount) {
  2202  			return fmt.Errorf(
  2203  				"cilium-agent '%s': Only %d/%d nodes appeared in cilium-health status. nodes = '%+v'",
  2204  				pod, len(nodeCount), len(ciliumPods), nodeCount)
  2205  		}
  2206  
  2207  		healthStatus, err := status.Filter(statusFilter)
  2208  		if err != nil {
  2209  			return fmt.Errorf("Cannot unmarshal health status: %s", err)
  2210  		}
  2211  
  2212  		for node, status := range healthStatus.KVOutput() {
  2213  			if status != "" {
  2214  				return fmt.Errorf("cilium-agent '%s': connectivity to node '%s' is unhealthy: '%s'",
  2215  					pod, node, status)
  2216  			}
  2217  		}
  2218  	}
  2219  	return nil
  2220  }
  2221  
  2222  // serviceCache keeps service information from k8s, the Cilium services and the
  2223  // Cilium BPF load balancer map.
  2224  type serviceCache struct {
  2225  	services  v1.ServiceList
  2226  	endpoints v1.EndpointsList
  2227  	pods      []ciliumPodServiceCache
  2228  }
  2229  
  2230  // ciliumPodServiceCache keeps the services and BPF load balancer entries reported by a single Cilium pod.
  2231  type ciliumPodServiceCache struct {
  2232  	name          string
  2233  	services      []models.Service
  2234  	loadBalancers map[string][]string
  2235  }
  2236  
  2237  func (kub *Kubectl) fillServiceCache() error {
  2238  	cache := serviceCache{}
  2239  
  2240  	svcRes := kub.GetFromAllNS("service")
  2241  	err := svcRes.GetErr("Unable to get k8s services")
  2242  	if err != nil {
  2243  		return err
  2244  	}
  2245  	err = svcRes.Unmarshal(&cache.services)
  2246  
  2247  	if err != nil {
  2248  		return fmt.Errorf("Unable to unmarshal K8s services: %s", err.Error())
  2249  	}
  2250  
  2251  	epRes := kub.GetFromAllNS("endpoints")
  2252  	err = epRes.GetErr("Unable to get k8s endpoints")
  2253  	if err != nil {
  2254  		return err
  2255  	}
  2256  	err = epRes.Unmarshal(&cache.endpoints)
  2257  	if err != nil {
  2258  		return fmt.Errorf("Unable to unmarshal K8s endpoints: %s", err.Error())
  2259  	}
  2260  
  2261  	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
  2262  	if err != nil {
  2263  		return fmt.Errorf("cannot retrieve cilium pods: %s", err)
  2264  	}
  2265  	ciliumSvcCmd := "cilium service list -o json"
  2266  	ciliumBpfLbCmd := "cilium bpf lb list -o json"
  2267  
  2268  	cache.pods = make([]ciliumPodServiceCache, 0, len(ciliumPods))
  2269  	for _, pod := range ciliumPods {
  2270  		podCache := ciliumPodServiceCache{name: pod}
  2271  
  2272  		ciliumServicesRes := kub.CiliumExec(pod, ciliumSvcCmd)
  2273  		err := ciliumServicesRes.GetErr(
  2274  			fmt.Sprintf("Unable to retrieve Cilium services on %s", pod))
  2275  		if err != nil {
  2276  			return err
  2277  		}
  2278  
  2279  		err = ciliumServicesRes.Unmarshal(&podCache.services)
  2280  		if err != nil {
  2281  			return fmt.Errorf("Unable to unmarshal Cilium services: %s", err.Error())
  2282  		}
  2283  
  2284  		ciliumLbRes := kub.CiliumExec(pod, ciliumBpfLbCmd)
  2285  		err = ciliumLbRes.GetErr(
  2286  			fmt.Sprintf("Unable to retrieve Cilium bpf lb list on %s", pod))
  2287  		if err != nil {
  2288  			return err
  2289  		}
  2290  
  2291  		err = ciliumLbRes.Unmarshal(&podCache.loadBalancers)
  2292  		if err != nil {
  2293  			return fmt.Errorf("Unable to unmarshal Cilium bpf lb list: %s", err.Error())
  2294  		}
  2295  		cache.pods = append(cache.pods, podCache)
  2296  	}
  2297  	kub.serviceCache = &cache
  2298  	return nil
  2299  }
  2300  
  2301  // KubeDNSPreFlightCheck makes sure that kube-dns is plumbed into Cilium.
  2302  func (kub *Kubectl) KubeDNSPreFlightCheck() error {
  2303  	err := kub.fillServiceCache()
  2304  	if err != nil {
  2305  		return err
  2306  	}
  2307  	return kub.servicePreFlightCheck("kube-dns", "kube-system")
  2308  }
  2309  
  2310  // servicePreFlightCheck makes sure that the k8s service with the given name and
  2311  // namespace is properly plumbed in Cilium.
  2312  func (kub *Kubectl) servicePreFlightCheck(serviceName, serviceNamespace string) error {
  2313  	ginkgoext.By("Performing K8s service preflight check")
  2314  	var service *v1.Service
  2315  	for _, s := range kub.serviceCache.services.Items {
  2316  		if s.Name == serviceName && s.Namespace == serviceNamespace {
  2317  			service = &s
  2318  			break
  2319  		}
  2320  	}
  2321  
  2322  	if service == nil {
  2323  		return fmt.Errorf("%s/%s service not found in service cache", serviceName, serviceNamespace)
  2324  	}
  2325  
  2326  	for _, pod := range kub.serviceCache.pods {
  2327  
  2328  		err := validateK8sService(*service, kub.serviceCache.endpoints.Items, pod.services, pod.loadBalancers)
  2329  		if err != nil {
  2330  			return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2331  		}
  2332  	}
  2333  	return nil
  2334  }
  2335  
  2336  func validateK8sService(k8sService v1.Service, k8sEndpoints []v1.Endpoints, ciliumSvcs []models.Service, ciliumLB map[string][]string) error {
  2337  	var ciliumService *models.Service
  2338  CILIUM_SERVICES:
  2339  	for _, cSvc := range ciliumSvcs {
  2340  		if cSvc.Status.Realized.FrontendAddress.IP == k8sService.Spec.ClusterIP {
  2341  			for _, port := range k8sService.Spec.Ports {
  2342  				if int32(cSvc.Status.Realized.FrontendAddress.Port) == port.Port {
  2343  					ciliumService = &cSvc
  2344  					break CILIUM_SERVICES
  2345  				}
  2346  			}
  2347  		}
  2348  	}
  2349  
  2350  	if ciliumService == nil {
  2351  		return fmt.Errorf("Failed to find Cilium service corresponding to %s/%s k8s service", k8sService.Namespace, k8sService.Name)
  2352  	}
  2353  
  2354  	temp := map[string]bool{}
  2355  	err := validateCiliumSvc(*ciliumService, []v1.Service{k8sService}, k8sEndpoints, temp)
  2356  	if err != nil {
  2357  		return err
  2358  	}
  2359  	return validateCiliumSvcLB(*ciliumService, ciliumLB)
  2360  }
  2361  
  2362  // ciliumServicePreFlightCheck checks that the k8s services are plumbed correctly in each Cilium pod.
  2363  func (kub *Kubectl) ciliumServicePreFlightCheck() error {
  2364  	ginkgoext.By("Performing Cilium service preflight check")
  2365  	for _, pod := range kub.serviceCache.pods {
  2366  		k8sServicesFound := map[string]bool{}
  2367  
  2368  		for _, cSvc := range pod.services {
  2369  			err := validateCiliumSvc(cSvc, kub.serviceCache.services.Items, kub.serviceCache.endpoints.Items, k8sServicesFound)
  2370  			if err != nil {
  2371  				return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2372  			}
  2373  		}
  2374  
  2375  		notFoundServices := make([]string, 0, len(kub.serviceCache.services.Items))
  2376  		for _, k8sSvc := range kub.serviceCache.services.Items {
  2377  			key := serviceKey(k8sSvc)
  2378  			// ignore headless services
  2379  			if k8sSvc.Spec.Type == v1.ServiceTypeClusterIP &&
  2380  				k8sSvc.Spec.ClusterIP == v1.ClusterIPNone {
  2381  				continue
  2382  			}
  2383  			// TODO(brb) check NodePort services
  2384  			if k8sSvc.Spec.Type == v1.ServiceTypeNodePort {
  2385  				continue
  2386  			}
  2387  			if _, ok := k8sServicesFound[key]; !ok {
  2388  				notFoundServices = append(notFoundServices, key)
  2389  			}
  2390  		}
  2391  
  2392  		if len(notFoundServices) > 0 {
  2393  			return fmt.Errorf("Failed to find Cilium service corresponding to k8s services %s on pod %v",
  2394  				strings.Join(notFoundServices, ", "), pod)
  2395  		}
  2396  
  2397  		for _, cSvc := range pod.services {
  2398  			err := validateCiliumSvcLB(cSvc, pod.loadBalancers)
  2399  			if err != nil {
  2400  				return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error())
  2401  			}
  2402  		}
  2403  		if len(pod.services) != len(pod.loadBalancers) {
  2404  			return fmt.Errorf("Length of Cilium services doesn't match length of bpf LB map on pod %v", pod)
  2405  		}
  2406  	}
  2407  	return nil
  2408  }
  2409  
  2410  // DeleteETCDOperator deletes the etcd-operator from the cluster pointed to by kub.
  2411  func (kub *Kubectl) DeleteETCDOperator() {
  2412  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete crd etcdclusters.etcd.database.coreos.com", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2413  		log.Warningf("Unable to delete etcdclusters.etcd.database.coreos.com CRD: %s", res.OutputPrettyPrint())
  2414  	}
  2415  
  2416  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete deployment cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2417  		log.Warningf("Unable to delete cilium-etcd-operator Deployment: %s", res.OutputPrettyPrint())
  2418  	}
  2419  
  2420  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2421  		log.Warningf("Unable to delete cilium-etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint())
  2422  	}
  2423  
  2424  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2425  		log.Warningf("Unable to delete cilium-etcd-operator ClusterRole: %s", res.OutputPrettyPrint())
  2426  	}
  2427  
  2428  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2429  		log.Warningf("Unable to delete cilium-etcd-operator ServiceAccount: %s", res.OutputPrettyPrint())
  2430  	}
  2431  
  2432  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2433  		log.Warningf("Unable to delete etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint())
  2434  	}
  2435  
  2436  	if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole etcd-operator", KubectlCmd)); !res.WasSuccessful() {
  2437  		log.Warningf("Unable to delete etcd-operator ClusterRole: %s", res.OutputPrettyPrint())
  2438  	}
  2439  
  2440  	if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-sa", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() {
  2441  		log.Warningf("Unable to delete cilium-etcd-sa ServiceAccount: %s", res.OutputPrettyPrint())
  2442  	}
  2443  }
  2444  
  2445  func serviceKey(s v1.Service) string {
  2446  	return s.Namespace + "/" + s.Name
  2447  }
  2448  
  2449  // validateCiliumSvc checks whether the given Cilium service has corresponding k8s services and endpoints in the given slices.
  2450  func validateCiliumSvc(cSvc models.Service, k8sSvcs []v1.Service, k8sEps []v1.Endpoints, k8sServicesFound map[string]bool) error {
  2451  	var k8sService *v1.Service
  2452  
  2453  	// TODO(brb) validate NodePort services
  2454  	if cSvc.Status.Realized.Flags != nil && cSvc.Status.Realized.Flags.NodePort {
  2455  		return nil
  2456  	}
  2457  
  2458  	for _, k8sSvc := range k8sSvcs {
  2459  		if k8sSvc.Spec.ClusterIP == cSvc.Status.Realized.FrontendAddress.IP {
  2460  			k8sService = &k8sSvc
  2461  			break
  2462  		}
  2463  	}
  2464  	if k8sService == nil {
  2465  		return fmt.Errorf("Could not find Cilium service with ip %s in k8s", cSvc.Spec.FrontendAddress.IP)
  2466  	}
  2467  
  2468  	var k8sServicePort *v1.ServicePort
  2469  	for _, k8sPort := range k8sService.Spec.Ports {
  2470  		if k8sPort.Port == int32(cSvc.Status.Realized.FrontendAddress.Port) {
  2471  			k8sServicePort = &k8sPort
  2472  			k8sServicesFound[serviceKey(*k8sService)] = true
  2473  			break
  2474  		}
  2475  	}
  2476  	if k8sServicePort == nil {
  2477  		return fmt.Errorf("Could not find Cilium service with address %s:%d in k8s", cSvc.Spec.FrontendAddress.IP, cSvc.Spec.FrontendAddress.Port)
  2478  	}
  2479  
  2480  	for _, backAddr := range cSvc.Status.Realized.BackendAddresses {
  2481  		foundEp := false
  2482  		for _, k8sEp := range k8sEps {
  2483  			for _, epAddr := range getK8sEndpointAddresses(k8sEp) {
  2484  				if addrsEqual(backAddr, epAddr) {
  2485  					foundEp = true
  2486  				}
  2487  			}
  2488  		}
  2489  		if !foundEp {
  2490  			return fmt.Errorf(
  2491  				"Could not match cilium service backend address %s:%d with k8s endpoint",
  2492  				*backAddr.IP, backAddr.Port)
  2493  		}
  2494  	}
  2495  	return nil
  2496  }
  2497  
  2498  func validateCiliumSvcLB(cSvc models.Service, lbMap map[string][]string) error {
  2499  	frontendAddress := cSvc.Status.Realized.FrontendAddress.IP + ":" + strconv.Itoa(int(cSvc.Status.Realized.FrontendAddress.Port))
  2500  	bpfBackends, ok := lbMap[frontendAddress]
  2501  	if !ok {
  2502  		return fmt.Errorf("%s bpf lb map entry not found", frontendAddress)
  2503  	}
  2504  
  2505  BACKENDS:
  2506  	for _, addr := range cSvc.Status.Realized.BackendAddresses {
  2507  		backend := *addr.IP + ":" + strconv.Itoa(int(addr.Port))
  2508  		for _, bpfAddr := range bpfBackends {
  2509  			if strings.Contains(bpfAddr, backend) {
  2510  				continue BACKENDS
  2511  			}
  2512  		}
  2513  		return fmt.Errorf("%s not found in bpf map", backend)
  2514  	}
  2515  	return nil
  2516  }
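
// Illustrative data shape (placeholder addresses, not real output):
// validateCiliumSvcLB expects the BPF LB map keyed by "frontendIP:port", with
// each backend represented as an "IP:port" entry, as unmarshalled from
// `cilium bpf lb list -o json`. For example, a service realized with frontend
// 10.96.0.10:53 and two backends might be validated against:
//
//	lbMap := map[string][]string{
//		"10.96.0.10:53": {"10.0.0.12:53", "10.0.1.34:53"},
//	}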
  2517  
  2518  func getK8sEndpointAddresses(ep v1.Endpoints) []*models.BackendAddress {
  2519  	result := []*models.BackendAddress{}
  2520  	for _, subset := range ep.Subsets {
  2521  		for _, addr := range subset.Addresses {
  2522  			ip := addr.IP
  2523  			for _, port := range subset.Ports {
  2524  				ba := &models.BackendAddress{
  2525  					IP:   &ip,
  2526  					Port: uint16(port.Port),
  2527  				}
  2528  				result = append(result, ba)
  2529  			}
  2530  		}
  2531  	}
  2532  	return result
  2533  }
  2534  
  2535  func addrsEqual(addr1, addr2 *models.BackendAddress) bool {
  2536  	return *addr1.IP == *addr2.IP && addr1.Port == addr2.Port
  2537  }
  2538  
  2539  // GenerateNamespaceForTest generates a namespace name based on the test that is
  2540  // currently running.
  2541  func GenerateNamespaceForTest() string {
  2542  	lowered := strings.ToLower(ginkgoext.CurrentGinkgoTestDescription().FullTestText)
  2543  	// K8s namespaces cannot have spaces.
  2544  	replaced := strings.Replace(lowered, " ", "", -1)
  2545  	return replaced
  2546  }
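
// Example (illustrative only): a test described as "K8sPolicyTest Basic Test"
// would yield the namespace name "k8spolicytestbasictest" (lowercased, spaces
// removed). Note that other characters that are invalid in namespace names are
// not sanitized by this helper.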