github.phpd.cn/cilium/cilium@v1.6.12/test/helpers/kubectl.go (about) 1 // Copyright 2018-2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package helpers 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/json" 21 "fmt" 22 "io/ioutil" 23 "os" 24 "path/filepath" 25 "regexp" 26 "strconv" 27 "strings" 28 "sync" 29 "text/tabwriter" 30 "time" 31 32 "github.com/cilium/cilium/api/v1/models" 33 "github.com/cilium/cilium/pkg/annotation" 34 cnpv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 35 "github.com/cilium/cilium/test/config" 36 "github.com/cilium/cilium/test/ginkgo-ext" 37 "github.com/cilium/cilium/test/helpers/logutils" 38 39 "github.com/asaskevich/govalidator" 40 "github.com/sirupsen/logrus" 41 "k8s.io/api/core/v1" 42 ) 43 44 const ( 45 // KubectlCmd Kubernetes controller command 46 KubectlCmd = "kubectl" 47 manifestsPath = "k8sT/manifests/" 48 descriptorsPath = "../examples/kubernetes" 49 kubeDNSLabel = "k8s-app=kube-dns" 50 51 // DNSHelperTimeout is a predefined timeout value for K8s DNS commands. It 52 // must be larger than 5 minutes because kubedns has a hardcoded resync 53 // period of 5 minutes. We have experienced test failures because kubedns 54 // needed this time to recover from a connection problem to kube-apiserver. 55 // The kubedns resyncPeriod is defined at 56 // https://github.com/kubernetes/dns/blob/80fdd88276adba36a87c4f424b66fdf37cd7c9a8/pkg/dns/dns.go#L53 57 DNSHelperTimeout = 7 * time.Minute 58 59 // EnableMicroscope is true when microscope should be enabled 60 EnableMicroscope = false 61 62 // CIIntegrationFlannel contains the constant to be used when flannel is 63 // used in the CI. 64 CIIntegrationFlannel = "flannel" 65 ) 66 67 var ( 68 defaultHelmOptions = map[string]string{ 69 "global.registry": "k8s1:5000/cilium", 70 "agent.image": "cilium-dev", 71 "global.tag": "latest", 72 "operator.image": "operator", 73 "operator.tag": "latest", 74 "managed-etcd.registry": "docker.io/cilium", 75 "global.debug.enabled": "true", 76 "global.k8s.requireIPv4PodCIDR": "true", 77 "global.pprof.enabled": "true", 78 "global.logSystemLoad": "true", 79 "global.bpf.preallocateMaps": "true", 80 "global.etcd.leaseTTL": "30s", 81 "global.ipv4.enabled": "true", 82 "global.ipv6.enabled": "true", 83 } 84 85 flannelHelmOverrides = map[string]string{ 86 "global.flannel.enabled": "true", 87 "global.ipv6.enabled": "false", 88 "global.tunnel": "disabled", 89 } 90 ) 91 92 // GetCurrentK8SEnv returns the value of K8S_VERSION from the OS environment. 93 func GetCurrentK8SEnv() string { return os.Getenv("K8S_VERSION") } 94 95 // GetCurrentIntegration returns CI integration set up to run against Cilium. 96 func GetCurrentIntegration() string { 97 switch strings.ToLower(os.Getenv("CNI_INTEGRATION")) { 98 case CIIntegrationFlannel: 99 return CIIntegrationFlannel 100 default: 101 return "" 102 } 103 } 104 105 // Kubectl is a wrapper around an SSHMeta. 
// It is used to run Kubernetes-specific
// commands on the node which is accessible via the SSH metadata stored in its
// SSHMeta.
type Kubectl struct {
	*SSHMeta
	*serviceCache
}

// CreateKubectl initializes a Kubectl helper with the provided vmName and log.
// It marks the test as failed if it cannot get the SSH meta information or
// cannot execute a `ls` command on the virtual machine.
func CreateKubectl(vmName string, log *logrus.Entry) *Kubectl {
	node := GetVagrantSSHMeta(vmName)
	if node == nil {
		ginkgoext.Fail(fmt.Sprintf("Cannot connect to vmName '%s'", vmName), 1)
		return nil
	}
	// This `ls` command is a sanity check: sometimes the SSH meta information
	// is not nil but new commands cannot be executed over SSH, which made
	// tests fail and was hard to debug.
	res := node.ExecShort("ls /tmp/")
	if !res.WasSuccessful() {
		ginkgoext.Fail(fmt.Sprintf(
			"Cannot execute ls command on vmName '%s'", vmName), 1)
		return nil
	}
	node.logger = log

	return &Kubectl{
		SSHMeta: node,
	}
}

// CepGet returns the endpoint model for the given pod name in the specified
// namespace. If the pod is not present it returns nil.
func (kub *Kubectl) CepGet(namespace string, pod string) *cnpv2.EndpointStatus {
	log := kub.logger.WithFields(logrus.Fields{
		"cep":       pod,
		"namespace": namespace})

	cmd := fmt.Sprintf("%s -n %s get cep %s -o json | jq '.status'", KubectlCmd, namespace, pod)
	res := kub.ExecShort(cmd)
	if !res.WasSuccessful() {
		log.Debug("cep is not present")
		return nil
	}

	var data *cnpv2.EndpointStatus
	err := res.Unmarshal(&data)
	if err != nil {
		log.WithError(err).Error("cannot Unmarshal json")
		return nil
	}
	return data
}

// GetNumNodes returns the number of Kubernetes nodes running.
func (kub *Kubectl) GetNumNodes() int {
	getNodesCmd := fmt.Sprintf("%s get nodes -o jsonpath='{.items.*.metadata.name}'", KubectlCmd)
	res := kub.ExecShort(getNodesCmd)
	if !res.WasSuccessful() {
		return 0
	}

	return len(strings.Split(res.SingleOut(), " "))
}

// ExecKafkaPodCmd executes the shell command arg in the specified pod residing
// in the specified namespace. It returns the stdout of the command that was
// executed.
// The kafka producer and consumer scripts do not return an error if the
// command leads to a TopicAuthorizationException or any other error, so the
// function also needs to take the stderr messages into account.
func (kub *Kubectl) ExecKafkaPodCmd(namespace string, pod string, arg string) error {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, arg)
	res := kub.Exec(command)
	if !res.WasSuccessful() {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed %s",
			res.GetCmd(), res.OutputPrettyPrint())
	}

	if strings.Contains(res.GetStdErr(), "ERROR") {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed '%s'",
			res.GetCmd(), res.OutputPrettyPrint())
	}
	return nil
}

// ExecPodCmd executes command cmd in the specified pod residing in the specified
// namespace. It returns a pointer to CmdRes with all the output.
func (kub *Kubectl) ExecPodCmd(namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.Exec(command, options...)
}
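
// exampleExecInPod is an illustrative sketch and not part of the original
// helpers: it shows how a test would typically create the Kubectl wrapper for
// a Vagrant VM and run a command inside a pod. The VM name "k8s1" and the
// namespace/pod/command below are assumptions made only for this example.
func exampleExecInPod(log *logrus.Entry) error {
	kub := CreateKubectl("k8s1", log)
	if kub == nil {
		return fmt.Errorf("cannot create Kubectl helper")
	}
	res := kub.ExecPodCmd("default", "app-pod", "echo hello")
	if !res.WasSuccessful() {
		return fmt.Errorf("exec failed: %s", res.OutputPrettyPrint())
	}
	return nil
}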

// ExecPodCmdContext synchronously executes command cmd in the specified pod residing in the
// specified namespace. It returns a pointer to CmdRes with all the output.
func (kub *Kubectl) ExecPodCmdContext(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecContext(ctx, command, options...)
}

// ExecPodCmdBackground executes command cmd in background in the specified pod residing
// in the specified namespace. It returns a pointer to CmdRes with all the
// output.
//
// To receive the output of this function, the caller must invoke either
// kub.WaitUntilFinish() or kub.WaitUntilMatch() then subsequently fetch the
// output out of the result.
func (kub *Kubectl) ExecPodCmdBackground(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecInBackground(ctx, command, options...)
}

// Get retrieves the provided Kubernetes objects from the specified namespace.
func (kub *Kubectl) Get(namespace string, command string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s -n %s get %s -o json", KubectlCmd, namespace, command))
}

// GetFromAllNS retrieves the provided Kubernetes objects from all namespaces.
func (kub *Kubectl) GetFromAllNS(kind string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s get %s --all-namespaces -o json", KubectlCmd, kind))
}

// GetCNP retrieves the output of `kubectl get cnp` in the given namespace for
// the given CNP and returns a CiliumNetworkPolicy struct. It returns nil if
// the CNP does not exist or if the JSON output cannot be unmarshaled.
func (kub *Kubectl) GetCNP(namespace string, cnp string) *cnpv2.CiliumNetworkPolicy {
	log := kub.logger.WithFields(logrus.Fields{
		"fn":  "GetCNP",
		"cnp": cnp,
		"ns":  namespace,
	})
	res := kub.Get(namespace, fmt.Sprintf("cnp %s", cnp))
	if !res.WasSuccessful() {
		log.WithField("error", res.CombineOutput()).Info("cannot get CNP")
		return nil
	}
	var result cnpv2.CiliumNetworkPolicy
	err := res.Unmarshal(&result)
	if err != nil {
		log.WithError(err).Errorf("cannot unmarshal CNP output")
		return nil
	}
	return &result
}

// WaitForCRDCount waits until the number of CRDs matching the given filter
// equals count. It returns an error if the timeout is exceeded.
func (kub *Kubectl) WaitForCRDCount(filter string, count int, timeout time.Duration) error {
	// Set regexp flag m for multi-line matching, then add the
	// matches for beginning and end of a line, so that we count
	// at most one match per line (like "grep <filter> | wc -l")
	regex := regexp.MustCompile("(?m:^.*(?:" + filter + ").*$)")
	body := func() bool {
		res := kub.ExecShort(fmt.Sprintf("%s get crds", KubectlCmd))
		if !res.WasSuccessful() {
			log.Error(res.GetErr("kubectl get crds failed"))
			return false
		}
		return len(regex.FindAllString(res.GetStdOut(), -1)) == count
	}
	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for %d CRDs matching filter \"%s\" to be ready", count, filter),
		&TimeoutConfig{Timeout: timeout})
}
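
// exampleWaitForCiliumCRDs is an illustrative sketch and not part of the
// original helpers: it waits until exactly one CRD matching the given filter
// is registered and then fetches a CNP. The CRD filter, namespace and policy
// name are assumptions made only for this example.
func exampleWaitForCiliumCRDs(kub *Kubectl) error {
	if err := kub.WaitForCRDCount("ciliumnetworkpolicies", 1, HelperTimeout); err != nil {
		return err
	}
	if cnp := kub.GetCNP("default", "l3-rule"); cnp == nil {
		return fmt.Errorf("CNP 'l3-rule' not found in namespace 'default'")
	}
	return nil
}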

// GetPods gets all of the pods in the given namespace that match the provided
// filter.
func (kub *Kubectl) GetPods(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get pods %s -o json", KubectlCmd, namespace, filter))
}

// GetPodsNodes returns a map with pod name as a key and node name as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly.
func (kub *Kubectl) GetPodsNodes(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.spec.nodeName}{"\n"}{end}`
	res := kub.Exec(fmt.Sprintf("%s -n %s get pods %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}

// GetPodsIPs returns a map with pod name as a key and pod IP as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly.
func (kub *Kubectl) GetPodsIPs(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.status.podIP}{"\n"}{end}`
	res := kub.ExecShort(fmt.Sprintf("%s -n %s get pods -l %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}

// GetEndpoints gets all of the endpoints in the given namespace that match the
// provided filter.
func (kub *Kubectl) GetEndpoints(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get endpoints %s -o json", KubectlCmd, namespace, filter))
}

// GetAllPods returns a slice of all pods present in the Kubernetes cluster,
// along with an error if the pods could not be retrieved via `kubectl`, or if
// the pod objects cannot be unmarshaled from JSON.
func (kub *Kubectl) GetAllPods(ctx context.Context, options ...ExecOptions) ([]v1.Pod, error) {
	var ops ExecOptions
	if len(options) > 0 {
		ops = options[0]
	}

	getPodsCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
	defer cancel()

	var podsList v1.List
	err := kub.ExecContext(getPodsCtx,
		fmt.Sprintf("%s get pods --all-namespaces -o json", KubectlCmd),
		ExecOptions{SkipLog: ops.SkipLog}).Unmarshal(&podsList)
	if err != nil {
		return nil, err
	}

	// Preallocate capacity only; appending to a slice created with a non-zero
	// length would leave zero-valued pods at the front of the result.
	pods := make([]v1.Pod, 0, len(podsList.Items))
	for _, item := range podsList.Items {
		var pod v1.Pod
		err = json.Unmarshal(item.Raw, &pod)
		if err != nil {
			return nil, err
		}
		pods = append(pods, pod)
	}

	return pods, nil
}

// GetPodNames returns the names of all of the pods that are labeled with label
// in the specified namespace, along with an error if the pod names cannot be
// retrieved.
func (kub *Kubectl) GetPodNames(namespace string, label string) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.GetPodNamesContext(ctx, namespace, label)
}

// GetPodNamesContext returns the names of all of the pods that are labeled with
// label in the specified namespace, along with an error if the pod names cannot
// be retrieved.
355 func (kub *Kubectl) GetPodNamesContext(ctx context.Context, namespace string, label string) ([]string, error) { 356 stdout := new(bytes.Buffer) 357 filter := "-o jsonpath='{.items[*].metadata.name}'" 358 359 cmd := fmt.Sprintf("%s -n %s get pods -l %s %s", KubectlCmd, namespace, label, filter) 360 361 // Taking more than 30 seconds to get pods means that something is wrong 362 // connecting to the node. 363 podNamesCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout) 364 defer cancel() 365 err := kub.ExecuteContext(podNamesCtx, cmd, stdout, nil) 366 367 if err != nil { 368 return nil, fmt.Errorf( 369 "could not find pods in namespace '%v' with label '%v': %s", namespace, label, err) 370 } 371 372 out := strings.Trim(stdout.String(), "\n") 373 if len(out) == 0 { 374 //Small hack. String split always return an array with an empty string 375 return []string{}, nil 376 } 377 return strings.Split(out, " "), nil 378 } 379 380 // GetServiceHostPort returns the host and the first port for the given service name. 381 // It will return an error if service cannot be retrieved. 382 func (kub *Kubectl) GetServiceHostPort(namespace string, service string) (string, int, error) { 383 var data v1.Service 384 err := kub.Get(namespace, fmt.Sprintf("service %s", service)).Unmarshal(&data) 385 if err != nil { 386 return "", 0, err 387 } 388 if len(data.Spec.Ports) == 0 { 389 return "", 0, fmt.Errorf("Service '%s' does not have ports defined", service) 390 } 391 return data.Spec.ClusterIP, int(data.Spec.Ports[0].Port), nil 392 } 393 394 // Logs returns a CmdRes with containing the resulting metadata from the 395 // execution of `kubectl logs <pod> -n <namespace>`. 396 func (kub *Kubectl) Logs(namespace string, pod string) *CmdRes { 397 return kub.Exec( 398 fmt.Sprintf("%s -n %s logs %s", KubectlCmd, namespace, pod)) 399 } 400 401 // MicroscopeStart installs (if it is not installed) a new microscope pod, 402 // waits until pod is ready, and runs microscope in background. It returns an 403 // error in the case where microscope cannot be installed, or it is not ready after 404 // a timeout. It also returns a callback function to stop the monitor and save 405 // the output to `helpers.monitorLogFileName` file. Takes an optional list of 406 // arguments to pass to mircoscope. 
407 func (kub *Kubectl) MicroscopeStart(microscopeOptions ...string) (error, func() error) { 408 if !EnableMicroscope { 409 return nil, func() error { return nil } 410 } 411 412 microscope := "microscope" 413 var microscopeCmd string 414 if len(microscopeOptions) == 0 { 415 microscopeCmd = "microscope" 416 } else { 417 microscopeCmd = fmt.Sprintf("%s %s", microscope, strings.Join(microscopeOptions, " ")) 418 } 419 var microscopeCmdWithTimestamps = microscopeCmd + "| ts '[%Y-%m-%d %H:%M:%S]'" 420 var cb = func() error { return nil } 421 cmd := fmt.Sprintf("%[1]s -ti -n %[2]s exec %[3]s -- %[4]s", 422 KubectlCmd, KubeSystemNamespace, microscope, microscopeCmdWithTimestamps) 423 microscopePath := ManifestGet(microscopeManifest) 424 _ = kub.ApplyDefault(microscopePath) 425 426 err := kub.WaitforPods( 427 KubeSystemNamespace, 428 fmt.Sprintf("-l k8s-app=%s", microscope), 429 HelperTimeout) 430 if err != nil { 431 return err, cb 432 } 433 434 ctx, cancel := context.WithCancel(context.Background()) 435 res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true}) 436 437 cb = func() error { 438 cancel() 439 <-ctx.Done() 440 testPath, err := CreateReportDirectory() 441 if err != nil { 442 kub.logger.WithError(err).Errorf( 443 "cannot create test results path '%s'", testPath) 444 return err 445 } 446 447 err = WriteOrAppendToFile( 448 filepath.Join(testPath, MonitorLogFileName), 449 res.CombineOutput().Bytes(), 450 LogPerm) 451 if err != nil { 452 log.WithError(err).Errorf("cannot create monitor log file") 453 return err 454 } 455 res := kub.Exec(fmt.Sprintf("%s -n %s delete pod --grace-period=0 --force microscope", KubectlCmd, KubeSystemNamespace)) 456 if !res.WasSuccessful() { 457 return fmt.Errorf("error deleting microscope pod: %s", res.OutputPrettyPrint()) 458 } 459 return nil 460 } 461 462 return nil, cb 463 } 464 465 // MonitorStart runs cilium monitor in the background and dumps the contents 466 // into a log file for later debugging 467 func (kub *Kubectl) MonitorStart(namespace, pod, filename string) func() error { 468 cmd := fmt.Sprintf("%s exec -n %s %s -- cilium monitor -v", KubectlCmd, namespace, pod) 469 ctx, cancel := context.WithCancel(context.Background()) 470 res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true}) 471 472 cb := func() error { 473 cancel() 474 <-ctx.Done() 475 testPath, err := CreateReportDirectory() 476 if err != nil { 477 kub.logger.WithError(err).Errorf( 478 "cannot create test results path '%s'", testPath) 479 return err 480 } 481 482 err = WriteOrAppendToFile( 483 filepath.Join(testPath, filename), 484 res.CombineOutput().Bytes(), 485 LogPerm) 486 if err != nil { 487 log.WithError(err).Errorf("cannot create monitor log file %s", filename) 488 return err 489 } 490 return nil 491 } 492 493 return cb 494 } 495 496 // BackgroundReport dumps the result of the given commands on cilium pods each 497 // five seconds. 
498 func (kub *Kubectl) BackgroundReport(commands ...string) (context.CancelFunc, error) { 499 backgroundCtx, cancel := context.WithCancel(context.Background()) 500 pods, err := kub.GetCiliumPods(KubeSystemNamespace) 501 if err != nil { 502 return cancel, fmt.Errorf("Cannot retrieve cilium pods: %s", err) 503 } 504 retrieveInfo := func() { 505 for _, pod := range pods { 506 for _, cmd := range commands { 507 kub.CiliumExec(pod, cmd) 508 } 509 } 510 } 511 go func(ctx context.Context) { 512 ticker := time.NewTicker(5 * time.Second) 513 defer ticker.Stop() 514 for { 515 select { 516 case <-ctx.Done(): 517 return 518 case <-ticker.C: 519 retrieveInfo() 520 } 521 } 522 }(backgroundCtx) 523 return cancel, nil 524 } 525 526 // PprofReport runs pprof on cilium nodes each 5 minutes and saves the data 527 // into the test folder saved with pprof suffix. 528 func (kub *Kubectl) PprofReport() { 529 PProfCadence := 5 * time.Minute 530 ticker := time.NewTicker(PProfCadence) 531 log := kub.logger.WithField("subsys", "pprofReport") 532 533 retrievePProf := func(pod, testPath string) { 534 res := kub.ExecPodCmd(KubeSystemNamespace, pod, "gops pprof-cpu 1") 535 if !res.WasSuccessful() { 536 log.Errorf("cannot execute pprof: %s", res.OutputPrettyPrint()) 537 return 538 } 539 files := kub.ExecPodCmd(KubeSystemNamespace, pod, `ls -1 /tmp/`) 540 for _, file := range files.ByLines() { 541 if !strings.Contains(file, "profile") { 542 continue 543 } 544 545 dest := filepath.Join( 546 BasePath, testPath, 547 fmt.Sprintf("%s-profile-%s.pprof", pod, file)) 548 _ = kub.Exec(fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s %[5]s", 549 KubectlCmd, KubeSystemNamespace, pod, file, dest), 550 ExecOptions{SkipLog: true}) 551 552 _ = kub.ExecPodCmd(KubeSystemNamespace, pod, fmt.Sprintf( 553 "rm %s", filepath.Join("/tmp/", file))) 554 } 555 } 556 557 for { 558 select { 559 case <-ticker.C: 560 561 testPath, err := CreateReportDirectory() 562 if err != nil { 563 log.WithError(err).Errorf("cannot create test result path '%s'", testPath) 564 return 565 } 566 567 pods, err := kub.GetCiliumPods(KubeSystemNamespace) 568 if err != nil { 569 log.Errorf("cannot get cilium pods") 570 } 571 572 for _, pod := range pods { 573 retrievePProf(pod, testPath) 574 } 575 576 } 577 } 578 } 579 580 // NodeCleanMetadata annotates each node in the Kubernetes cluster with the 581 // annotation.V4CIDRName and annotation.V6CIDRName annotations. It returns an 582 // error if the nodes cannot be retrieved via the Kubernetes API. 
func (kub *Kubectl) NodeCleanMetadata() error {
	metadata := []string{
		annotation.V4CIDRName,
		annotation.V6CIDRName,
	}

	data := kub.ExecShort(fmt.Sprintf("%s get nodes -o jsonpath='{.items[*].metadata.name}'", KubectlCmd))
	if !data.WasSuccessful() {
		return fmt.Errorf("could not get nodes via %s: %s", KubectlCmd, data.CombineOutput())
	}
	for _, node := range strings.Split(data.Output().String(), " ") {
		for _, label := range metadata {
			kub.ExecShort(fmt.Sprintf("%s annotate --overwrite nodes %s %s=''", KubectlCmd, node, label))
		}
	}
	return nil
}

// NamespaceCreate creates a new Kubernetes namespace with the given name.
func (kub *Kubectl) NamespaceCreate(name string) *CmdRes {
	ginkgoext.By("Creating namespace %s", name)
	return kub.ExecShort(fmt.Sprintf("%s create namespace %s", KubectlCmd, name))
}

// NamespaceDelete deletes a given Kubernetes namespace.
func (kub *Kubectl) NamespaceDelete(name string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s delete namespace %s", KubectlCmd, name))
}

// NamespaceLabel sets a label in a Kubernetes namespace.
func (kub *Kubectl) NamespaceLabel(namespace string, label string) *CmdRes {
	ginkgoext.By("Setting label %s in namespace %s", label, namespace)
	return kub.ExecShort(fmt.Sprintf("%s label --overwrite namespace %s %s", KubectlCmd, namespace, label))
}

// WaitforPods waits up until the timeout has elapsed for all pods in the
// specified namespace that match the provided JSONPath filter to have their
// containerStatuses equal to "ready". Returns nil if all pods achieve the
// desired state within the timeout. Returns an error if the command failed or
// the timeout was exceeded.
func (kub *Kubectl) WaitforPods(namespace string, filter string, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, 0, timeout)
}

// checkPodStatusFunc returns true if the pod is in the desired state, or false
// otherwise.
type checkPodStatusFunc func(v1.Pod) bool

// checkRunning checks that the pods are running, but not necessarily ready.
func checkRunning(pod v1.Pod) bool {
	if pod.Status.Phase != v1.PodRunning || pod.ObjectMeta.DeletionTimestamp != nil {
		return false
	}
	return true
}

// checkReady determines whether the pods are running and ready.
func checkReady(pod v1.Pod) bool {
	if !checkRunning(pod) {
		return false
	}

	for _, container := range pod.Status.ContainerStatuses {
		if !container.Ready {
			return false
		}
	}
	return true
}
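
// exampleWaitForKubeDNS is an illustrative sketch and not part of the original
// helpers: it mirrors what WaitKubeDNS (defined further below) does, waiting
// for all kube-dns pods to become ready using the label and timeout constants
// declared at the top of this file.
func exampleWaitForKubeDNS(kub *Kubectl) error {
	return kub.WaitforPods(KubeSystemNamespace, fmt.Sprintf("-l %s", kubeDNSLabel), DNSHelperTimeout)
}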

// WaitforNPodsRunning waits up until the timeout duration has elapsed for at
// least minRequired pods in the specified namespace that match the provided
// JSONPath filter to have their containerStatuses equal to "running".
// Returns no error if minRequired pods achieve the desired state within the
// timeout. Returns an error if the command failed or the timeout was exceeded.
// When minRequired is 0, the function derives the required pod count from the
// number of pods in the cluster on every iteration.
func (kub *Kubectl) WaitforNPodsRunning(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkRunning, namespace, filter, minRequired, timeout)
}

// WaitforNPods waits up until the timeout duration has elapsed for at least
// minRequired pods in the specified namespace that match the provided JSONPath
// filter to have their containerStatuses equal to "ready".
// Returns no error if minRequired pods achieve the desired state within the
// timeout. Returns an error if the command failed or the timeout was exceeded.
// When minRequired is 0, the function derives the required pod count from the
// number of pods in the cluster on every iteration.
func (kub *Kubectl) WaitforNPods(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, minRequired, timeout)
}

func (kub *Kubectl) waitForNPods(checkStatus checkPodStatusFunc, namespace string, filter string, minRequired int, timeout time.Duration) error {
	body := func() bool {
		podList := &v1.PodList{}
		err := kub.GetPods(namespace, filter).Unmarshal(podList)
		if err != nil {
			kub.logger.Infof("Error while getting PodList: %s", err)
			return false
		}

		if len(podList.Items) == 0 {
			return false
		}

		var required int

		if minRequired == 0 {
			required = len(podList.Items)
		} else {
			required = minRequired
		}

		if len(podList.Items) < required {
			return false
		}

		// For each pod, count it as running when all conditions are true:
		// - It is scheduled via Phase == v1.PodRunning
		// - It is not scheduled for deletion when DeletionTimestamp is set
		// - All containers in the pod have passed the liveness check via
		//   containerStatuses.Ready
		currScheduled := 0
		for _, pod := range podList.Items {
			if checkStatus(pod) {
				currScheduled++
			}
		}

		return currScheduled >= required
	}

	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for pods with filter %s to be ready", filter),
		&TimeoutConfig{Timeout: timeout})
}
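
// exampleWaitForCiliumAgents is an illustrative sketch and not part of the
// original helpers: it waits until at least two cilium-agent pods are running
// (but not necessarily ready). The label matches the one used by GetCiliumPods
// further below; the pod count of 2 is an assumption for a two-node cluster.
func exampleWaitForCiliumAgents(kub *Kubectl) error {
	return kub.WaitforNPodsRunning(KubeSystemNamespace, "-l k8s-app=cilium", 2, HelperTimeout)
}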

// WaitForServiceEndpoints waits up until the timeout has elapsed for the given
// service in the specified namespace (selected by the provided JSONPath
// filter) to expose at least one port. Returns nil once the endpoint is
// available, and an error if the command failed or the timeout was exceeded.
func (kub *Kubectl) WaitForServiceEndpoints(namespace string, filter string, service string, timeout time.Duration) error {
	body := func() bool {
		var jsonPath = fmt.Sprintf("{.items[?(@.metadata.name == '%s')].subsets[0].ports[0].port}", service)
		data, err := kub.GetEndpoints(namespace, filter).Filter(jsonPath)

		if err != nil {
			kub.logger.WithError(err)
			return false
		}

		if data.String() != "" {
			return true
		}

		kub.logger.WithFields(logrus.Fields{
			"namespace": namespace,
			"filter":    filter,
			"data":      data,
			"service":   service,
		}).Info("WaitForServiceEndpoints: service endpoint not ready")
		return false
	}

	return WithTimeout(body, "could not get service endpoints", &TimeoutConfig{Timeout: timeout})
}

// Action performs the specified ResourceLifeCycleAction on the Kubernetes
// manifest located at path filePath in the given namespace.
func (kub *Kubectl) Action(action ResourceLifeCycleAction, filePath string, namespace ...string) *CmdRes {
	if len(namespace) == 0 {
		kub.logger.Debugf("performing '%v' on '%v'", action, filePath)
		return kub.ExecShort(fmt.Sprintf("%s %s -f %s", KubectlCmd, action, filePath))
	}

	kub.logger.Debugf("performing '%v' on '%v' in namespace '%v'", action, filePath, namespace[0])
	return kub.ExecShort(fmt.Sprintf("%s %s -f %s -n %s", KubectlCmd, action, filePath, namespace[0]))
}

// ApplyOptions stores options for the kubectl apply command.
type ApplyOptions struct {
	FilePath  string
	Namespace string
	Force     bool
	DryRun    bool
	Output    string
	Piped     string
}

// Apply applies the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Apply(options ApplyOptions) *CmdRes {
	var force string
	if options.Force {
		force = "--force=true"
	} else {
		force = "--force=false"
	}

	cmd := fmt.Sprintf("%s apply %s -f %s", KubectlCmd, force, options.FilePath)

	if options.DryRun {
		cmd = cmd + " --dry-run"
	}

	if len(options.Output) > 0 {
		cmd = cmd + " -o " + options.Output
	}

	if len(options.Namespace) == 0 {
		kub.logger.Debugf("applying %s", options.FilePath)
	} else {
		kub.logger.Debugf("applying %s in namespace %s", options.FilePath, options.Namespace)
		cmd = cmd + " -n " + options.Namespace
	}

	if len(options.Piped) > 0 {
		cmd = options.Piped + " | " + cmd
	}
	return kub.ExecMiddle(cmd)
}

// ApplyDefault applies the given filepath with the other options set to their
// defaults.
func (kub *Kubectl) ApplyDefault(filePath string) *CmdRes {
	return kub.Apply(ApplyOptions{FilePath: filePath})
}

// Create creates the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Create(filePath string) *CmdRes {
	kub.logger.Debugf("creating %s", filePath)
	return kub.ExecShort(
		fmt.Sprintf("%s create -f %s", KubectlCmd, filePath))
}

// CreateResource is a wrapper around `kubectl create <resource>
// <resourceName>`.
func (kub *Kubectl) CreateResource(resource, resourceName string) *CmdRes {
	kub.logger.Debug(fmt.Sprintf("creating resource %s with name %s", resource, resourceName))
	return kub.ExecShort(fmt.Sprintf("kubectl create %s %s", resource, resourceName))
}

// DeleteResource is a wrapper around `kubectl delete <resource>
// <resourceName>`.
829 func (kub *Kubectl) DeleteResource(resource, resourceName string) *CmdRes { 830 kub.logger.Debug(fmt.Sprintf("deleting resource %s with name %s", resource, resourceName)) 831 return kub.Exec(fmt.Sprintf("kubectl delete %s %s", resource, resourceName)) 832 } 833 834 // Delete deletes the Kubernetes manifest at path filepath. 835 func (kub *Kubectl) Delete(filePath string) *CmdRes { 836 kub.logger.Debugf("deleting %s", filePath) 837 return kub.ExecShort( 838 fmt.Sprintf("%s delete -f %s", KubectlCmd, filePath)) 839 } 840 841 // WaitKubeDNS waits until the kubeDNS pods are ready. In case of exceeding the 842 // default timeout it returns an error. 843 func (kub *Kubectl) WaitKubeDNS() error { 844 return kub.WaitforPods(KubeSystemNamespace, fmt.Sprintf("-l %s", kubeDNSLabel), DNSHelperTimeout) 845 } 846 847 // WaitForKubeDNSEntry waits until the given DNS entry exists in the kube-dns 848 // service. If the container is not ready after timeout it returns an error. The 849 // name's format query should be `${name}.${namespace}`. If `svc.cluster.local` 850 // is not present, it appends to the given name and it checks the service's FQDN. 851 func (kub *Kubectl) WaitForKubeDNSEntry(serviceName, serviceNamespace string) error { 852 svcSuffix := "svc.cluster.local" 853 logger := kub.logger.WithFields(logrus.Fields{"serviceName": serviceName, "serviceNamespace": serviceNamespace}) 854 855 serviceNameWithNamespace := fmt.Sprintf("%s.%s", serviceName, serviceNamespace) 856 if !strings.HasSuffix(serviceNameWithNamespace, svcSuffix) { 857 serviceNameWithNamespace = fmt.Sprintf("%s.%s", serviceNameWithNamespace, svcSuffix) 858 } 859 // https://bugs.launchpad.net/ubuntu/+source/bind9/+bug/854705 860 digCMD := "dig +short %s @%s | grep -v -e '^;'" 861 862 // If it fails we want to know if it's because of connection cannot be 863 // established or DNS does not exist. 864 digCMDFallback := "dig +tcp %s @%s" 865 866 dnsClusterIP, _, err := kub.GetServiceHostPort(KubeSystemNamespace, "kube-dns") 867 if err != nil { 868 logger.WithError(err).Error("cannot get kube-dns service IP") 869 return err 870 } 871 872 body := func() bool { 873 serviceIP, _, err := kub.GetServiceHostPort(serviceNamespace, serviceName) 874 if err != nil { 875 log.WithError(err).Errorf("cannot get service IP for service %s", serviceNameWithNamespace) 876 return false 877 } 878 879 // ClusterIPNone denotes that this service is headless; there is no 880 // service IP for this service, and thus the IP returned by `dig` is 881 // an IP of the pod itself, not ClusterIPNone, which is what Kubernetes 882 // shows as the IP for the service for headless services. 883 if serviceIP == v1.ClusterIPNone { 884 res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP)) 885 _ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP)) 886 return res.WasSuccessful() 887 } 888 log.Debugf("service is not headless; checking whether IP retrieved from DNS matches the IP for the service stored in Kubernetes") 889 res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP)) 890 serviceIPFromDNS := res.SingleOut() 891 if !govalidator.IsIP(serviceIPFromDNS) { 892 logger.Debugf("output of dig (%s) did not return an IP", serviceIPFromDNS) 893 return false 894 } 895 896 // Due to lag between new IPs for the same service being synced between 897 // kube-apiserver and DNS, check if the IP for the service that is 898 // stored in K8s matches the IP of the service cached in DNS. 
These 899 // can be different, because some tests use the same service names. 900 // Wait accordingly for services to match, and for resolving the service 901 // name to resolve via DNS. 902 if !strings.Contains(serviceIPFromDNS, serviceIP) { 903 logger.Debugf("service IP retrieved from DNS (%s) does not match the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP) 904 _ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP)) 905 return false 906 } 907 logger.Debugf("service IP retrieved from DNS (%s) matches the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP) 908 return true 909 } 910 911 return WithTimeout( 912 body, 913 fmt.Sprintf("DNS '%s' is not ready after timeout", serviceNameWithNamespace), 914 &TimeoutConfig{Timeout: DNSHelperTimeout}) 915 } 916 917 // WaitCleanAllTerminatingPods waits until all nodes that are in `Terminating` 918 // state are deleted correctly in the platform. In case of excedding the 919 // given timeout (in seconds) it returns an error 920 func (kub *Kubectl) WaitCleanAllTerminatingPods(timeout time.Duration) error { 921 body := func() bool { 922 res := kub.ExecShort(fmt.Sprintf( 923 "%s get pods --all-namespaces -o jsonpath='{.items[*].metadata.deletionTimestamp}'", 924 KubectlCmd)) 925 if !res.WasSuccessful() { 926 return false 927 } 928 929 if res.Output().String() == "" { 930 // Output is empty so no terminating containers 931 return true 932 } 933 934 podsTerminating := len(strings.Split(res.Output().String(), " ")) 935 kub.logger.WithField("Terminating pods", podsTerminating).Info("List of pods terminating") 936 if podsTerminating > 0 { 937 return false 938 } 939 return true 940 } 941 942 err := WithTimeout( 943 body, 944 "Pods are still not deleted after a timeout", 945 &TimeoutConfig{Timeout: timeout}) 946 return err 947 } 948 949 // DeployPatch deploys the original kubernetes descriptor with the given patch. 950 func (kub *Kubectl) DeployPatch(original, patch string) error { 951 // debugYaml only dumps the full created yaml file to the test output if 952 // the cilium manifest can not be created correctly. 953 debugYaml := func(original, patch string) { 954 // dry-run is only available since k8s 1.11 955 switch GetCurrentK8SEnv() { 956 case "1.8", "1.9", "1.10": 957 _ = kub.ExecShort(fmt.Sprintf( 958 `%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`, 959 KubectlCmd, original, patch)) 960 default: 961 _ = kub.ExecShort(fmt.Sprintf( 962 `%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run -o yaml`, 963 KubectlCmd, original, patch)) 964 } 965 } 966 967 var res *CmdRes 968 // validation 1st 969 // dry-run is only available since k8s 1.11 970 switch GetCurrentK8SEnv() { 971 case "1.8", "1.9", "1.10": 972 default: 973 res = kub.ExecShort(fmt.Sprintf( 974 `%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run`, 975 KubectlCmd, original, patch)) 976 if !res.WasSuccessful() { 977 debugYaml(original, patch) 978 return res.GetErr("Cilium patch validation failed") 979 } 980 } 981 982 res = kub.Apply(ApplyOptions{ 983 FilePath: "-", 984 Force: true, 985 Piped: fmt.Sprintf( 986 `%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`, 987 KubectlCmd, original, patch), 988 }) 989 if !res.WasSuccessful() { 990 debugYaml(original, patch) 991 return res.GetErr("Cilium manifest patch instalation failed") 992 } 993 return nil 994 } 995 996 // ciliumInstall installs all Cilium descriptors into kubernetes. 
997 // dsPatchName corresponds to the DaemonSet patch, found by 998 // getK8sDescriptorPatch, that will be applied to the original Cilium DaemonSet 999 // descriptor, found by getK8sDescriptor. 1000 // cmPatchName corresponds to the ConfigMap patch, found by 1001 // getK8sDescriptorPatch, that will be applied to the original Cilium ConfigMap 1002 // descriptor, found by getK8sDescriptor. 1003 // Returns an error if any patch or if any original descriptors files were not 1004 // found. 1005 func (kub *Kubectl) ciliumInstall(dsPatchName, cmPatchName string, getK8sDescriptor, getK8sDescriptorPatch func(filename string) string) error { 1006 cmPathname := getK8sDescriptor("cilium-cm.yaml") 1007 if cmPathname == "" { 1008 return fmt.Errorf("Cilium ConfigMap descriptor not found") 1009 } 1010 dsPathname := getK8sDescriptor("cilium-ds.yaml") 1011 if dsPathname == "" { 1012 return fmt.Errorf("Cilium DaemonSet descriptor not found") 1013 } 1014 rbacPathname := getK8sDescriptor("cilium-rbac.yaml") 1015 if rbacPathname == "" { 1016 return fmt.Errorf("Cilium RBAC descriptor not found") 1017 } 1018 1019 deployOriginal := func(original string) error { 1020 // debugYaml only dumps the full created yaml file to the test output if 1021 // the cilium manifest can not be created correctly. 1022 debugYaml := func(original string) { 1023 kub.Apply(ApplyOptions{ 1024 FilePath: original, 1025 DryRun: true, 1026 Output: "yaml", 1027 }) 1028 } 1029 1030 // validation 1st 1031 res := kub.Apply(ApplyOptions{ 1032 FilePath: original, 1033 DryRun: true, 1034 }) 1035 if !res.WasSuccessful() { 1036 debugYaml(original) 1037 return res.GetErr("Cilium manifest validation fails") 1038 } 1039 1040 res = kub.ApplyDefault(original) 1041 if !res.WasSuccessful() { 1042 debugYaml(original) 1043 return res.GetErr("Cannot apply Cilium manifest") 1044 } 1045 return nil 1046 } 1047 1048 if err := deployOriginal(rbacPathname); err != nil { 1049 return err 1050 } 1051 1052 if err := kub.DeployPatch(cmPathname, getK8sDescriptorPatch(cmPatchName)); err != nil { 1053 return err 1054 } 1055 1056 if err := kub.DeployPatch(dsPathname, getK8sDescriptorPatch(dsPatchName)); err != nil { 1057 return err 1058 } 1059 1060 cmdRes := kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorSA)) 1061 if !cmdRes.WasSuccessful() { 1062 return fmt.Errorf("Unable to deploy descriptor of etcd-operator SA %s: %s", ciliumEtcdOperatorSA, cmdRes.OutputPrettyPrint()) 1063 } 1064 1065 cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorRBAC)) 1066 if !cmdRes.WasSuccessful() { 1067 return fmt.Errorf("Unable to deploy descriptor of etcd-operator RBAC %s: %s", ciliumEtcdOperatorRBAC, cmdRes.OutputPrettyPrint()) 1068 } 1069 1070 cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperator)) 1071 if !cmdRes.WasSuccessful() { 1072 return fmt.Errorf("Unable to deploy descriptor of etcd-operator %s: %s", ciliumEtcdOperator, cmdRes.OutputPrettyPrint()) 1073 } 1074 1075 _ = kub.ApplyDefault(getK8sDescriptor("cilium-operator-sa.yaml")) 1076 err := kub.DeployPatch(getK8sDescriptor("cilium-operator.yaml"), getK8sDescriptorPatch("cilium-operator-patch.yaml")) 1077 if err != nil { 1078 return fmt.Errorf("Unable to deploy descriptor of cilium-operators: %s", err) 1079 } 1080 1081 return nil 1082 } 1083 1084 func addIfNotOverwritten(options []string, field, value string) []string { 1085 for _, s := range options { 1086 if strings.HasPrefix(s, "--set "+field) { 1087 return options 1088 } 1089 } 1090 1091 options = append(options, "--set "+field+"="+value) 1092 return options 
}

func (kub *Kubectl) generateCiliumYaml(options []string, filename string) error {
	for key, value := range defaultHelmOptions {
		options = addIfNotOverwritten(options, key, value)
	}

	switch GetCurrentIntegration() {
	case CIIntegrationFlannel:
		// Appending the options will override earlier options on CLI.
		for k, v := range flannelHelmOverrides {
			options = append(options, fmt.Sprintf("--set %s=%s", k, v))
		}
	default:
	}

	// TODO GH-8753: Use helm rendering library instead of shelling out to
	// helm template
	res := kub.ExecMiddle(fmt.Sprintf("helm template %s --namespace=kube-system %s > %s",
		HelmTemplate, strings.Join(options, " "), filename))
	if !res.WasSuccessful() {
		return res.GetErr("Unable to generate YAML")
	}

	return nil
}

// ciliumInstallHelm installs Cilium with the Helm options provided.
func (kub *Kubectl) ciliumInstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Apply(ApplyOptions{FilePath: "cilium.yaml", Force: true})
	if !res.WasSuccessful() {
		return res.GetErr("Unable to apply YAML")
	}

	return nil
}

// ciliumUninstallHelm uninstalls Cilium with the Helm options provided.
func (kub *Kubectl) ciliumUninstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Delete("cilium.yaml")
	if !res.WasSuccessful() {
		return res.GetErr("Unable to delete YAML")
	}

	return nil
}

// CiliumInstall installs Cilium with the provided Helm options.
func (kub *Kubectl) CiliumInstall(options []string) error {
	return kub.ciliumInstallHelm(options)
}

// CiliumUninstall uninstalls Cilium with the provided Helm options.
func (kub *Kubectl) CiliumUninstall(options []string) error {
	return kub.ciliumUninstallHelm(options)
}
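
// exampleInstallCiliumWithOverrides is an illustrative sketch and not part of
// the original helpers: it installs Cilium while overriding two of the
// defaultHelmOptions declared at the top of this file. The specific keys are
// only examples of the "--set key=value" format expected by
// generateCiliumYaml; any option not passed explicitly keeps its default
// because addIfNotOverwritten only appends missing "--set" flags.
func exampleInstallCiliumWithOverrides(kub *Kubectl) error {
	return kub.CiliumInstall([]string{
		"--set global.debug.enabled=false",
		"--set global.pprof.enabled=false",
	})
}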

// CiliumInstallVersion installs all Cilium descriptors into kubernetes for
// a given Cilium Version tag.
// dsPatchName corresponds to the DaemonSet patch that will be applied to the
// original Cilium DaemonSet descriptor of that given Cilium Version tag.
// cmPatchName corresponds to the ConfigMap patch that will be applied to the
// original Cilium ConfigMap descriptor of that given Cilium Version tag.
// Returns an error if any patch or any original descriptor file was not found.
func (kub *Kubectl) CiliumInstallVersion(dsPatchName, cmPatchName, versionTag string) error {
	getK8sDescriptorPatch := func(filename string) string {
		// try dependent Cilium, k8s and integration version patch file
		ginkgoVersionedPath := filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), GetCurrentIntegration(), filename)
		_, err := os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version and integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium and k8s version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		return filepath.Join(BasePath, manifestsPath, filename)
	}
	getK8sDescriptor := func(filename string) string {
		return fmt.Sprintf("https://raw.githubusercontent.com/cilium/cilium/%s/examples/kubernetes/%s/%s", versionTag, GetCurrentK8SEnv(), filename)
	}
	return kub.ciliumInstall(dsPatchName, cmPatchName, getK8sDescriptor, getK8sDescriptorPatch)
}

// GetCiliumPods returns a list of all Cilium pods in the specified namespace,
// and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPods(namespace string) ([]string, error) {
	return kub.GetPodNames(namespace, "k8s-app=cilium")
}

// GetCiliumPodsContext returns a list of all Cilium pods in the specified
// namespace, and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPodsContext(ctx context.Context, namespace string) ([]string, error) {
	return kub.GetPodNamesContext(ctx, namespace, "k8s-app=cilium")
}

// CiliumEndpointsList returns the result of `cilium endpoint list` from the
// specified pod.
func (kub *Kubectl) CiliumEndpointsList(ctx context.Context, pod string) *CmdRes {
	return kub.CiliumExecContext(ctx, pod, "cilium endpoint list -o json")
}

// CiliumEndpointsStatus returns a mapping of a pod name to its corresponding
// endpoint's status.
func (kub *Kubectl) CiliumEndpointsStatus(pod string) map[string]string {
	filter := `{range [*]}{@.status.external-identifiers.pod-name}{"="}{@.status.state}{"\n"}{end}`
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.CiliumExecContext(ctx, pod, fmt.Sprintf(
		"cilium endpoint list -o jsonpath='%s'", filter)).KVOutput()
}

// CiliumEndpointWaitReady waits until all endpoints managed by all Cilium pods
// are ready.
Returns an error if the Cilium pods cannot be retrieved via 1236 // Kubernetes, or endpoints are not ready after a specified timeout 1237 func (kub *Kubectl) CiliumEndpointWaitReady() error { 1238 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 1239 if err != nil { 1240 kub.logger.WithError(err).Error("cannot get Cilium pods") 1241 return err 1242 } 1243 1244 body := func(ctx context.Context) (bool, error) { 1245 var wg sync.WaitGroup 1246 queue := make(chan bool, len(ciliumPods)) 1247 endpointsReady := func(pod string) { 1248 valid := false 1249 defer func() { 1250 queue <- valid 1251 wg.Done() 1252 }() 1253 logCtx := kub.logger.WithField("pod", pod) 1254 status, err := kub.CiliumEndpointsList(ctx, pod).Filter(`{range [*]}{.status.state}{"="}{.status.identity.id}{"\n"}{end}`) 1255 if err != nil { 1256 logCtx.WithError(err).Errorf("cannot get endpoints states on Cilium pod") 1257 return 1258 } 1259 total := 0 1260 invalid := 0 1261 for _, line := range strings.Split(status.String(), "\n") { 1262 if line == "" { 1263 continue 1264 } 1265 // each line is like status=identityID. 1266 // IdentityID is needed because the reserved:init identity 1267 // means that the pod is not ready to accept traffic. 1268 total++ 1269 vals := strings.Split(line, "=") 1270 if len(vals) != 2 { 1271 logCtx.Errorf("Endpoint list does not have a correct output '%s'", line) 1272 return 1273 } 1274 if vals[0] != "ready" { 1275 invalid++ 1276 } 1277 // Consider an endpoint with reserved identity 5 (reserved:init) as not ready. 1278 if vals[1] == "5" { 1279 invalid++ 1280 } 1281 } 1282 logCtx.WithFields(logrus.Fields{ 1283 "total": total, 1284 "invalid": invalid, 1285 }).Info("Waiting for cilium endpoints to be ready") 1286 1287 if invalid != 0 { 1288 return 1289 } 1290 valid = true 1291 return 1292 } 1293 wg.Add(len(ciliumPods)) 1294 for _, pod := range ciliumPods { 1295 go endpointsReady(pod) 1296 } 1297 1298 wg.Wait() 1299 close(queue) 1300 1301 for status := range queue { 1302 if status == false { 1303 return false, nil 1304 } 1305 } 1306 return true, nil 1307 } 1308 1309 ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout) 1310 defer cancel() 1311 err = WithContext(ctx, body, 1*time.Second) 1312 if err == nil { 1313 return err 1314 } 1315 1316 callback := func() string { 1317 ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout) 1318 defer cancel() 1319 1320 var errorMessage string 1321 for _, pod := range ciliumPods { 1322 var endpoints []models.Endpoint 1323 cmdRes := kub.CiliumEndpointsList(ctx, pod) 1324 if !cmdRes.WasSuccessful() { 1325 errorMessage += fmt.Sprintf( 1326 "\tCilium Pod: %s \terror: unable to get endpoint list: %s", 1327 pod, cmdRes.err) 1328 continue 1329 } 1330 err := cmdRes.Unmarshal(&endpoints) 1331 if err != nil { 1332 errorMessage += fmt.Sprintf( 1333 "\tCilium Pod: %s \terror: unable to parse endpoint list: %s", 1334 pod, err) 1335 continue 1336 } 1337 for _, ep := range endpoints { 1338 errorMessage += fmt.Sprintf( 1339 "\tCilium Pod: %s \tEndpoint: %d \tIdentity: %d\t State: %s\n", 1340 pod, ep.ID, ep.Status.Identity.ID, ep.Status.State) 1341 } 1342 } 1343 return errorMessage 1344 } 1345 return NewSSHMetaError(err.Error(), callback) 1346 } 1347 1348 // WaitForCEPIdentity waits for a particular CEP to have an identity present. 
func (kub *Kubectl) WaitForCEPIdentity(ns, podName string) error {
	body := func(ctx context.Context) (bool, error) {
		ep := kub.CepGet(ns, podName)
		if ep == nil {
			return false, nil
		}
		if ep.Identity == nil {
			return false, nil
		}
		return ep.Identity.ID != 0, nil
	}

	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
	defer cancel()
	return WithContext(ctx, body, 1*time.Second)
}

// CiliumExecContext runs cmd in the specified Cilium pod with the given context.
func (kub *Kubectl) CiliumExecContext(ctx context.Context, pod string, cmd string) *CmdRes {
	limitTimes := 5
	execute := func() *CmdRes {
		command := fmt.Sprintf("%s exec -n kube-system %s -- %s", KubectlCmd, pod, cmd)
		return kub.ExecContext(ctx, command)
	}
	var res *CmdRes
	// Sometimes kubectl returns exit code 126. This used to happen in nightly
	// tests when many execs were in flight (a cgroups issue). The upstream
	// changes did not fix the issue, so retry a few times as a workaround.
	// https://github.com/openshift/origin/issues/16246
	for i := 0; i < limitTimes; i++ {
		res = execute()
		if res.GetExitCode() != 126 {
			break
		}
		time.Sleep(200 * time.Millisecond)
	}
	return res
}

// CiliumExec runs cmd in the specified Cilium pod.
// Deprecated: use CiliumExecContext instead.
func (kub *Kubectl) CiliumExec(pod string, cmd string) *CmdRes {
	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
	defer cancel()
	return kub.CiliumExecContext(ctx, pod, cmd)
}

// CiliumExecUntilMatch executes the specified command repeatedly for the
// specified Cilium pod until the given substring is present in stdout.
// If the timeout is reached it will return an error.
1400 func (kub *Kubectl) CiliumExecUntilMatch(pod, cmd, substr string) error { 1401 body := func() bool { 1402 ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout) 1403 defer cancel() 1404 res := kub.CiliumExecContext(ctx, pod, cmd) 1405 return strings.Contains(res.Output().String(), substr) 1406 } 1407 1408 return WithTimeout( 1409 body, 1410 fmt.Sprintf("%s is not in the output after timeout", substr), 1411 &TimeoutConfig{Timeout: HelperTimeout}) 1412 } 1413 1414 // WaitForCiliumInitContainerToFinish waits for all Cilium init containers to 1415 // finish 1416 func (kub *Kubectl) WaitForCiliumInitContainerToFinish() error { 1417 body := func() bool { 1418 podList := &v1.PodList{} 1419 err := kub.GetPods("kube-system", "-l k8s-app=cilium").Unmarshal(podList) 1420 if err != nil { 1421 kub.logger.Infof("Error while getting PodList: %s", err) 1422 return false 1423 } 1424 if len(podList.Items) == 0 { 1425 return false 1426 } 1427 for _, pod := range podList.Items { 1428 for _, v := range pod.Status.InitContainerStatuses { 1429 if v.State.Terminated != nil && (v.State.Terminated.Reason != "Completed" || v.State.Terminated.ExitCode != 0) { 1430 kub.logger.WithFields(logrus.Fields{ 1431 "podName": pod.Name, 1432 "currentState": v.State.String(), 1433 }).Infof("Cilium Init container not completed") 1434 return false 1435 } 1436 } 1437 } 1438 return true 1439 } 1440 1441 return WithTimeout(body, "Cilium Init Container was not able to initialize or had a successful run", &TimeoutConfig{Timeout: HelperTimeout}) 1442 } 1443 1444 // CiliumNodesWait waits until all nodes in the Kubernetes cluster are annotated 1445 // with Cilium annotations. Its runtime is bounded by a maximum of `HelperTimeout`. 1446 // When a node is annotated with said annotations, it indicates 1447 // that the tunnels in the nodes are set up and that cross-node traffic can be 1448 // tested. Returns an error if the timeout is exceeded for waiting for the nodes 1449 // to be annotated. 1450 func (kub *Kubectl) CiliumNodesWait() (bool, error) { 1451 body := func() bool { 1452 filter := `{range .items[*]}{@.metadata.name}{"="}{@.metadata.annotations.io\.cilium\.network\.ipv4-pod-cidr}{"\n"}{end}` 1453 data := kub.ExecShort(fmt.Sprintf( 1454 "%s get nodes -o jsonpath='%s'", KubectlCmd, filter)) 1455 if !data.WasSuccessful() { 1456 return false 1457 } 1458 result := data.KVOutput() 1459 for k, v := range result { 1460 if v == "" { 1461 kub.logger.Infof("Kubernetes node '%v' does not have Cilium metadata", k) 1462 return false 1463 } 1464 kub.logger.Infof("Kubernetes node '%v' IPv4 address: '%v'", k, v) 1465 } 1466 return true 1467 } 1468 err := WithTimeout(body, "Kubernetes node does not have cilium metadata", &TimeoutConfig{Timeout: HelperTimeout}) 1469 if err != nil { 1470 return false, err 1471 } 1472 return true, nil 1473 } 1474 1475 // WaitPolicyDeleted waits for policy policyName to be deleted from the 1476 // cilium-agent running in pod. Returns an error if policyName was unable to 1477 // be deleted after some amount of time. 1478 func (kub *Kubectl) WaitPolicyDeleted(pod string, policyName string) error { 1479 body := func() bool { 1480 ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout) 1481 defer cancel() 1482 res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyName)) 1483 1484 // `cilium policy get <policy name>` fails if the policy is not loaded, 1485 // which is the condition we want. 
		return !res.WasSuccessful()
	}

	return WithTimeout(body, fmt.Sprintf("Policy %s was not deleted in time", policyName), &TimeoutConfig{Timeout: HelperTimeout})
}

// CiliumIsPolicyLoaded returns true if the policy is loaded in the given
// Cilium pod, and false if the policy is not in place.
func (kub *Kubectl) CiliumIsPolicyLoaded(pod string, policyCmd string) bool {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyCmd))
	return res.WasSuccessful()
}

// CiliumPolicyRevision returns the policy revision in the specified Cilium pod.
// Returns an error if the policy revision cannot be retrieved.
func (kub *Kubectl) CiliumPolicyRevision(pod string) (int, error) {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	res := kub.CiliumExecContext(ctx, pod, "cilium policy get -o json")
	if !res.WasSuccessful() {
		return -1, fmt.Errorf("cannot get the revision %s", res.Output())
	}

	revision, err := res.Filter("{.revision}")
	if err != nil {
		return -1, fmt.Errorf("cannot get revision from json: %s", err)
	}

	revi, err := strconv.Atoi(strings.Trim(revision.String(), "\n"))
	if err != nil {
		kub.logger.Errorf("revision on pod '%s' is not valid '%s'", pod, res.CombineOutput())
		return -1, err
	}
	return revi, nil
}

// ResourceLifeCycleAction represents an action performed upon objects in
// Kubernetes.
type ResourceLifeCycleAction string

// CiliumPolicyAction performs the specified action in Kubernetes for the policy
// stored in path filepath and waits up until the timeout for the policy to be
// applied in all Cilium endpoints. Returns an error if the policy is not
// imported before the timeout is exceeded.
func (kub *Kubectl) CiliumPolicyAction(namespace, filepath string, action ResourceLifeCycleAction, timeout time.Duration) (string, error) {
	numNodes := kub.GetNumNodes()

	// Test filter: https://jqplay.org/s/EgNzc06Cgn
	jqFilter := fmt.Sprintf(
		`[.items[]|{name:.metadata.name, enforcing: (.status|if has("nodes") then .nodes |to_entries|map_values(.value.enforcing) + [(.|length >= %d)]|all else true end)|tostring, status: has("status")|tostring}]`,
		numNodes)
	npFilter := fmt.Sprintf(
		`{range .items[*]}{"%s="}{.metadata.name}{" %s="}{.metadata.namespace}{"\n"}{end}`,
		KubectlPolicyNameLabel, KubectlPolicyNameSpaceLabel)
	kub.logger.Infof("Performing %s action on resource '%s'", action, filepath)

	if status := kub.Action(action, filepath, namespace); !status.WasSuccessful() {
		return "", status.GetErr(fmt.Sprintf("Cannot perform '%s' on resource '%s'", action, filepath))
	}

	if action == KubectlDelete {
		// The policy has been uninstalled; there is no need to validate that
		// it is being enforced.
1551 return "", nil 1552 } 1553 1554 body := func() bool { 1555 var data []map[string]string 1556 cmd := fmt.Sprintf("%s get cnp --all-namespaces -o json | jq '%s'", 1557 KubectlCmd, jqFilter) 1558 1559 res := kub.ExecShort(cmd) 1560 if !res.WasSuccessful() { 1561 kub.logger.WithError(res.GetErr("")).Error("cannot get cnp status") 1562 return false 1563 1564 } 1565 1566 err := res.Unmarshal(&data) 1567 if err != nil { 1568 kub.logger.WithError(err).Error("Cannot unmarshal json") 1569 return false 1570 } 1571 1572 for _, item := range data { 1573 if item["enforcing"] != "true" || item["status"] != "true" { 1574 kub.logger.Errorf("Policy '%s' is not enforcing yet", item["name"]) 1575 return false 1576 } 1577 } 1578 return true 1579 } 1580 1581 err := WithTimeout( 1582 body, 1583 "cannot change state of resource correctly; command timed out", 1584 &TimeoutConfig{Timeout: timeout}) 1585 1586 if err != nil { 1587 return "", err 1588 } 1589 1590 knpBody := func() bool { 1591 knp := kub.ExecShort(fmt.Sprintf("%s get --all-namespaces netpol -o jsonpath='%s'", 1592 KubectlCmd, npFilter)) 1593 result := knp.ByLines() 1594 if len(result) == 0 { 1595 return true 1596 } 1597 1598 pods, err := kub.GetCiliumPods(KubeSystemNamespace) 1599 if err != nil { 1600 kub.logger.WithError(err).Error("cannot retrieve cilium pods") 1601 return false 1602 } 1603 for _, item := range result { 1604 for _, ciliumPod := range pods { 1605 if !kub.CiliumIsPolicyLoaded(ciliumPod, item) { 1606 kub.logger.Infof("Policy '%s' is not ready on Cilium pod '%s'", item, ciliumPod) 1607 return false 1608 } 1609 } 1610 } 1611 return true 1612 } 1613 1614 err = WithTimeout( 1615 knpBody, 1616 "cannot change state of Kubernetes network policies correctly; command timed out", 1617 &TimeoutConfig{Timeout: timeout}) 1618 return "", err 1619 } 1620 1621 // CiliumReport report the cilium pod to the log and appends the logs for the 1622 // given commands. 1623 func (kub *Kubectl) CiliumReport(namespace string, commands ...string) { 1624 if config.CiliumTestConfig.SkipLogGathering { 1625 ginkgoext.GinkgoPrint("Skipped gathering logs (-cilium.skipLogs=true)\n") 1626 return 1627 } 1628 1629 // Log gathering for Cilium should take at most 5 minutes. This ensures that 1630 // the CiliumReport stage doesn't cause the entire CI to hang. 
1631 1632 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) 1633 defer cancel() 1634 1635 var wg sync.WaitGroup 1636 wg.Add(1) 1637 1638 go func() { 1639 defer wg.Done() 1640 kub.DumpCiliumCommandOutput(ctx, namespace) 1641 kub.GatherLogs(ctx) 1642 }() 1643 1644 kub.CiliumCheckReport(ctx) 1645 1646 pods, err := kub.GetCiliumPodsContext(ctx, namespace) 1647 if err != nil { 1648 kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump") 1649 } 1650 res := kub.ExecContextShort(ctx, fmt.Sprintf("%s get pods -o wide --all-namespaces", KubectlCmd)) 1651 ginkgoext.GinkgoPrint(res.GetDebugMessage()) 1652 1653 results := make([]*CmdRes, 0, len(pods)*len(commands)) 1654 ginkgoext.GinkgoPrint("Fetching command output from pods %s", pods) 1655 for _, pod := range pods { 1656 for _, cmd := range commands { 1657 res = kub.ExecPodCmdBackground(ctx, namespace, pod, cmd, ExecOptions{SkipLog: true}) 1658 results = append(results, res) 1659 } 1660 } 1661 1662 wg.Wait() 1663 1664 for _, res := range results { 1665 res.WaitUntilFinish() 1666 ginkgoext.GinkgoPrint(res.GetDebugMessage()) 1667 } 1668 } 1669 1670 // EtcdOperatorReport dumps etcd pod data into the report directory so that the 1671 // etcd operator status can be debugged when a test fails. 1672 func (kub *Kubectl) EtcdOperatorReport(ctx context.Context, reportCmds map[string]string) { 1673 if reportCmds == nil { 1674 reportCmds = make(map[string]string) 1675 } 1676 1677 pods, err := kub.GetPodNamesContext(ctx, KubeSystemNamespace, "etcd_cluster=cilium-etcd") 1678 if err != nil { 1679 kub.logger.WithError(err).Error("No etcd pods") 1680 return 1681 } 1682 1683 etcdctl := "etcdctl --endpoints=https://%s.cilium-etcd.kube-system.svc:2379 " + 1684 "--cert-file /etc/etcdtls/member/peer-tls/peer.crt " + 1685 "--key-file /etc/etcdtls/member/peer-tls/peer.key " + 1686 "--ca-file /etc/etcdtls/member/peer-tls/peer-ca.crt " + 1687 " %s" 1688 1689 etcdDumpCommands := map[string]string{ 1690 "member list": "etcd_%s_member_list", 1691 "cluster-health": "etcd_%s_cluster_health", 1692 } 1693 1694 for _, pod := range pods { 1695 for cmd, reportFile := range etcdDumpCommands { 1696 etcdCmd := fmt.Sprintf(etcdctl, pod, cmd) 1697 command := fmt.Sprintf("%s -n %s exec -ti %s -- %s", 1698 KubectlCmd, KubeSystemNamespace, pod, etcdCmd) 1699 reportCmds[command] = fmt.Sprintf(reportFile, pod) 1700 } 1701 } 1702 } 1703 1704 // CiliumCheckReport prints a few checks on the JUnit output to provide more 1705 // context to users. The checks printed are the following: 1706 // - Number of Kubernetes and Cilium policies installed. 1707 // - Policy enforcement status by endpoint. 1708 // - Controller, health, kvstore status.
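// An illustrative usage sketch (assumes it is called from a Ginkgo reporting
// hook with an initialized *Kubectl named kub; the context value is up to the
// caller):
//
//	kub.CiliumCheckReport(context.Background())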
1709 func (kub *Kubectl) CiliumCheckReport(ctx context.Context) { 1710 pods, _ := kub.GetCiliumPods(KubeSystemNamespace) 1711 fmt.Fprintf(CheckLogs, "Cilium pods: %v\n", pods) 1712 1713 var policiesFilter = `{range .items[*]}{.metadata.namespace}{"::"}{.metadata.name}{" "}{end}` 1714 netpols := kub.ExecContextShort(ctx, fmt.Sprintf( 1715 "%s get netpol -o jsonpath='%s' --all-namespaces", 1716 KubectlCmd, policiesFilter)) 1717 fmt.Fprintf(CheckLogs, "Netpols loaded: %v\n", netpols.Output()) 1718 1719 cnp := kub.ExecContextShort(ctx, fmt.Sprintf( 1720 "%s get cnp -o jsonpath='%s' --all-namespaces", 1721 KubectlCmd, policiesFilter)) 1722 fmt.Fprintf(CheckLogs, "CiliumNetworkPolicies loaded: %v\n", cnp.Output()) 1723 1724 cepFilter := `{range .items[*]}{.metadata.name}{"="}{.status.policy.ingress.enforcing}{":"}{.status.policy.egress.enforcing}{"\n"}{end}` 1725 cepStatus := kub.ExecContextShort(ctx, fmt.Sprintf( 1726 "%s get cep -o jsonpath='%s' --all-namespaces", 1727 KubectlCmd, cepFilter)) 1728 1729 fmt.Fprintf(CheckLogs, "Endpoint Policy Enforcement:\n") 1730 1731 table := tabwriter.NewWriter(CheckLogs, 5, 0, 3, ' ', 0) 1732 fmt.Fprintf(table, "Pod\tIngress\tEgress\n") 1733 for pod, policy := range cepStatus.KVOutput() { 1734 data := strings.SplitN(policy, ":", 2) 1735 if len(data) != 2 { 1736 // Replace the slice wholesale to avoid an index-out-of-range panic below. 1737 data = []string{"invalid value", "invalid value"} 1738 } 1739 fmt.Fprintf(table, "%s\t%s\t%s\n", pod, data[0], data[1]) 1740 } 1741 table.Flush() 1742 1743 var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}::{.status.last-failure-msg}{"\n"}{end}` 1744 var failedControllers string 1745 for _, pod := range pods { 1746 var prefix = "" 1747 status := kub.CiliumExecContext(ctx, pod, "cilium status --all-controllers -o json") 1748 result, err := status.Filter(controllersFilter) 1749 if err != nil { 1750 kub.logger.WithError(err).Error("Cannot filter controller status output") 1751 continue 1752 } 1753 var total = 0 1754 var failed = 0 1755 for name, data := range result.KVOutput() { 1756 total++ 1757 status := strings.SplitN(data, "::", 2) 1758 if len(status) != 2 { 1759 // Just make sure that the len of the output is 2 so that the 1760 // following lines do not fail with an index error. 1761 continue 1762 } 1763 if status[0] != "" { 1764 failed++ 1765 prefix = "⚠️ " 1766 failedControllers += fmt.Sprintf("controller %s failure '%s'\n", name, status[1]) 1767 } 1768 } 1769 statusFilter := `Status: {.cilium.state} Health: {.cluster.ciliumHealth.state}` + 1770 ` Nodes "{.cluster.nodes[*].name}" ContainerRuntime: {.container-runtime.state}` + 1771 ` Kubernetes: {.kubernetes.state} KVstore: {.kvstore.state}` 1772 data, _ := status.Filter(statusFilter) 1773 fmt.Fprintf(CheckLogs, "%sCilium agent '%s': %s Controllers: Total %d Failed %d\n", 1774 prefix, pod, data, total, failed) 1775 if failedControllers != "" { 1776 fmt.Fprintf(CheckLogs, "Failed controllers:\n %s", failedControllers) 1777 } 1778 } 1779 } 1780 1781 // ValidateNoErrorsInLogs checks that the Cilium logs since the given duration (by 1782 // default `CurrentGinkgoTestDescription().Duration`) do not contain `panic`, 1783 // `deadlock` or `segmentation fault` messages. If any of these messages are 1784 // found, it marks the test as failed.
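// An illustrative usage sketch (assumes a Ginkgo JustAfterEach hook and an
// initialized *Kubectl named kubectl; the test description helper comes from the
// ginkgoext package used elsewhere in this file):
//
//	kubectl.ValidateNoErrorsInLogs(ginkgoext.CurrentGinkgoTestDescription().Duration)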
1785 func (kub *Kubectl) ValidateNoErrorsInLogs(duration time.Duration) { 1786 1787 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) 1788 defer cancel() 1789 1790 var logs string 1791 cmd := fmt.Sprintf("%s -n %s logs --timestamps=true -l k8s-app=cilium --since=%vs", 1792 KubectlCmd, KubeSystemNamespace, duration.Seconds()) 1793 res := kub.ExecContext(ctx, fmt.Sprintf("%s --previous", cmd), ExecOptions{SkipLog: true}) 1794 if res.WasSuccessful() { 1795 logs += res.Output().String() 1796 } 1797 res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true}) 1798 if res.WasSuccessful() { 1799 logs += res.Output().String() 1800 } 1801 defer func() { 1802 // Keep the cilium logs for the given test in a separate file. 1803 testPath, err := CreateReportDirectory() 1804 if err != nil { 1805 kub.logger.WithError(err).Error("Cannot create report directory") 1806 return 1807 } 1808 err = ioutil.WriteFile( 1809 fmt.Sprintf("%s/%s", testPath, CiliumTestLog), 1810 []byte(logs), LogPerm) 1811 1812 if err != nil { 1813 kub.logger.WithError(err).Errorf("Cannot create %s", CiliumTestLog) 1814 } 1815 }() 1816 1817 failIfContainsBadLogMsg(logs) 1818 1819 fmt.Fprint(CheckLogs, logutils.LogErrorsSummary(logs)) 1820 } 1821 1822 // GatherCiliumCoreDumps copies core dumps, if any are present in the /tmp folder, 1823 // into the test report folder for further analysis. 1824 func (kub *Kubectl) GatherCiliumCoreDumps(ctx context.Context, ciliumPod string) { 1825 log := kub.logger.WithField("pod", ciliumPod) 1826 1827 cores := kub.CiliumExecContext(ctx, ciliumPod, "ls /tmp/ | grep core") 1828 if !cores.WasSuccessful() { 1829 log.Debug("There are no core dumps in the pod") 1830 return 1831 } 1832 1833 testPath, err := CreateReportDirectory() 1834 if err != nil { 1835 log.WithError(err).Errorf("cannot create test result path '%s'", testPath) 1836 return 1837 } 1838 resultPath := filepath.Join(BasePath, testPath) 1839 1840 for _, core := range cores.ByLines() { 1841 dst := filepath.Join(resultPath, core) 1842 src := filepath.Join("/tmp/", core) 1843 cmd := fmt.Sprintf("%s -n %s cp %s:%s %s", 1844 KubectlCmd, KubeSystemNamespace, 1845 ciliumPod, src, dst) 1846 res := kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true}) 1847 if !res.WasSuccessful() { 1848 log.WithField("output", res.CombineOutput()).Error("Cannot get core from pod") 1849 } 1850 } 1851 } 1852 1853 // GetCiliumHostIPv4 retrieves the IPv4 address of the cilium_host interface on the given node.
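// An illustrative usage sketch (the node name "k8s1" and the error handling are
// assumptions for this example, not values defined in this file):
//
//	hostIP, err := kub.GetCiliumHostIPv4(context.TODO(), "k8s1")
//	if err != nil {
//		// handle the lookup failure
//	}
//	fmt.Printf("cilium_host address: %s\n", hostIP)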
1854 func (kub *Kubectl) GetCiliumHostIPv4(ctx context.Context, node string) (string, error) { 1855 pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, node) 1856 if err != nil { 1857 return "", fmt.Errorf("unable to retrieve cilium pod: %s", err) 1858 } 1859 1860 cmd := "ip -4 -o a show dev cilium_host | grep -o -e 'inet [0-9.]*' | cut -d' ' -f2" 1861 res := kub.ExecPodCmd(KubeSystemNamespace, pod, cmd) 1862 if !res.WasSuccessful() { 1863 return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr: %s", res.GetError()) 1864 } 1865 addr := res.SingleOut() 1866 if addr == "" { 1867 return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr") 1868 } 1869 1870 return addr, nil 1871 } 1872 1873 // DumpCiliumCommandOutput runs a variety of commands (CiliumKubCLICommands) and writes the results to 1874 // TestResultsPath 1875 func (kub *Kubectl) DumpCiliumCommandOutput(ctx context.Context, namespace string) { 1876 ReportOnPod := func(pod string) { 1877 logger := kub.logger.WithField("CiliumPod", pod) 1878 1879 testPath, err := CreateReportDirectory() 1880 if err != nil { 1881 logger.WithError(err).Errorf("cannot create test result path '%s'", testPath) 1882 return 1883 } 1884 1885 genReportCmds := func(cliCmds map[string]string) map[string]string { 1886 reportCmds := map[string]string{} 1887 for cmd, logfile := range cliCmds { 1888 command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd) 1889 reportCmds[command] = fmt.Sprintf("%s_%s", pod, logfile) 1890 } 1891 return reportCmds 1892 } 1893 1894 reportCmds := genReportCmds(ciliumKubCLICommands) 1895 reportMapContext(ctx, testPath, reportCmds, kub.SSHMeta) 1896 1897 logsPath := filepath.Join(BasePath, testPath) 1898 1899 // Get bugtool output. Since bugtool output is dumped in the pod's filesystem, 1900 // copy it over with `kubectl cp`. 1901 bugtoolCmd := fmt.Sprintf("%s exec -n %s %s -- %s", 1902 KubectlCmd, namespace, pod, CiliumBugtool) 1903 res := kub.ExecContext(ctx, bugtoolCmd, ExecOptions{SkipLog: true}) 1904 if !res.WasSuccessful() { 1905 logger.Errorf("%s failed: %s", bugtoolCmd, res.CombineOutput().String()) 1906 return 1907 } 1908 // Default output directory is /tmp for bugtool. 1909 res = kub.ExecContext(ctx, fmt.Sprintf("%s exec -n %s %s -- ls /tmp/", KubectlCmd, namespace, pod)) 1910 tmpList := res.ByLines() 1911 for _, line := range tmpList { 1912 // Only copy over bugtool output to directory. 
1913 if !strings.Contains(line, CiliumBugtool) { 1914 continue 1915 } 1916 1917 res = kub.ExecContext(ctx, fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s /tmp/%[4]s", 1918 KubectlCmd, namespace, pod, line), 1919 ExecOptions{SkipLog: true}) 1920 if !res.WasSuccessful() { 1921 logger.Errorf("'%s' failed: %s", res.GetCmd(), res.CombineOutput()) 1922 continue 1923 } 1924 1925 archiveName := filepath.Join(logsPath, fmt.Sprintf("bugtool-%s", pod)) 1926 res = kub.ExecContext(ctx, fmt.Sprintf("mkdir -p %s", archiveName)) 1927 if !res.WasSuccessful() { 1928 logger.WithField("cmd", res.GetCmd()).Errorf( 1929 "cannot create bugtool archive folder: %s", res.CombineOutput()) 1930 continue 1931 } 1932 1933 cmd := fmt.Sprintf("tar -xf /tmp/%s -C %s --strip-components=1", line, archiveName) 1934 res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true}) 1935 if !res.WasSuccessful() { 1936 logger.WithField("cmd", cmd).Errorf( 1937 "Cannot untar bugtool output: %s", res.CombineOutput()) 1938 continue 1939 } 1940 // Remove the bugtool artifact so it is not picked up again if another test fails. 1941 _ = kub.ExecPodCmdBackground(ctx, KubeSystemNamespace, pod, fmt.Sprintf("rm /tmp/%s", line)) 1942 } 1943 1944 // Finally, get kvstore output - this is best effort; we do this last 1945 // because if connectivity to the kvstore is broken from a cilium pod, 1946 // we don't want the context above to time out and, as a result, get none 1947 // of the other logs from the tests. 1948 1949 // Use a shorter context for kvstore-related commands to avoid having 1950 // further log-gathering fail as well if the first Cilium pod fails to 1951 // gather kvstore logs. 1952 kvstoreCmdCtx, cancel := context.WithTimeout(ctx, MidCommandTimeout) 1953 defer cancel() 1954 reportCmds = genReportCmds(ciliumKubCLICommandsKVStore) 1955 reportMapContext(kvstoreCmdCtx, testPath, reportCmds, kub.SSHMeta) 1956 } 1957 1958 pods, err := kub.GetCiliumPodsContext(ctx, namespace) 1959 if err != nil { 1960 kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump") 1961 return 1962 } 1963 for _, pod := range pods { 1964 ReportOnPod(pod) 1965 kub.GatherCiliumCoreDumps(ctx, pod) 1966 } 1967 } 1968 1969 // GatherLogs dumps Kubernetes pods, services, and DaemonSets to the testResultsPath 1970 // directory. 1971 func (kub *Kubectl) GatherLogs(ctx context.Context) { 1972 reportCmds := map[string]string{ 1973 "kubectl get pods --all-namespaces -o json": "pods.txt", 1974 "kubectl get services --all-namespaces -o json": "svc.txt", 1975 "kubectl get nodes -o json": "nodes.txt", 1976 "kubectl get ds --all-namespaces -o json": "ds.txt", 1977 "kubectl get cnp --all-namespaces -o json": "cnp.txt", 1978 "kubectl get cep --all-namespaces -o json": "cep.txt", 1979 "kubectl get netpol --all-namespaces -o json": "netpol.txt", 1980 "kubectl describe pods --all-namespaces": "pods_status.txt", 1981 "kubectl get replicationcontroller --all-namespaces -o json": "replicationcontroller.txt", 1982 "kubectl get deployment --all-namespaces -o json": "deployment.txt", 1983 } 1984 1985 kub.GeneratePodLogGatheringCommands(ctx, reportCmds) 1986 kub.EtcdOperatorReport(ctx, reportCmds) 1987 1988 res := kub.ExecContext(ctx, fmt.Sprintf(`%s api-resources | grep -v "^NAME" | awk '{print $1}'`, KubectlCmd)) 1989 if res.WasSuccessful() { 1990 for _, line := range res.ByLines() { 1991 key := fmt.Sprintf("%s get %s --all-namespaces -o wide", KubectlCmd, line) 1992 reportCmds[key] = fmt.Sprintf("api-resource-%s.txt", line) 1993 } 1994 } else { 1995 kub.logger.Errorf("Cannot get
api-resources: %s", res.GetDebugMessage()) 1996 } 1997 1998 testPath, err := CreateReportDirectory() 1999 if err != nil { 2000 kub.logger.WithError(err).Errorf( 2001 "cannot create test results path '%s'", testPath) 2002 return 2003 } 2004 reportMap(testPath, reportCmds, kub.SSHMeta) 2005 2006 for _, node := range []string{K8s1VMName(), K8s2VMName()} { 2007 vm := GetVagrantSSHMeta(node) 2008 reportCmds := map[string]string{ 2009 "journalctl --no-pager -au kubelet": fmt.Sprintf("kubelet-%s.log", node), 2010 "sudo top -n 1 -b": fmt.Sprintf("top-%s.log", node), 2011 "sudo ps aux": fmt.Sprintf("ps-%s.log", node), 2012 } 2013 reportMapContext(ctx, testPath, reportCmds, vm) 2014 } 2015 } 2016 2017 // GeneratePodLogGatheringCommands generates the commands to gather logs for 2018 // all pods in the Kubernetes cluster, and maps each command to the filename 2019 // in which its output will be stored in reportCmds. 2020 func (kub *Kubectl) GeneratePodLogGatheringCommands(ctx context.Context, reportCmds map[string]string) { 2021 if reportCmds == nil { 2022 reportCmds = make(map[string]string) 2023 } 2024 pods, err := kub.GetAllPods(ctx, ExecOptions{SkipLog: true}) 2025 if err != nil { 2026 kub.logger.WithError(err).Error("Unable to get pods from Kubernetes via kubectl") 2027 } 2028 2029 for _, pod := range pods { 2030 for _, containerStatus := range pod.Status.ContainerStatuses { 2031 logCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name) 2032 logfileName := fmt.Sprintf("pod-%s-%s-%s.log", pod.Namespace, pod.Name, containerStatus.Name) 2033 reportCmds[logCmd] = logfileName 2034 2035 if containerStatus.RestartCount > 0 { 2036 previousLogCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s --previous", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name) 2037 previousLogfileName := fmt.Sprintf("pod-%s-%s-%s-previous.log", pod.Namespace, pod.Name, containerStatus.Name) 2038 reportCmds[previousLogCmd] = previousLogfileName 2039 } 2040 } 2041 } 2042 } 2043 2044 // GetCiliumPodOnNode returns the name of the Cilium pod that is running on 2045 // the specified node in the specified namespace.
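// An illustrative usage sketch (the node name "k8s1" is an assumption for this
// example):
//
//	ciliumPod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, "k8s1")
//	if err != nil {
//		// no Cilium pod was scheduled on that node
//	}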
2046 func (kub *Kubectl) GetCiliumPodOnNode(namespace string, node string) (string, error) { 2047 filter := fmt.Sprintf( 2048 "-o jsonpath='{.items[?(@.spec.nodeName == \"%s\")].metadata.name}'", node) 2049 2050 res := kub.ExecShort(fmt.Sprintf( 2051 "%s -n %s get pods -l k8s-app=cilium %s", KubectlCmd, namespace, filter)) 2052 if !res.WasSuccessful() { 2053 return "", fmt.Errorf("Cilium pod not found on node '%s'", node) 2054 } 2055 2056 return res.Output().String(), nil 2057 } 2058 2059 func (kub *Kubectl) ciliumPreFlightCheck() error { 2060 err := kub.ciliumStatusPreFlightCheck() 2061 if err != nil { 2062 return fmt.Errorf("status is unhealthy: %s", err) 2063 } 2064 2065 err = kub.ciliumControllersPreFlightCheck() 2066 if err != nil { 2067 return fmt.Errorf("controllers are failing: %s", err) 2068 } 2069 2070 switch GetCurrentIntegration() { 2071 case CIIntegrationFlannel: 2072 default: 2073 err = kub.ciliumHealthPreFlightCheck() 2074 if err != nil { 2075 return fmt.Errorf("connectivity health is failing: %s", err) 2076 } 2077 } 2078 err = kub.fillServiceCache() 2079 if err != nil { 2080 return fmt.Errorf("unable to fill service cache: %s", err) 2081 } 2082 err = kub.ciliumServicePreFlightCheck() 2083 if err != nil { 2084 return fmt.Errorf("cilium services are not set up correctly: %s", err) 2085 } 2086 err = kub.servicePreFlightCheck("kubernetes", "default") 2087 if err != nil { 2088 return fmt.Errorf("kubernetes service is not ready: %s", err) 2089 } 2090 2091 return nil 2092 } 2093 2094 // CiliumPreFlightCheck checks that various subsystems within 2095 // Cilium are in a good state. If any of the preflight checks fails, it 2096 // returns an error. 2097 func (kub *Kubectl) CiliumPreFlightCheck() error { 2098 ginkgoext.By("Performing Cilium preflight check") 2099 // Run this inside WithTimeout because the status can report ready while the 2100 // other nodes have not shown up yet, which makes cilium-health fail as a false positive.
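	// lastError and consecutiveFailures throttle the log output below: the
	// current error is printed only when it differs from the previous one or
	// after five identical failures in a row.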
2101 var ( 2102 lastError string 2103 consecutiveFailures int 2104 ) 2105 2106 body := func() bool { 2107 if err := kub.ciliumPreFlightCheck(); err != nil { 2108 newError := err.Error() 2109 if lastError != newError || consecutiveFailures >= 5 { 2110 ginkgoext.GinkgoPrint("Cilium is not ready yet: %s", newError) 2111 lastError = newError 2112 consecutiveFailures = 0 2113 } else { 2114 consecutiveFailures++ 2115 } 2116 return false 2117 } 2118 return true 2119 2120 } 2121 timeoutErr := WithTimeout(body, "PreflightCheck failed", &TimeoutConfig{Timeout: HelperTimeout}) 2122 if timeoutErr != nil { 2123 return fmt.Errorf("CiliumPreFlightCheck error: %s: Last polled error: %s", timeoutErr, lastError) 2124 } 2125 return nil 2126 } 2127 2128 func (kub *Kubectl) ciliumStatusPreFlightCheck() error { 2129 ginkgoext.By("Performing Cilium status preflight check") 2130 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2131 if err != nil { 2132 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2133 } 2134 for _, pod := range ciliumPods { 2135 status := kub.CiliumExec(pod, "cilium status --all-health --all-nodes") 2136 if !status.WasSuccessful() { 2137 return fmt.Errorf("cilium-agent '%s' is unhealthy: %s", pod, status.OutputPrettyPrint()) 2138 } 2139 noQuorum, err := regexp.Match(`^.*KVStore:.*has-quorum=false.*$`, status.Output().Bytes()) 2140 if err != nil { 2141 return fmt.Errorf("Failed to check for kvstore quorum: %s", err.Error()) 2142 } 2143 if noQuorum { 2144 return fmt.Errorf("KVStore doesn't have quorum: %s", status.OutputPrettyPrint()) 2145 } 2146 } 2147 2148 return nil 2149 } 2150 2151 func (kub *Kubectl) ciliumControllersPreFlightCheck() error { 2152 ginkgoext.By("Performing Cilium controllers preflight check") 2153 var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}{"\n"}{end}` 2154 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2155 if err != nil { 2156 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2157 } 2158 for _, pod := range ciliumPods { 2159 status := kub.CiliumExec(pod, fmt.Sprintf( 2160 "cilium status --all-controllers -o jsonpath='%s'", controllersFilter)) 2161 if !status.WasSuccessful() { 2162 return fmt.Errorf("cilium-agent '%s': Cannot run cilium status: %s", 2163 pod, status.OutputPrettyPrint()) 2164 } 2165 for controller, status := range status.KVOutput() { 2166 if status != "0" { 2167 failmsg := kub.CiliumExec(pod, "cilium status --all-controllers") 2168 return fmt.Errorf("cilium-agent '%s': controller %s is failing: %s", 2169 pod, controller, failmsg.OutputPrettyPrint()) 2170 } 2171 } 2172 } 2173 2174 return nil 2175 } 2176 2177 func (kub *Kubectl) ciliumHealthPreFlightCheck() error { 2178 ginkgoext.By("Performing Cilium health check") 2179 var nodesFilter = `{.nodes[*].name}` 2180 var statusFilter = `{range .nodes[*]}{.name}{"="}{.host.primary-address.http.status}{"\n"}{end}` 2181 2182 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2183 if err != nil { 2184 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2185 } 2186 for _, pod := range ciliumPods { 2187 status := kub.CiliumExec(pod, "cilium-health status -o json --probe") 2188 if !status.WasSuccessful() { 2189 return fmt.Errorf( 2190 "Cluster connectivity is unhealthy on '%s': %s", 2191 pod, status.OutputPrettyPrint()) 2192 } 2193 2194 // By Checking that the node list is the same 2195 nodes, err := status.Filter(nodesFilter) 2196 if err != nil { 2197 return fmt.Errorf("Cannot unmarshal health status: %s", err) 2198 } 
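		// Cross-check that cilium-health reports as many nodes as there are Cilium pods.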
2199 2200 nodeCount := strings.Split(nodes.String(), " ") 2201 if len(ciliumPods) != len(nodeCount) { 2202 return fmt.Errorf( 2203 "cilium-agent '%s': Only %d/%d nodes appeared in cilium-health status. nodes = '%+v'", 2204 pod, len(nodeCount), len(ciliumPods), nodeCount) 2205 } 2206 2207 healthStatus, err := status.Filter(statusFilter) 2208 if err != nil { 2209 return fmt.Errorf("Cannot unmarshal health status: %s", err) 2210 } 2211 2212 for node, status := range healthStatus.KVOutput() { 2213 if status != "" { 2214 return fmt.Errorf("cilium-agent '%s': connectivity to node '%s' is unhealthy: '%s'", 2215 pod, node, status) 2216 } 2217 } 2218 } 2219 return nil 2220 } 2221 2222 // serviceCache keeps service information from 2223 // k8s, Cilium services and Cilium bpf load balancer map 2224 type serviceCache struct { 2225 services v1.ServiceList 2226 endpoints v1.EndpointsList 2227 pods []ciliumPodServiceCache 2228 } 2229 2230 // ciliumPodServiceCache 2231 type ciliumPodServiceCache struct { 2232 name string 2233 services []models.Service 2234 loadBalancers map[string][]string 2235 } 2236 2237 func (kub *Kubectl) fillServiceCache() error { 2238 cache := serviceCache{} 2239 2240 svcRes := kub.GetFromAllNS("service") 2241 err := svcRes.GetErr("Unable to get k8s services") 2242 if err != nil { 2243 return err 2244 } 2245 err = svcRes.Unmarshal(&cache.services) 2246 2247 if err != nil { 2248 return fmt.Errorf("Unable to unmarshal K8s services: %s", err.Error()) 2249 } 2250 2251 epRes := kub.GetFromAllNS("endpoints") 2252 err = epRes.GetErr("Unable to get k8s endpoints") 2253 if err != nil { 2254 return err 2255 } 2256 err = epRes.Unmarshal(&cache.endpoints) 2257 if err != nil { 2258 return fmt.Errorf("Unable to unmarshal K8s endpoints: %s", err.Error()) 2259 } 2260 2261 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2262 if err != nil { 2263 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2264 } 2265 ciliumSvcCmd := "cilium service list -o json" 2266 ciliumBpfLbCmd := "cilium bpf lb list -o json" 2267 2268 cache.pods = make([]ciliumPodServiceCache, 0, len(ciliumPods)) 2269 for _, pod := range ciliumPods { 2270 podCache := ciliumPodServiceCache{name: pod} 2271 2272 ciliumServicesRes := kub.CiliumExec(pod, ciliumSvcCmd) 2273 err := ciliumServicesRes.GetErr( 2274 fmt.Sprintf("Unable to retrieve Cilium services on %s", pod)) 2275 if err != nil { 2276 return err 2277 } 2278 2279 err = ciliumServicesRes.Unmarshal(&podCache.services) 2280 if err != nil { 2281 return fmt.Errorf("Unable to unmarshal Cilium services: %s", err.Error()) 2282 } 2283 2284 ciliumLbRes := kub.CiliumExec(pod, ciliumBpfLbCmd) 2285 err = ciliumLbRes.GetErr( 2286 fmt.Sprintf("Unable to retrieve Cilium bpf lb list on %s", pod)) 2287 if err != nil { 2288 return err 2289 } 2290 2291 err = ciliumLbRes.Unmarshal(&podCache.loadBalancers) 2292 if err != nil { 2293 return fmt.Errorf("Unable to unmarshal Cilium bpf lb list: %s", err.Error()) 2294 } 2295 cache.pods = append(cache.pods, podCache) 2296 } 2297 kub.serviceCache = &cache 2298 return nil 2299 } 2300 2301 // KubeDNSPreFlightCheck makes sure that kube-dns is plumbed into Cilium. 
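// An illustrative usage sketch (assumes gomega-style assertion helpers are
// available in the calling test):
//
//	err := kub.KubeDNSPreFlightCheck()
//	Expect(err).To(BeNil(), "kube-dns is not plumbed into Cilium")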
2302 func (kub *Kubectl) KubeDNSPreFlightCheck() error { 2303 err := kub.fillServiceCache() 2304 if err != nil { 2305 return err 2306 } 2307 return kub.servicePreFlightCheck("kube-dns", "kube-system") 2308 } 2309 2310 // servicePreFlightCheck makes sure that k8s service with given name and 2311 // namespace is properly plumbed in Cilium 2312 func (kub *Kubectl) servicePreFlightCheck(serviceName, serviceNamespace string) error { 2313 ginkgoext.By("Performing K8s service preflight check") 2314 var service *v1.Service 2315 for _, s := range kub.serviceCache.services.Items { 2316 if s.Name == serviceName && s.Namespace == serviceNamespace { 2317 service = &s 2318 break 2319 } 2320 } 2321 2322 if service == nil { 2323 return fmt.Errorf("%s/%s service not found in service cache", serviceName, serviceNamespace) 2324 } 2325 2326 for _, pod := range kub.serviceCache.pods { 2327 2328 err := validateK8sService(*service, kub.serviceCache.endpoints.Items, pod.services, pod.loadBalancers) 2329 if err != nil { 2330 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2331 } 2332 } 2333 return nil 2334 } 2335 2336 func validateK8sService(k8sService v1.Service, k8sEndpoints []v1.Endpoints, ciliumSvcs []models.Service, ciliumLB map[string][]string) error { 2337 var ciliumService *models.Service 2338 CILIUM_SERVICES: 2339 for _, cSvc := range ciliumSvcs { 2340 if cSvc.Status.Realized.FrontendAddress.IP == k8sService.Spec.ClusterIP { 2341 for _, port := range k8sService.Spec.Ports { 2342 if int32(cSvc.Status.Realized.FrontendAddress.Port) == port.Port { 2343 ciliumService = &cSvc 2344 break CILIUM_SERVICES 2345 } 2346 } 2347 } 2348 } 2349 2350 if ciliumService == nil { 2351 return fmt.Errorf("Failed to find Cilium service corresponding to %s/%s k8s service", k8sService.Namespace, k8sService.Name) 2352 } 2353 2354 temp := map[string]bool{} 2355 err := validateCiliumSvc(*ciliumService, []v1.Service{k8sService}, k8sEndpoints, temp) 2356 if err != nil { 2357 return err 2358 } 2359 return validateCiliumSvcLB(*ciliumService, ciliumLB) 2360 } 2361 2362 // ciliumServicePreFlightCheck checks that k8s service is plumbed correctly 2363 func (kub *Kubectl) ciliumServicePreFlightCheck() error { 2364 ginkgoext.By("Performing Cilium service preflight check") 2365 for _, pod := range kub.serviceCache.pods { 2366 k8sServicesFound := map[string]bool{} 2367 2368 for _, cSvc := range pod.services { 2369 err := validateCiliumSvc(cSvc, kub.serviceCache.services.Items, kub.serviceCache.endpoints.Items, k8sServicesFound) 2370 if err != nil { 2371 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2372 } 2373 } 2374 2375 notFoundServices := make([]string, 0, len(kub.serviceCache.services.Items)) 2376 for _, k8sSvc := range kub.serviceCache.services.Items { 2377 key := serviceKey(k8sSvc) 2378 // ignore headless services 2379 if k8sSvc.Spec.Type == v1.ServiceTypeClusterIP && 2380 k8sSvc.Spec.ClusterIP == v1.ClusterIPNone { 2381 continue 2382 } 2383 // TODO(brb) check NodePort services 2384 if k8sSvc.Spec.Type == v1.ServiceTypeNodePort { 2385 continue 2386 } 2387 if _, ok := k8sServicesFound[key]; !ok { 2388 notFoundServices = append(notFoundServices, key) 2389 } 2390 } 2391 2392 if len(notFoundServices) > 0 { 2393 return fmt.Errorf("Failed to find Cilium service corresponding to k8s services %s on pod %v", 2394 strings.Join(notFoundServices, ", "), pod) 2395 } 2396 2397 for _, cSvc := range pod.services { 2398 err := validateCiliumSvcLB(cSvc, pod.loadBalancers) 2399 if 
err != nil { 2400 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2401 } 2402 } 2403 if len(pod.services) != len(pod.loadBalancers) { 2404 return fmt.Errorf("Length of Cilium services doesn't match length of bpf LB map on pod %v", pod) 2405 } 2406 } 2407 return nil 2408 } 2409 2410 // DeleteETCDOperator delete the etcd-operator from the cluster pointed by kub. 2411 func (kub *Kubectl) DeleteETCDOperator() { 2412 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete crd etcdclusters.etcd.database.coreos.com", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2413 log.Warningf("Unable to delete etcdclusters.etcd.database.coreos.com CRD: %s", res.OutputPrettyPrint()) 2414 } 2415 2416 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete deployment cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2417 log.Warningf("Unable to delete cilium-etcd-operator Deployment: %s", res.OutputPrettyPrint()) 2418 } 2419 2420 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2421 log.Warningf("Unable to delete cilium-etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint()) 2422 } 2423 2424 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2425 log.Warningf("Unable to delete cilium-etcd-operator ClusterRole: %s", res.OutputPrettyPrint()) 2426 } 2427 2428 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2429 log.Warningf("Unable to delete cilium-etcd-operator ServiceAccount: %s", res.OutputPrettyPrint()) 2430 } 2431 2432 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2433 log.Warningf("Unable to delete etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint()) 2434 } 2435 2436 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2437 log.Warningf("Unable to delete etcd-operator ClusterRole: %s", res.OutputPrettyPrint()) 2438 } 2439 2440 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-sa", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2441 log.Warningf("Unable to delete cilium-etcd-sa ServiceAccount: %s", res.OutputPrettyPrint()) 2442 } 2443 } 2444 2445 func serviceKey(s v1.Service) string { 2446 return s.Namespace + "/" + s.Name 2447 } 2448 2449 // validateCiliumSvc checks if given Cilium service has corresponding k8s services and endpoints in given slices 2450 func validateCiliumSvc(cSvc models.Service, k8sSvcs []v1.Service, k8sEps []v1.Endpoints, k8sServicesFound map[string]bool) error { 2451 var k8sService *v1.Service 2452 2453 // TODO(brb) validate NodePort services 2454 if cSvc.Status.Realized.Flags != nil && cSvc.Status.Realized.Flags.NodePort { 2455 return nil 2456 } 2457 2458 for _, k8sSvc := range k8sSvcs { 2459 if k8sSvc.Spec.ClusterIP == cSvc.Status.Realized.FrontendAddress.IP { 2460 k8sService = &k8sSvc 2461 break 2462 } 2463 } 2464 if k8sService == nil { 2465 return fmt.Errorf("Could not find Cilium service with ip %s in k8s", cSvc.Spec.FrontendAddress.IP) 2466 } 2467 2468 var k8sServicePort *v1.ServicePort 2469 for _, k8sPort := range k8sService.Spec.Ports { 2470 if k8sPort.Port == int32(cSvc.Status.Realized.FrontendAddress.Port) { 2471 k8sServicePort = &k8sPort 2472 
k8sServicesFound[serviceKey(*k8sService)] = true 2473 break 2474 } 2475 } 2476 if k8sServicePort == nil { 2477 return fmt.Errorf("Could not find Cilium service with address %s:%d in k8s", cSvc.Spec.FrontendAddress.IP, cSvc.Spec.FrontendAddress.Port) 2478 } 2479 2480 for _, backAddr := range cSvc.Status.Realized.BackendAddresses { 2481 foundEp := false 2482 for _, k8sEp := range k8sEps { 2483 for _, epAddr := range getK8sEndpointAddresses(k8sEp) { 2484 if addrsEqual(backAddr, epAddr) { 2485 foundEp = true 2486 } 2487 } 2488 } 2489 if !foundEp { 2490 return fmt.Errorf( 2491 "Could not match cilium service backend address %s:%d with k8s endpoint", 2492 *backAddr.IP, backAddr.Port) 2493 } 2494 } 2495 return nil 2496 } 2497 2498 func validateCiliumSvcLB(cSvc models.Service, lbMap map[string][]string) error { 2499 frontendAddress := cSvc.Status.Realized.FrontendAddress.IP + ":" + strconv.Itoa(int(cSvc.Status.Realized.FrontendAddress.Port)) 2500 bpfBackends, ok := lbMap[frontendAddress] 2501 if !ok { 2502 return fmt.Errorf("%s bpf lb map entry not found", frontendAddress) 2503 } 2504 2505 BACKENDS: 2506 for _, addr := range cSvc.Status.Realized.BackendAddresses { 2507 backend := *addr.IP + ":" + strconv.Itoa(int(addr.Port)) 2508 for _, bpfAddr := range bpfBackends { 2509 if strings.Contains(bpfAddr, backend) { 2510 continue BACKENDS 2511 } 2512 } 2513 return fmt.Errorf("%s not found in bpf map", backend) 2514 } 2515 return nil 2516 } 2517 2518 func getK8sEndpointAddresses(ep v1.Endpoints) []*models.BackendAddress { 2519 result := []*models.BackendAddress{} 2520 for _, subset := range ep.Subsets { 2521 for _, addr := range subset.Addresses { 2522 ip := addr.IP 2523 for _, port := range subset.Ports { 2524 ba := &models.BackendAddress{ 2525 IP: &ip, 2526 Port: uint16(port.Port), 2527 } 2528 result = append(result, ba) 2529 } 2530 } 2531 } 2532 return result 2533 } 2534 2535 func addrsEqual(addr1, addr2 *models.BackendAddress) bool { 2536 return *addr1.IP == *addr2.IP && addr1.Port == addr2.Port 2537 } 2538 2539 // GenerateNamespaceForTest generates a namespace based off of the current test 2540 // which is running. 2541 func GenerateNamespaceForTest() string { 2542 lowered := strings.ToLower(ginkgoext.CurrentGinkgoTestDescription().FullTestText) 2543 // K8s namespaces cannot have spaces. 2544 replaced := strings.Replace(lowered, " ", "", -1) 2545 return replaced 2546 }
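// The helpers in this file are typically combined in a Ginkgo suite roughly as
// in the sketch below. This is an illustration only: the logger, the manifest
// name "l3-policy.yaml", the "default" namespace, and the KubectlApply action
// constant are assumptions, not definitions from this file, and BeforeAll,
// It, AfterFailed, Expect, and BeNil are the usual Ginkgo/gomega-style hooks
// and assertions.
//
//	var kubectl *Kubectl
//
//	BeforeAll(func() {
//		kubectl = CreateKubectl(K8s1VMName(), logger)
//		Expect(kubectl.CiliumPreFlightCheck()).To(BeNil())
//	})
//
//	It("imports an L3 policy", func() {
//		_, err := kubectl.CiliumPolicyAction(
//			"default", "l3-policy.yaml", KubectlApply, HelperTimeout)
//		Expect(err).To(BeNil(), "policy was not imported in time")
//	})
//
//	AfterFailed(func() {
//		kubectl.CiliumReport(KubeSystemNamespace, "cilium endpoint list")
//	})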