// Copyright 2018-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package helpers

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"text/tabwriter"
	"time"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/pkg/annotation"
	cnpv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/test/config"
	"github.com/cilium/cilium/test/ginkgo-ext"
	"github.com/cilium/cilium/test/helpers/logutils"

	"github.com/asaskevich/govalidator"
	"github.com/sirupsen/logrus"
	"k8s.io/api/core/v1"
)

const (
	// KubectlCmd Kubernetes controller command
	KubectlCmd      = "kubectl"
	manifestsPath   = "k8sT/manifests/"
	descriptorsPath = "../examples/kubernetes"
	kubeDNSLabel    = "k8s-app=kube-dns"

	// DNSHelperTimeout is a predefined timeout value for K8s DNS commands. It
	// must be larger than 5 minutes because kubedns has a hardcoded resync
	// period of 5 minutes. We have experienced test failures because kubedns
	// needed this time to recover from a connection problem to kube-apiserver.
	// The kubedns resyncPeriod is defined at
	// https://github.com/kubernetes/dns/blob/80fdd88276adba36a87c4f424b66fdf37cd7c9a8/pkg/dns/dns.go#L53
	DNSHelperTimeout = 7 * time.Minute

	// EnableMicroscope is true when microscope should be enabled
	EnableMicroscope = false

	// CIIntegrationFlannel contains the constant to be used when flannel is
	// used in the CI.
	CIIntegrationFlannel = "flannel"
)

var (
	defaultHelmOptions = map[string]string{
		"global.registry":               "k8s1:5000/cilium",
		"agent.image":                   "cilium-dev",
		"global.tag":                    "latest",
		"operator.image":                "operator",
		"operator.tag":                  "latest",
		"managed-etcd.registry":         "docker.io/cilium",
		"global.debug.enabled":          "true",
		"global.k8s.requireIPv4PodCIDR": "true",
		"global.pprof.enabled":          "true",
		"global.logSystemLoad":          "true",
		"global.bpf.preallocateMaps":    "true",
		"global.etcd.leaseTTL":          "30s",
		"global.ipv4.enabled":           "true",
		"global.ipv6.enabled":           "true",
	}

	flannelHelmOverrides = map[string]string{
		"global.flannel.enabled": "true",
		"global.ipv6.enabled":    "false",
		"global.tunnel":          "disabled",
	}
)

// GetCurrentK8SEnv returns the value of K8S_VERSION from the OS environment.
func GetCurrentK8SEnv() string { return os.Getenv("K8S_VERSION") }

// GetCurrentIntegration returns CI integration set up to run against Cilium.
func GetCurrentIntegration() string {
	switch strings.ToLower(os.Getenv("CNI_INTEGRATION")) {
	case CIIntegrationFlannel:
		return CIIntegrationFlannel
	default:
		return ""
	}
}
// Kubectl is a wrapper around an SSHMeta. It is used to run Kubernetes-specific
// commands on the node which is accessible via the SSH metadata stored in its
// SSHMeta.
type Kubectl struct {
	*SSHMeta
	*serviceCache
}

// CreateKubectl initializes a Kubectl helper with the provided vmName and log.
// It marks the test as failed if it cannot get the SSH meta information or
// cannot execute an `ls` on the virtual machine.
func CreateKubectl(vmName string, log *logrus.Entry) *Kubectl {
	node := GetVagrantSSHMeta(vmName)
	if node == nil {
		ginkgoext.Fail(fmt.Sprintf("Cannot connect to vmName '%s'", vmName), 1)
		return nil
	}
	// This `ls` command is a sanity check: sometimes the SSH meta info is not
	// nil but new commands cannot be executed using SSH, tests failed and it
	// was hard to debug.
	res := node.ExecShort("ls /tmp/")
	if !res.WasSuccessful() {
		ginkgoext.Fail(fmt.Sprintf(
			"Cannot execute ls command on vmName '%s'", vmName), 1)
		return nil
	}
	node.logger = log

	return &Kubectl{
		SSHMeta: node,
	}
}

// CepGet returns the endpoint model for the given pod name in the specified
// namespace. If the pod is not present it returns nil.
func (kub *Kubectl) CepGet(namespace string, pod string) *cnpv2.EndpointStatus {
	log := kub.logger.WithFields(logrus.Fields{
		"cep":       pod,
		"namespace": namespace})

	cmd := fmt.Sprintf("%s -n %s get cep %s -o json | jq '.status'", KubectlCmd, namespace, pod)
	res := kub.ExecShort(cmd)
	if !res.WasSuccessful() {
		log.Debug("cep is not present")
		return nil
	}

	var data *cnpv2.EndpointStatus
	err := res.Unmarshal(&data)
	if err != nil {
		log.WithError(err).Error("cannot Unmarshal json")
		return nil
	}
	return data
}

// GetNumNodes returns the number of Kubernetes nodes running
func (kub *Kubectl) GetNumNodes() int {
	getNodesCmd := fmt.Sprintf("%s get nodes -o jsonpath='{.items.*.metadata.name}'", KubectlCmd)
	res := kub.ExecShort(getNodesCmd)
	if !res.WasSuccessful() {
		return 0
	}

	return len(strings.Split(res.SingleOut(), " "))
}

// ExecKafkaPodCmd executes shell command with arguments arg in the specified pod residing in the specified
// namespace. It returns the stdout of the command that was executed.
// The kafka producer and consumer scripts do not return an error if the command
// leads to a TopicAuthorizationException or any other error. Hence the
// function also needs to take into account the stderr messages returned.
func (kub *Kubectl) ExecKafkaPodCmd(namespace string, pod string, arg string) error {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, arg)
	res := kub.Exec(command)
	if !res.WasSuccessful() {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed %s",
			res.GetCmd(), res.OutputPrettyPrint())
	}

	if strings.Contains(res.GetStdErr(), "ERROR") {
		return fmt.Errorf("ExecKafkaPodCmd: command '%s' failed '%s'",
			res.GetCmd(), res.OutputPrettyPrint())
	}
	return nil
}

// ExecPodCmd executes command cmd in the specified pod residing in the specified
// namespace. It returns a pointer to CmdRes with all the output.
func (kub *Kubectl) ExecPodCmd(namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.Exec(command, options...)
}
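// execInPodExample is an illustrative sketch, not part of the original
// helpers: it shows how ExecPodCmd is typically used by a test, i.e. run a
// command inside a pod and inspect the returned *CmdRes. The namespace, pod
// name and command are hypothetical placeholders.
func execInPodExample(kub *Kubectl) error {
	res := kub.ExecPodCmd("default", "app-pod-1", "curl -s http://localhost:8080/healthz")
	if !res.WasSuccessful() {
		// GetCmd/OutputPrettyPrint mirror the error reporting style used in
		// ExecKafkaPodCmd above.
		return fmt.Errorf("command '%s' failed: %s", res.GetCmd(), res.OutputPrettyPrint())
	}
	return nil
}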
// ExecPodCmdContext synchronously executes command cmd in the specified pod residing in the
// specified namespace. It returns a pointer to CmdRes with all the output.
func (kub *Kubectl) ExecPodCmdContext(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecContext(ctx, command, options...)
}

// ExecPodCmdBackground executes command cmd in background in the specified pod residing
// in the specified namespace. It returns a pointer to CmdRes with all the
// output.
//
// To receive the output of this function, the caller must invoke either
// kub.WaitUntilFinish() or kub.WaitUntilMatch() then subsequently fetch the
// output out of the result.
func (kub *Kubectl) ExecPodCmdBackground(ctx context.Context, namespace string, pod string, cmd string, options ...ExecOptions) *CmdRes {
	command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd)
	return kub.ExecInBackground(ctx, command, options...)
}

// Get retrieves the provided Kubernetes objects from the specified namespace.
func (kub *Kubectl) Get(namespace string, command string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s -n %s get %s -o json", KubectlCmd, namespace, command))
}

// GetFromAllNS retrieves provided Kubernetes objects from all namespaces
func (kub *Kubectl) GetFromAllNS(kind string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf(
		"%s get %s --all-namespaces -o json", KubectlCmd, kind))
}

// GetCNP retrieves the output of `kubectl get cnp` in the given namespace for
// the given CNP and returns a CiliumNetworkPolicy struct. It returns nil if
// the CNP does not exist or the JSON output cannot be unmarshaled.
func (kub *Kubectl) GetCNP(namespace string, cnp string) *cnpv2.CiliumNetworkPolicy {
	log := kub.logger.WithFields(logrus.Fields{
		"fn":  "GetCNP",
		"cnp": cnp,
		"ns":  namespace,
	})
	res := kub.Get(namespace, fmt.Sprintf("cnp %s", cnp))
	if !res.WasSuccessful() {
		log.WithField("error", res.CombineOutput()).Info("cannot get CNP")
		return nil
	}
	var result cnpv2.CiliumNetworkPolicy
	err := res.Unmarshal(&result)
	if err != nil {
		log.WithError(err).Errorf("cannot unmarshal CNP output")
		return nil
	}
	return &result
}

// WaitForCRDCount waits until the number of CRDs matching the given filter is
// exactly count. It returns an error if that does not happen before the given
// timeout.
func (kub *Kubectl) WaitForCRDCount(filter string, count int, timeout time.Duration) error {
	// Set regexp flag m for multi-line matching, then add the
	// matches for beginning and end of a line, so that we count
	// at most one match per line (like "grep <filter> | wc -l")
	regex := regexp.MustCompile("(?m:^.*(?:" + filter + ").*$)")
	body := func() bool {
		res := kub.ExecShort(fmt.Sprintf("%s get crds", KubectlCmd))
		if !res.WasSuccessful() {
			log.Error(res.GetErr("kubectl get crds failed"))
			return false
		}
		return len(regex.FindAllString(res.GetStdOut(), -1)) == count
	}
	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for %d CRDs matching filter \"%s\" to be ready", count, filter),
		&TimeoutConfig{Timeout: timeout})
}

// GetPods gets all of the pods in the given namespace that match the provided
// filter.
func (kub *Kubectl) GetPods(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get pods %s -o json", KubectlCmd, namespace, filter))
}

// GetPodsNodes returns a map with pod name as a key and node name as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly
func (kub *Kubectl) GetPodsNodes(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.spec.nodeName}{"\n"}{end}`
	res := kub.Exec(fmt.Sprintf("%s -n %s get pods %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}

// GetPodsIPs returns a map with pod name as a key and pod IP as value. It
// only gets pods in the given namespace that match the provided filter. It
// returns an error if pods cannot be retrieved correctly
func (kub *Kubectl) GetPodsIPs(namespace string, filter string) (map[string]string, error) {
	jsonFilter := `{range .items[*]}{@.metadata.name}{"="}{@.status.podIP}{"\n"}{end}`
	res := kub.ExecShort(fmt.Sprintf("%s -n %s get pods -l %s -o jsonpath='%s'",
		KubectlCmd, namespace, filter, jsonFilter))
	if !res.WasSuccessful() {
		return nil, fmt.Errorf("cannot retrieve pods: %s", res.CombineOutput())
	}
	return res.KVOutput(), nil
}

// GetEndpoints gets all of the endpoints in the given namespace that match the
// provided filter.
func (kub *Kubectl) GetEndpoints(namespace string, filter string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s -n %s get endpoints %s -o json", KubectlCmd, namespace, filter))
}

// GetAllPods returns a slice of all pods present in Kubernetes cluster, along
// with an error if the pods could not be retrieved via `kubectl`, or if the
// pod objects are unable to be marshaled from JSON.
func (kub *Kubectl) GetAllPods(ctx context.Context, options ...ExecOptions) ([]v1.Pod, error) {
	var ops ExecOptions
	if len(options) > 0 {
		ops = options[0]
	}

	getPodsCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
	defer cancel()

	var podsList v1.List
	err := kub.ExecContext(getPodsCtx,
		fmt.Sprintf("%s get pods --all-namespaces -o json", KubectlCmd),
		ExecOptions{SkipLog: ops.SkipLog}).Unmarshal(&podsList)
	if err != nil {
		return nil, err
	}

	// Allocate with zero length (and full capacity) so that append does not
	// leave empty v1.Pod values at the front of the returned slice.
	pods := make([]v1.Pod, 0, len(podsList.Items))
	for _, item := range podsList.Items {
		var pod v1.Pod
		err = json.Unmarshal(item.Raw, &pod)
		if err != nil {
			return nil, err
		}
		pods = append(pods, pod)
	}

	return pods, nil
}

// GetPodNames returns the names of all of the pods that are labeled with label
// in the specified namespace, along with an error if the pod names cannot be
// retrieved.
func (kub *Kubectl) GetPodNames(namespace string, label string) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.GetPodNamesContext(ctx, namespace, label)
}
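// podPlacementExample is an illustrative sketch, not part of the original
// helpers: it shows how the pod listing helpers above can be combined to log
// where pods of a given label are scheduled and which IPs they were assigned.
// The namespace and label selector are hypothetical placeholders.
func podPlacementExample(kub *Kubectl) error {
	nodes, err := kub.GetPodsNodes("default", "-l app=demo")
	if err != nil {
		return err
	}
	ips, err := kub.GetPodsIPs("default", "app=demo")
	if err != nil {
		return err
	}
	for pod, node := range nodes {
		// Both helpers return maps keyed by pod name, so the results can be
		// joined directly.
		kub.logger.Infof("pod %s runs on node %s with IP %s", pod, node, ips[pod])
	}
	return nil
}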
// GetPodNamesContext returns the names of all of the pods that are labeled with
// label in the specified namespace, along with an error if the pod names cannot
// be retrieved.
func (kub *Kubectl) GetPodNamesContext(ctx context.Context, namespace string, label string) ([]string, error) {
	stdout := new(bytes.Buffer)
	filter := "-o jsonpath='{.items[*].metadata.name}'"

	cmd := fmt.Sprintf("%s -n %s get pods -l %s %s", KubectlCmd, namespace, label, filter)

	// Taking more than 30 seconds to get pods means that something is wrong
	// connecting to the node.
	podNamesCtx, cancel := context.WithTimeout(ctx, ShortCommandTimeout)
	defer cancel()
	err := kub.ExecuteContext(podNamesCtx, cmd, stdout, nil)

	if err != nil {
		return nil, fmt.Errorf(
			"could not find pods in namespace '%v' with label '%v': %s", namespace, label, err)
	}

	out := strings.Trim(stdout.String(), "\n")
	if len(out) == 0 {
		// Small hack: strings.Split on an empty string always returns a slice
		// with a single empty string, so return an empty slice instead.
		return []string{}, nil
	}
	return strings.Split(out, " "), nil
}

// GetServiceHostPort returns the host and the first port for the given service name.
// It will return an error if the service cannot be retrieved.
func (kub *Kubectl) GetServiceHostPort(namespace string, service string) (string, int, error) {
	var data v1.Service
	err := kub.Get(namespace, fmt.Sprintf("service %s", service)).Unmarshal(&data)
	if err != nil {
		return "", 0, err
	}
	if len(data.Spec.Ports) == 0 {
		return "", 0, fmt.Errorf("Service '%s' does not have ports defined", service)
	}
	return data.Spec.ClusterIP, int(data.Spec.Ports[0].Port), nil
}

// Logs returns a CmdRes containing the resulting metadata from the
// execution of `kubectl logs <pod> -n <namespace>`.
func (kub *Kubectl) Logs(namespace string, pod string) *CmdRes {
	return kub.Exec(
		fmt.Sprintf("%s -n %s logs %s", KubectlCmd, namespace, pod))
}

// MicroscopeStart installs (if it is not installed) a new microscope pod,
// waits until the pod is ready, and runs microscope in the background. It returns an
// error in the case where microscope cannot be installed, or it is not ready after
// a timeout. It also returns a callback function to stop the monitor and save
// the output to the `helpers.monitorLogFileName` file. Takes an optional list of
// arguments to pass to microscope.
func (kub *Kubectl) MicroscopeStart(microscopeOptions ...string) (error, func() error) {
	if !EnableMicroscope {
		return nil, func() error { return nil }
	}

	microscope := "microscope"
	var microscopeCmd string
	if len(microscopeOptions) == 0 {
		microscopeCmd = "microscope"
	} else {
		microscopeCmd = fmt.Sprintf("%s %s", microscope, strings.Join(microscopeOptions, " "))
	}
	var microscopeCmdWithTimestamps = microscopeCmd + "| ts '[%Y-%m-%d %H:%M:%S]'"
	var cb = func() error { return nil }
	cmd := fmt.Sprintf("%[1]s -ti -n %[2]s exec %[3]s -- %[4]s",
		KubectlCmd, KubeSystemNamespace, microscope, microscopeCmdWithTimestamps)
	microscopePath := ManifestGet(microscopeManifest)
	_ = kub.ApplyDefault(microscopePath)

	err := kub.WaitforPods(
		KubeSystemNamespace,
		fmt.Sprintf("-l k8s-app=%s", microscope),
		HelperTimeout)
	if err != nil {
		return err, cb
	}

	ctx, cancel := context.WithCancel(context.Background())
	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})

	cb = func() error {
		cancel()
		<-ctx.Done()
		testPath, err := CreateReportDirectory()
		if err != nil {
			kub.logger.WithError(err).Errorf(
				"cannot create test results path '%s'", testPath)
			return err
		}

		err = WriteOrAppendToFile(
			filepath.Join(testPath, MonitorLogFileName),
			res.CombineOutput().Bytes(),
			LogPerm)
		if err != nil {
			log.WithError(err).Errorf("cannot create monitor log file")
			return err
		}
		res := kub.Exec(fmt.Sprintf("%s -n %s delete pod --grace-period=0 --force microscope", KubectlCmd, KubeSystemNamespace))
		if !res.WasSuccessful() {
			return fmt.Errorf("error deleting microscope pod: %s", res.OutputPrettyPrint())
		}
		return nil
	}

	return nil, cb
}

// MonitorStart runs cilium monitor in the background and dumps the contents
// into a log file for later debugging
func (kub *Kubectl) MonitorStart(namespace, pod, filename string) func() error {
	cmd := fmt.Sprintf("%s exec -n %s %s -- cilium monitor -v", KubectlCmd, namespace, pod)
	ctx, cancel := context.WithCancel(context.Background())
	res := kub.ExecInBackground(ctx, cmd, ExecOptions{SkipLog: true})

	cb := func() error {
		cancel()
		<-ctx.Done()
		testPath, err := CreateReportDirectory()
		if err != nil {
			kub.logger.WithError(err).Errorf(
				"cannot create test results path '%s'", testPath)
			return err
		}

		err = WriteOrAppendToFile(
			filepath.Join(testPath, filename),
			res.CombineOutput().Bytes(),
			LogPerm)
		if err != nil {
			log.WithError(err).Errorf("cannot create monitor log file %s", filename)
			return err
		}
		return nil
	}

	return cb
}
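// monitorDuringTestExample is an illustrative sketch, not part of the original
// helpers: it shows the intended call pattern for MonitorStart, i.e. start
// `cilium monitor` before the interesting part of a test and invoke the
// returned callback afterwards to flush the captured output into the report
// directory. The log file name is a hypothetical placeholder.
func monitorDuringTestExample(kub *Kubectl, ciliumPod string) error {
	stopMonitor := kub.MonitorStart(KubeSystemNamespace, ciliumPod, "monitor-example.log")

	// ... the test traffic would be generated here ...

	// The callback cancels the background command and writes the collected
	// monitor output to the report directory.
	return stopMonitor()
}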
// BackgroundReport dumps the result of the given commands on cilium pods every
// five seconds.
func (kub *Kubectl) BackgroundReport(commands ...string) (context.CancelFunc, error) {
	backgroundCtx, cancel := context.WithCancel(context.Background())
	pods, err := kub.GetCiliumPods(KubeSystemNamespace)
	if err != nil {
		return cancel, fmt.Errorf("Cannot retrieve cilium pods: %s", err)
	}
	retrieveInfo := func() {
		for _, pod := range pods {
			for _, cmd := range commands {
				kub.CiliumExec(pod, cmd)
			}
		}
	}
	go func(ctx context.Context) {
		ticker := time.NewTicker(5 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				retrieveInfo()
			}
		}
	}(backgroundCtx)
	return cancel, nil
}

// PprofReport runs pprof on cilium nodes every 5 minutes and saves the data
// into the test folder with a pprof suffix.
func (kub *Kubectl) PprofReport() {
	PProfCadence := 5 * time.Minute
	ticker := time.NewTicker(PProfCadence)
	log := kub.logger.WithField("subsys", "pprofReport")

	retrievePProf := func(pod, testPath string) {
		res := kub.ExecPodCmd(KubeSystemNamespace, pod, "gops pprof-cpu 1")
		if !res.WasSuccessful() {
			log.Errorf("cannot execute pprof: %s", res.OutputPrettyPrint())
			return
		}
		files := kub.ExecPodCmd(KubeSystemNamespace, pod, `ls -1 /tmp/`)
		for _, file := range files.ByLines() {
			if !strings.Contains(file, "profile") {
				continue
			}

			dest := filepath.Join(
				BasePath, testPath,
				fmt.Sprintf("%s-profile-%s.pprof", pod, file))
			_ = kub.Exec(fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s %[5]s",
				KubectlCmd, KubeSystemNamespace, pod, file, dest),
				ExecOptions{SkipLog: true})

			_ = kub.ExecPodCmd(KubeSystemNamespace, pod, fmt.Sprintf(
				"rm %s", filepath.Join("/tmp/", file)))
		}
	}

	for {
		select {
		case <-ticker.C:

			testPath, err := CreateReportDirectory()
			if err != nil {
				log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
				return
			}

			pods, err := kub.GetCiliumPods(KubeSystemNamespace)
			if err != nil {
				log.Errorf("cannot get cilium pods")
			}

			for _, pod := range pods {
				retrievePProf(pod, testPath)
			}

		}
	}
}

// NodeCleanMetadata clears the annotation.V4CIDRName and annotation.V6CIDRName
// annotations on each node in the Kubernetes cluster by overwriting them with
// empty values. It returns an error if the nodes cannot be retrieved via the
// Kubernetes API.
func (kub *Kubectl) NodeCleanMetadata() error {
	metadata := []string{
		annotation.V4CIDRName,
		annotation.V6CIDRName,
	}

	data := kub.ExecShort(fmt.Sprintf("%s get nodes -o jsonpath='{.items[*].metadata.name}'", KubectlCmd))
	if !data.WasSuccessful() {
		return fmt.Errorf("could not get nodes via %s: %s", KubectlCmd, data.CombineOutput())
	}
	for _, node := range strings.Split(data.Output().String(), " ") {
		for _, label := range metadata {
			kub.ExecShort(fmt.Sprintf("%s annotate --overwrite nodes %s %s=''", KubectlCmd, node, label))
		}
	}
	return nil
}

// NamespaceCreate creates a new Kubernetes namespace with the given name
func (kub *Kubectl) NamespaceCreate(name string) *CmdRes {
	ginkgoext.By("Creating namespace %s", name)
	return kub.ExecShort(fmt.Sprintf("%s create namespace %s", KubectlCmd, name))
}

// NamespaceDelete deletes a given Kubernetes namespace
func (kub *Kubectl) NamespaceDelete(name string) *CmdRes {
	return kub.ExecShort(fmt.Sprintf("%s delete namespace %s", KubectlCmd, name))
}

// WaitforPods waits up until timeout seconds have elapsed for all pods in the
// specified namespace that match the provided JSONPath filter to have their
// containerStatuses equal to "ready". Returns nil if all pods achieve
// the aforementioned desired state within timeout seconds. Returns an error if
// the command failed or the timeout was exceeded.
func (kub *Kubectl) WaitforPods(namespace string, filter string, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, 0, timeout)
}

// checkPodStatusFunc returns true if the pod is in the desired state, or false
// otherwise.
type checkPodStatusFunc func(v1.Pod) bool

// checkRunning checks that the pods are running, but not necessarily ready.
func checkRunning(pod v1.Pod) bool {
	if pod.Status.Phase != v1.PodRunning || pod.ObjectMeta.DeletionTimestamp != nil {
		return false
	}
	return true
}

// checkReady determines whether the pods are running and ready.
func checkReady(pod v1.Pod) bool {
	if !checkRunning(pod) {
		return false
	}

	for _, container := range pod.Status.ContainerStatuses {
		if !container.Ready {
			return false
		}
	}
	return true
}

// WaitforNPodsRunning waits up until timeout duration has elapsed for at least
// minRequired pods in the specified namespace that match the provided JSONPath
// filter to have their containerStatuses equal to "running".
// Returns no error if minRequired pods achieve the aforementioned desired
// state within timeout seconds. Returns an error if the command failed or the
// timeout was exceeded.
// When minRequired is 0, the function will derive the required pod count from
// the number of pods in the cluster for every iteration.
func (kub *Kubectl) WaitforNPodsRunning(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkRunning, namespace, filter, minRequired, timeout)
}

// WaitforNPods waits up until timeout seconds have elapsed for at least
// minRequired pods in the specified namespace that match the provided JSONPath
// filter to have their containerStatuses equal to "ready".
// Returns no error if minRequired pods achieve the aforementioned desired
// state within timeout seconds. Returns an error if the command failed or the
// timeout was exceeded.
// When minRequired is 0, the function will derive the required pod count from
// the number of pods in the cluster for every iteration.
func (kub *Kubectl) WaitforNPods(namespace string, filter string, minRequired int, timeout time.Duration) error {
	return kub.waitForNPods(checkReady, namespace, filter, minRequired, timeout)
}

func (kub *Kubectl) waitForNPods(checkStatus checkPodStatusFunc, namespace string, filter string, minRequired int, timeout time.Duration) error {
	body := func() bool {
		podList := &v1.PodList{}
		err := kub.GetPods(namespace, filter).Unmarshal(podList)
		if err != nil {
			kub.logger.Infof("Error while getting PodList: %s", err)
			return false
		}

		if len(podList.Items) == 0 {
			return false
		}

		var required int

		if minRequired == 0 {
			required = len(podList.Items)
		} else {
			required = minRequired
		}

		if len(podList.Items) < required {
			return false
		}

		// For each pod, count it as running when all conditions are true:
		// - It is scheduled via Phase == v1.PodRunning
		// - It is not scheduled for deletion when DeletionTimestamp is set
		// - All containers in the pod have passed the readiness check via
		//   containerStatuses.Ready
		currScheduled := 0
		for _, pod := range podList.Items {
			if checkStatus(pod) {
				currScheduled++
			}
		}

		return currScheduled >= required
	}

	return WithTimeout(
		body,
		fmt.Sprintf("timed out waiting for pods with filter %s to be ready", filter),
		&TimeoutConfig{Timeout: timeout})
}
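// waitForDeploymentExample is an illustrative sketch, not part of the original
// helpers: it shows how the wait helpers above are typically used after a
// manifest has been applied. The namespace, label selector and expected pod
// count are hypothetical placeholders; passing 0 as minRequired would instead
// wait for every pod currently matching the filter.
func waitForDeploymentExample(kub *Kubectl) error {
	// Wait for at least two ready pods carrying the example label.
	if err := kub.WaitforNPods("default", "-l app=demo", 2, HelperTimeout); err != nil {
		return fmt.Errorf("demo pods did not become ready: %s", err)
	}
	return nil
}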
// WaitForServiceEndpoints waits up until timeout seconds have elapsed for the
// endpoints in the specified namespace that match the provided filter to
// expose a port for the given service. Returns nil if the endpoint appears
// within timeout seconds. Returns an error if the command failed or the
// timeout was exceeded.
func (kub *Kubectl) WaitForServiceEndpoints(namespace string, filter string, service string, timeout time.Duration) error {
	body := func() bool {
		var jsonPath = fmt.Sprintf("{.items[?(@.metadata.name == '%s')].subsets[0].ports[0].port}", service)
		data, err := kub.GetEndpoints(namespace, filter).Filter(jsonPath)

		if err != nil {
			kub.logger.WithError(err).Error("cannot get service endpoints")
			return false
		}

		if data.String() != "" {
			return true
		}

		kub.logger.WithFields(logrus.Fields{
			"namespace": namespace,
			"filter":    filter,
			"data":      data,
			"service":   service,
		}).Info("WaitForServiceEndpoints: service endpoint not ready")
		return false
	}

	return WithTimeout(body, "could not get service endpoints", &TimeoutConfig{Timeout: timeout})
}

// Action performs the specified ResourceLifeCycleAction on the Kubernetes
// manifest located at path filePath in the given namespace
func (kub *Kubectl) Action(action ResourceLifeCycleAction, filePath string, namespace ...string) *CmdRes {
	if len(namespace) == 0 {
		kub.logger.Debugf("performing '%v' on '%v'", action, filePath)
		return kub.ExecShort(fmt.Sprintf("%s %s -f %s", KubectlCmd, action, filePath))
	}

	kub.logger.Debugf("performing '%v' on '%v' in namespace '%v'", action, filePath, namespace[0])
	return kub.ExecShort(fmt.Sprintf("%s %s -f %s -n %s", KubectlCmd, action, filePath, namespace[0]))
}

// ApplyOptions stores options for kubectl apply command
type ApplyOptions struct {
	FilePath  string
	Namespace string
	Force     bool
	DryRun    bool
	Output    string
	Piped     string
}

// Apply applies the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Apply(options ApplyOptions) *CmdRes {
	var force string
	if options.Force {
		force = "--force=true"
	} else {
		force = "--force=false"
	}

	cmd := fmt.Sprintf("%s apply %s -f %s", KubectlCmd, force, options.FilePath)

	if options.DryRun {
		cmd = cmd + " --dry-run"
	}

	if len(options.Output) > 0 {
		cmd = cmd + " -o " + options.Output
	}

	if len(options.Namespace) == 0 {
		kub.logger.Debugf("applying %s", options.FilePath)
	} else {
		kub.logger.Debugf("applying %s in namespace %s", options.FilePath, options.Namespace)
		cmd = cmd + " -n " + options.Namespace
	}

	if len(options.Piped) > 0 {
		cmd = options.Piped + " | " + cmd
	}
	return kub.ExecMiddle(cmd)
}

// ApplyDefault applies the given filepath with the other options set to their
// defaults.
func (kub *Kubectl) ApplyDefault(filePath string) *CmdRes {
	return kub.Apply(ApplyOptions{FilePath: filePath})
}

// Create creates the Kubernetes manifest located at path filepath.
func (kub *Kubectl) Create(filePath string) *CmdRes {
	kub.logger.Debugf("creating %s", filePath)
	return kub.ExecShort(
		fmt.Sprintf("%s create -f %s", KubectlCmd, filePath))
}

// CreateResource is a wrapper around `kubectl create <resource> <resourceName>`.
func (kub *Kubectl) CreateResource(resource, resourceName string) *CmdRes {
	kub.logger.Debug(fmt.Sprintf("creating resource %s with name %s", resource, resourceName))
	return kub.ExecShort(fmt.Sprintf("kubectl create %s %s", resource, resourceName))
}
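// applyRenderedManifestExample is an illustrative sketch, not part of the
// original helpers: it shows how ApplyOptions is used for anything beyond the
// plain ApplyDefault case, here validating a manifest with a dry run before
// applying it for real. The manifest path is a hypothetical placeholder.
func applyRenderedManifestExample(kub *Kubectl) error {
	manifest := "/tmp/example-manifest.yaml"

	// First pass: --dry-run only validates the manifest without creating
	// anything.
	if res := kub.Apply(ApplyOptions{FilePath: manifest, DryRun: true}); !res.WasSuccessful() {
		return res.GetErr("manifest validation failed")
	}

	// Second pass: apply it with --force=true, mirroring how DeployPatch and
	// ciliumInstallHelm below use the same options structure.
	if res := kub.Apply(ApplyOptions{FilePath: manifest, Force: true}); !res.WasSuccessful() {
		return res.GetErr("manifest apply failed")
	}
	return nil
}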
// DeleteResource is a wrapper around `kubectl delete <resource> <resourceName>`.
func (kub *Kubectl) DeleteResource(resource, resourceName string) *CmdRes {
	kub.logger.Debug(fmt.Sprintf("deleting resource %s with name %s", resource, resourceName))
	return kub.Exec(fmt.Sprintf("kubectl delete %s %s", resource, resourceName))
}

// Delete deletes the Kubernetes manifest at path filepath.
func (kub *Kubectl) Delete(filePath string) *CmdRes {
	kub.logger.Debugf("deleting %s", filePath)
	return kub.ExecShort(
		fmt.Sprintf("%s delete -f %s", KubectlCmd, filePath))
}

// WaitKubeDNS waits until the kubeDNS pods are ready. In case of exceeding the
// default timeout it returns an error.
func (kub *Kubectl) WaitKubeDNS() error {
	return kub.WaitforPods(KubeSystemNamespace, fmt.Sprintf("-l %s", kubeDNSLabel), DNSHelperTimeout)
}

// WaitForKubeDNSEntry waits until the given DNS entry exists in the kube-dns
// service. If the entry is not ready after the timeout it returns an error. The
// name's format should be `${name}.${namespace}`. If the `svc.cluster.local`
// suffix is not present, it is appended to the given name so that the
// service's FQDN is checked.
func (kub *Kubectl) WaitForKubeDNSEntry(serviceName, serviceNamespace string) error {
	svcSuffix := "svc.cluster.local"
	logger := kub.logger.WithFields(logrus.Fields{"serviceName": serviceName, "serviceNamespace": serviceNamespace})

	serviceNameWithNamespace := fmt.Sprintf("%s.%s", serviceName, serviceNamespace)
	if !strings.HasSuffix(serviceNameWithNamespace, svcSuffix) {
		serviceNameWithNamespace = fmt.Sprintf("%s.%s", serviceNameWithNamespace, svcSuffix)
	}
	// https://bugs.launchpad.net/ubuntu/+source/bind9/+bug/854705
	digCMD := "dig +short %s @%s | grep -v -e '^;'"

	// If it fails we want to know whether it is because the connection cannot
	// be established or because the DNS entry does not exist.
	digCMDFallback := "dig +tcp %s @%s"

	dnsClusterIP, _, err := kub.GetServiceHostPort(KubeSystemNamespace, "kube-dns")
	if err != nil {
		logger.WithError(err).Error("cannot get kube-dns service IP")
		return err
	}

	body := func() bool {
		serviceIP, _, err := kub.GetServiceHostPort(serviceNamespace, serviceName)
		if err != nil {
			log.WithError(err).Errorf("cannot get service IP for service %s", serviceNameWithNamespace)
			return false
		}

		// ClusterIPNone denotes that this service is headless; there is no
		// service IP for this service, and thus the IP returned by `dig` is
		// an IP of the pod itself, not ClusterIPNone, which is what Kubernetes
		// shows as the IP for the service for headless services.
		if serviceIP == v1.ClusterIPNone {
			res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
			return res.WasSuccessful()
		}
		log.Debugf("service is not headless; checking whether IP retrieved from DNS matches the IP for the service stored in Kubernetes")
		res := kub.ExecShort(fmt.Sprintf(digCMD, serviceNameWithNamespace, dnsClusterIP))
		serviceIPFromDNS := res.SingleOut()
		if !govalidator.IsIP(serviceIPFromDNS) {
			logger.Debugf("output of dig (%s) did not return an IP", serviceIPFromDNS)
			return false
		}

		// Due to lag between new IPs for the same service being synced between
		// kube-apiserver and DNS, check if the IP for the service that is
		// stored in K8s matches the IP of the service cached in DNS. These
		// can be different, because some tests use the same service names.
		// Wait accordingly for the services to match, and for the service
		// name to resolve via DNS.
		if !strings.Contains(serviceIPFromDNS, serviceIP) {
			logger.Debugf("service IP retrieved from DNS (%s) does not match the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
			_ = kub.ExecShort(fmt.Sprintf(digCMDFallback, serviceNameWithNamespace, dnsClusterIP))
			return false
		}
		logger.Debugf("service IP retrieved from DNS (%s) matches the IP for the service stored in Kubernetes (%s)", serviceIPFromDNS, serviceIP)
		return true
	}

	return WithTimeout(
		body,
		fmt.Sprintf("DNS '%s' is not ready after timeout", serviceNameWithNamespace),
		&TimeoutConfig{Timeout: DNSHelperTimeout})
}

// WaitCleanAllTerminatingPods waits until all pods that are in `Terminating`
// state are deleted correctly from the platform. In case of exceeding the
// given timeout (in seconds) it returns an error.
func (kub *Kubectl) WaitCleanAllTerminatingPods(timeout time.Duration) error {
	body := func() bool {
		res := kub.ExecShort(fmt.Sprintf(
			"%s get pods --all-namespaces -o jsonpath='{.items[*].metadata.deletionTimestamp}'",
			KubectlCmd))
		if !res.WasSuccessful() {
			return false
		}

		if res.Output().String() == "" {
			// Output is empty so no terminating containers
			return true
		}

		podsTerminating := len(strings.Split(res.Output().String(), " "))
		kub.logger.WithField("Terminating pods", podsTerminating).Info("List of pods terminating")
		if podsTerminating > 0 {
			return false
		}
		return true
	}

	err := WithTimeout(
		body,
		"Pods are still not deleted after a timeout",
		&TimeoutConfig{Timeout: timeout})
	return err
}

// DeployPatch deploys the original kubernetes descriptor with the given patch.
func (kub *Kubectl) DeployPatch(original, patch string) error {
	// debugYaml only dumps the full created yaml file to the test output if
	// the cilium manifest can not be created correctly.
	debugYaml := func(original, patch string) {
		// dry-run is only available since k8s 1.11
		switch GetCurrentK8SEnv() {
		case "1.8", "1.9", "1.10":
			_ = kub.ExecShort(fmt.Sprintf(
				`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
				KubectlCmd, original, patch))
		default:
			_ = kub.ExecShort(fmt.Sprintf(
				`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run -o yaml`,
				KubectlCmd, original, patch))
		}
	}

	var res *CmdRes
	// validation 1st
	// dry-run is only available since k8s 1.11
	switch GetCurrentK8SEnv() {
	case "1.8", "1.9", "1.10":
	default:
		res = kub.ExecShort(fmt.Sprintf(
			`%s patch --filename='%s' --patch "$(cat '%s')" --local --dry-run`,
			KubectlCmd, original, patch))
		if !res.WasSuccessful() {
			debugYaml(original, patch)
			return res.GetErr("Cilium patch validation failed")
		}
	}

	res = kub.Apply(ApplyOptions{
		FilePath: "-",
		Force:    true,
		Piped: fmt.Sprintf(
			`%s patch --filename='%s' --patch "$(cat '%s')" --local -o yaml`,
			KubectlCmd, original, patch),
	})
	if !res.WasSuccessful() {
		debugYaml(original, patch)
		return res.GetErr("Cilium manifest patch installation failed")
	}
	return nil
}
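// deployPatchedManifestExample is an illustrative sketch, not part of the
// original helpers: it shows the calling convention of DeployPatch, i.e. the
// first argument is the original descriptor and the second a patch file, both
// rendered into one manifest via `kubectl patch --local -o yaml` and piped
// into `kubectl apply`. Both file names are hypothetical placeholders.
func deployPatchedManifestExample(kub *Kubectl) error {
	original := filepath.Join(BasePath, descriptorsPath, "example-deployment.yaml")
	patch := filepath.Join(BasePath, manifestsPath, "example-deployment-patch.yaml")
	return kub.DeployPatch(original, patch)
}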
// ciliumInstall installs all Cilium descriptors into kubernetes.
// dsPatchName corresponds to the DaemonSet patch, found by
// getK8sDescriptorPatch, that will be applied to the original Cilium DaemonSet
// descriptor, found by getK8sDescriptor.
// cmPatchName corresponds to the ConfigMap patch, found by
// getK8sDescriptorPatch, that will be applied to the original Cilium ConfigMap
// descriptor, found by getK8sDescriptor.
// Returns an error if any patch or original descriptor file was not found.
func (kub *Kubectl) ciliumInstall(dsPatchName, cmPatchName string, getK8sDescriptor, getK8sDescriptorPatch func(filename string) string) error {
	cmPathname := getK8sDescriptor("cilium-cm.yaml")
	if cmPathname == "" {
		return fmt.Errorf("Cilium ConfigMap descriptor not found")
	}
	dsPathname := getK8sDescriptor("cilium-ds.yaml")
	if dsPathname == "" {
		return fmt.Errorf("Cilium DaemonSet descriptor not found")
	}
	rbacPathname := getK8sDescriptor("cilium-rbac.yaml")
	if rbacPathname == "" {
		return fmt.Errorf("Cilium RBAC descriptor not found")
	}

	deployOriginal := func(original string) error {
		// debugYaml only dumps the full created yaml file to the test output if
		// the cilium manifest can not be created correctly.
		debugYaml := func(original string) {
			kub.Apply(ApplyOptions{
				FilePath: original,
				DryRun:   true,
				Output:   "yaml",
			})
		}

		// validation 1st
		res := kub.Apply(ApplyOptions{
			FilePath: original,
			DryRun:   true,
		})
		if !res.WasSuccessful() {
			debugYaml(original)
			return res.GetErr("Cilium manifest validation fails")
		}

		res = kub.ApplyDefault(original)
		if !res.WasSuccessful() {
			debugYaml(original)
			return res.GetErr("Cannot apply Cilium manifest")
		}
		return nil
	}

	if err := deployOriginal(rbacPathname); err != nil {
		return err
	}

	if err := kub.DeployPatch(cmPathname, getK8sDescriptorPatch(cmPatchName)); err != nil {
		return err
	}

	if err := kub.DeployPatch(dsPathname, getK8sDescriptorPatch(dsPatchName)); err != nil {
		return err
	}

	cmdRes := kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorSA))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator SA %s: %s", ciliumEtcdOperatorSA, cmdRes.OutputPrettyPrint())
	}

	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperatorRBAC))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator RBAC %s: %s", ciliumEtcdOperatorRBAC, cmdRes.OutputPrettyPrint())
	}

	cmdRes = kub.ApplyDefault(getK8sDescriptor(ciliumEtcdOperator))
	if !cmdRes.WasSuccessful() {
		return fmt.Errorf("Unable to deploy descriptor of etcd-operator %s: %s", ciliumEtcdOperator, cmdRes.OutputPrettyPrint())
	}

	_ = kub.ApplyDefault(getK8sDescriptor("cilium-operator-sa.yaml"))
	err := kub.DeployPatch(getK8sDescriptor("cilium-operator.yaml"), getK8sDescriptorPatch("cilium-operator-patch.yaml"))
	if err != nil {
		return fmt.Errorf("Unable to deploy descriptor of cilium-operators: %s", err)
	}

	return nil
}

func addIfNotOverwritten(options []string, field, value string) []string {
	for _, s := range options {
		if strings.HasPrefix(s, "--set "+field) {
			return options
		}
	}

	options = append(options, "--set "+field+"="+value)
	return options
}
// generateCiliumYaml renders the Cilium Helm chart with the given --set
// options (plus the defaults in defaultHelmOptions that were not overridden)
// into the file named by filename.
func (kub *Kubectl) generateCiliumYaml(options []string, filename string) error {
	for key, value := range defaultHelmOptions {
		options = addIfNotOverwritten(options, key, value)
	}

	switch GetCurrentIntegration() {
	case CIIntegrationFlannel:
		// Appending the options will override earlier options on the CLI.
		for k, v := range flannelHelmOverrides {
			options = append(options, fmt.Sprintf("--set %s=%s", k, v))
		}
	default:
	}

	// TODO GH-8753: Use helm rendering library instead of shelling out to
	// helm template
	res := kub.ExecMiddle(fmt.Sprintf("helm template %s --namespace=kube-system %s > %s",
		HelmTemplate, strings.Join(options, " "), filename))
	if !res.WasSuccessful() {
		return res.GetErr("Unable to generate YAML")
	}

	return nil
}

// ciliumInstallHelm installs Cilium with the Helm options provided.
func (kub *Kubectl) ciliumInstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Apply(ApplyOptions{FilePath: "cilium.yaml", Force: true})
	if !res.WasSuccessful() {
		return res.GetErr("Unable to apply YAML")
	}

	return nil
}

// ciliumUninstallHelm uninstalls Cilium with the Helm options provided.
func (kub *Kubectl) ciliumUninstallHelm(options []string) error {
	if err := kub.generateCiliumYaml(options, "cilium.yaml"); err != nil {
		return err
	}

	res := kub.Delete("cilium.yaml")
	if !res.WasSuccessful() {
		return res.GetErr("Unable to delete YAML")
	}

	return nil
}

// CiliumInstall installs Cilium with the provided Helm options.
func (kub *Kubectl) CiliumInstall(options []string) error {
	return kub.ciliumInstallHelm(options)
}

// CiliumUninstall uninstalls Cilium with the provided Helm options.
func (kub *Kubectl) CiliumUninstall(options []string) error {
	return kub.ciliumUninstallHelm(options)
}
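// installWithOverridesExample is an illustrative sketch, not part of the
// original helpers: it shows how CiliumInstall is driven with `--set` style
// Helm options. Options passed here win over defaultHelmOptions because
// addIfNotOverwritten only appends defaults whose field is not already
// present. The chosen values are hypothetical.
func installWithOverridesExample(kub *Kubectl) error {
	options := []string{
		"--set global.debug.enabled=false",
		"--set global.ipv6.enabled=false",
	}
	return kub.CiliumInstall(options)
}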
// CiliumInstallVersion installs all Cilium descriptors into kubernetes for
// a given Cilium Version tag.
// dsPatchName corresponds to the DaemonSet patch that will be applied to the
// original Cilium DaemonSet descriptor of that given Cilium Version tag.
// cmPatchName corresponds to the ConfigMap patch that will be applied to the
// original Cilium ConfigMap descriptor of that given Cilium Version tag.
// Returns an error if any patch or original descriptor file was not found.
func (kub *Kubectl) CiliumInstallVersion(dsPatchName, cmPatchName, versionTag string) error {
	getK8sDescriptorPatch := func(filename string) string {
		// try dependent Cilium, k8s and integration version patch file
		ginkgoVersionedPath := filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), GetCurrentIntegration(), filename)
		_, err := os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version and integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium and k8s version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, GetCurrentK8SEnv(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent Cilium version patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, versionTag, filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		// try dependent integration patch file
		ginkgoVersionedPath = filepath.Join(manifestsPath, GetCurrentIntegration(), filename)
		_, err = os.Stat(ginkgoVersionedPath)
		if err == nil {
			return filepath.Join(BasePath, ginkgoVersionedPath)
		}
		return filepath.Join(BasePath, manifestsPath, filename)
	}
	getK8sDescriptor := func(filename string) string {
		return fmt.Sprintf("https://raw.githubusercontent.com/cilium/cilium/%s/examples/kubernetes/%s/%s", versionTag, GetCurrentK8SEnv(), filename)
	}
	return kub.ciliumInstall(dsPatchName, cmPatchName, getK8sDescriptor, getK8sDescriptorPatch)
}

// GetCiliumPods returns a list of all Cilium pods in the specified namespace,
// and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPods(namespace string) ([]string, error) {
	return kub.GetPodNames(namespace, "k8s-app=cilium")
}

// GetCiliumPodsContext returns a list of all Cilium pods in the specified
// namespace, and an error if the Cilium pods were not able to be retrieved.
func (kub *Kubectl) GetCiliumPodsContext(ctx context.Context, namespace string) ([]string, error) {
	return kub.GetPodNamesContext(ctx, namespace, "k8s-app=cilium")
}

// CiliumEndpointsList returns the result of `cilium endpoint list` from the
// specified pod.
func (kub *Kubectl) CiliumEndpointsList(ctx context.Context, pod string) *CmdRes {
	return kub.CiliumExecContext(ctx, pod, "cilium endpoint list -o json")
}

// CiliumEndpointsStatus returns a mapping of a pod name to its corresponding
// endpoint's status
func (kub *Kubectl) CiliumEndpointsStatus(pod string) map[string]string {
	filter := `{range [*]}{@.status.external-identifiers.pod-name}{"="}{@.status.state}{"\n"}{end}`
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	return kub.CiliumExecContext(ctx, pod, fmt.Sprintf(
		"cilium endpoint list -o jsonpath='%s'", filter)).KVOutput()
}

// CiliumEndpointWaitReady waits until all endpoints managed by all Cilium pods
// are ready. Returns an error if the Cilium pods cannot be retrieved via
// Kubernetes, or if the endpoints are not ready after a specified timeout.
func (kub *Kubectl) CiliumEndpointWaitReady() error {
	ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace)
	if err != nil {
		kub.logger.WithError(err).Error("cannot get Cilium pods")
		return err
	}

	body := func(ctx context.Context) (bool, error) {
		var wg sync.WaitGroup
		queue := make(chan bool, len(ciliumPods))
		endpointsReady := func(pod string) {
			valid := false
			defer func() {
				queue <- valid
				wg.Done()
			}()
			logCtx := kub.logger.WithField("pod", pod)
			status, err := kub.CiliumEndpointsList(ctx, pod).Filter(`{range [*]}{.status.state}{"="}{.status.identity.id}{"\n"}{end}`)
			if err != nil {
				logCtx.WithError(err).Errorf("cannot get endpoints states on Cilium pod")
				return
			}
			total := 0
			invalid := 0
			for _, line := range strings.Split(status.String(), "\n") {
				if line == "" {
					continue
				}
				// Each line is like status=identityID.
				// The identityID is needed because the reserved:init identity
				// means that the pod is not ready to accept traffic.
				total++
				vals := strings.Split(line, "=")
				if len(vals) != 2 {
					logCtx.Errorf("Endpoint list does not have a correct output '%s'", line)
					return
				}
				if vals[0] != "ready" {
					invalid++
				}
				// Consider an endpoint with reserved identity 5 (reserved:init) as not ready.
				if vals[1] == "5" {
					invalid++
				}
			}
			logCtx.WithFields(logrus.Fields{
				"total":   total,
				"invalid": invalid,
			}).Info("Waiting for cilium endpoints to be ready")

			if invalid != 0 {
				return
			}
			valid = true
		}
		wg.Add(len(ciliumPods))
		for _, pod := range ciliumPods {
			go endpointsReady(pod)
		}

		wg.Wait()
		close(queue)

		for status := range queue {
			if !status {
				return false, nil
			}
		}
		return true, nil
	}

	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
	defer cancel()
	err = WithContext(ctx, body, 1*time.Second)
	if err == nil {
		return err
	}

	callback := func() string {
		ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
		defer cancel()

		var errorMessage string
		for _, pod := range ciliumPods {
			var endpoints []models.Endpoint
			cmdRes := kub.CiliumEndpointsList(ctx, pod)
			if !cmdRes.WasSuccessful() {
				errorMessage += fmt.Sprintf(
					"\tCilium Pod: %s \terror: unable to get endpoint list: %s",
					pod, cmdRes.err)
				continue
			}
			err := cmdRes.Unmarshal(&endpoints)
			if err != nil {
				errorMessage += fmt.Sprintf(
					"\tCilium Pod: %s \terror: unable to parse endpoint list: %s",
					pod, err)
				continue
			}
			for _, ep := range endpoints {
				errorMessage += fmt.Sprintf(
					"\tCilium Pod: %s \tEndpoint: %d \tIdentity: %d\t State: %s\n",
					pod, ep.ID, ep.Status.Identity.ID, ep.Status.State)
			}
		}
		return errorMessage
	}
	return NewSSHMetaError(err.Error(), callback)
}
// WaitForCEPIdentity waits for a particular CEP to have an identity present.
func (kub *Kubectl) WaitForCEPIdentity(ns, podName string) error {
	body := func(ctx context.Context) (bool, error) {
		ep := kub.CepGet(ns, podName)
		if ep == nil {
			return false, nil
		}
		if ep.Identity == nil {
			return false, nil
		}
		return ep.Identity.ID != 0, nil
	}

	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
	defer cancel()
	return WithContext(ctx, body, 1*time.Second)
}

// CiliumExecContext runs cmd in the specified Cilium pod with the given context.
func (kub *Kubectl) CiliumExecContext(ctx context.Context, pod string, cmd string) *CmdRes {
	limitTimes := 5
	execute := func() *CmdRes {
		command := fmt.Sprintf("%s exec -n kube-system %s -- %s", KubectlCmd, pod, cmd)
		return kub.ExecContext(ctx, command)
	}
	var res *CmdRes
	// Sometimes kubectl returns exit code 126. This used to happen in the
	// nightly tests when a lot of execs were in flight (a cgroups issue). The
	// upstream changes did not fix the issue, so retry a few times to work
	// around the kubectl problem.
	// https://github.com/openshift/origin/issues/16246
	for i := 0; i < limitTimes; i++ {
		res = execute()
		if res.GetExitCode() != 126 {
			break
		}
		time.Sleep(200 * time.Millisecond)
	}
	return res
}

// CiliumExec runs cmd in the specified Cilium pod.
// Deprecated: use CiliumExecContext instead
func (kub *Kubectl) CiliumExec(pod string, cmd string) *CmdRes {
	ctx, cancel := context.WithTimeout(context.Background(), HelperTimeout)
	defer cancel()
	return kub.CiliumExecContext(ctx, pod, cmd)
}
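// ciliumStatusExample is an illustrative sketch, not part of the original
// helpers: it shows the preferred, context-bounded way to run a command in a
// Cilium pod now that CiliumExec is deprecated. The pod name is whatever
// GetCiliumPods returned to the caller.
func ciliumStatusExample(kub *Kubectl, ciliumPod string) error {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()

	res := kub.CiliumExecContext(ctx, ciliumPod, "cilium status --brief")
	if !res.WasSuccessful() {
		return fmt.Errorf("cilium status failed on %s: %s", ciliumPod, res.OutputPrettyPrint())
	}
	return nil
}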
// CiliumExecUntilMatch executes the specified command repeatedly for the
// specified Cilium pod until the given substring is present in stdout.
// If the timeout is reached it will return an error.
func (kub *Kubectl) CiliumExecUntilMatch(pod, cmd, substr string) error {
	body := func() bool {
		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
		defer cancel()
		res := kub.CiliumExecContext(ctx, pod, cmd)
		return strings.Contains(res.Output().String(), substr)
	}

	return WithTimeout(
		body,
		fmt.Sprintf("%s is not in the output after timeout", substr),
		&TimeoutConfig{Timeout: HelperTimeout})
}

// WaitForCiliumInitContainerToFinish waits for all Cilium init containers to
// finish
func (kub *Kubectl) WaitForCiliumInitContainerToFinish() error {
	body := func() bool {
		podList := &v1.PodList{}
		err := kub.GetPods("kube-system", "-l k8s-app=cilium").Unmarshal(podList)
		if err != nil {
			kub.logger.Infof("Error while getting PodList: %s", err)
			return false
		}
		if len(podList.Items) == 0 {
			return false
		}
		for _, pod := range podList.Items {
			for _, v := range pod.Status.InitContainerStatuses {
				if v.State.Terminated != nil && (v.State.Terminated.Reason != "Completed" || v.State.Terminated.ExitCode != 0) {
					kub.logger.WithFields(logrus.Fields{
						"podName":      pod.Name,
						"currentState": v.State.String(),
					}).Infof("Cilium Init container not completed")
					return false
				}
			}
		}
		return true
	}

	return WithTimeout(body, "Cilium init containers did not complete successfully", &TimeoutConfig{Timeout: HelperTimeout})
}

// CiliumNodesWait waits until all nodes in the Kubernetes cluster are annotated
// with Cilium annotations. Its runtime is bounded by a maximum of `HelperTimeout`.
// When a node is annotated with said annotations, it indicates
// that the tunnels in the nodes are set up and that cross-node traffic can be
// tested. Returns an error if the timeout is exceeded for waiting for the nodes
// to be annotated.
func (kub *Kubectl) CiliumNodesWait() (bool, error) {
	body := func() bool {
		filter := `{range .items[*]}{@.metadata.name}{"="}{@.metadata.annotations.io\.cilium\.network\.ipv4-pod-cidr}{"\n"}{end}`
		data := kub.ExecShort(fmt.Sprintf(
			"%s get nodes -o jsonpath='%s'", KubectlCmd, filter))
		if !data.WasSuccessful() {
			return false
		}
		result := data.KVOutput()
		for k, v := range result {
			if v == "" {
				kub.logger.Infof("Kubernetes node '%v' does not have Cilium metadata", k)
				return false
			}
			kub.logger.Infof("Kubernetes node '%v' IPv4 address: '%v'", k, v)
		}
		return true
	}
	err := WithTimeout(body, "Kubernetes node does not have cilium metadata", &TimeoutConfig{Timeout: HelperTimeout})
	if err != nil {
		return false, err
	}
	return true, nil
}

// WaitPolicyDeleted waits for policy policyName to be deleted from the
// cilium-agent running in pod. Returns an error if policyName was unable to
// be deleted after some amount of time.
func (kub *Kubectl) WaitPolicyDeleted(pod string, policyName string) error {
	body := func() bool {
		ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
		defer cancel()
		res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyName))

		// `cilium policy get <policy name>` fails if the policy is not loaded,
		// which is the condition we want.
		return !res.WasSuccessful()
	}

	return WithTimeout(body, fmt.Sprintf("Policy %s was not deleted in time", policyName), &TimeoutConfig{Timeout: HelperTimeout})
}

// CiliumIsPolicyLoaded returns true if the policy is loaded in the given
// cilium pod. It returns false in case the policy is not in place.
func (kub *Kubectl) CiliumIsPolicyLoaded(pod string, policyCmd string) bool {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	res := kub.CiliumExecContext(ctx, pod, fmt.Sprintf("cilium policy get %s", policyCmd))
	return res.WasSuccessful()
}

// CiliumPolicyRevision returns the policy revision in the specified Cilium pod.
// Returns an error if the policy revision cannot be retrieved.
func (kub *Kubectl) CiliumPolicyRevision(pod string) (int, error) {
	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
	defer cancel()
	res := kub.CiliumExecContext(ctx, pod, "cilium policy get -o json")
	if !res.WasSuccessful() {
		return -1, fmt.Errorf("cannot get the revision %s", res.Output())
	}

	revision, err := res.Filter("{.revision}")
	if err != nil {
		return -1, fmt.Errorf("cannot get revision from json: %s", err)
	}

	revi, err := strconv.Atoi(strings.Trim(revision.String(), "\n"))
	if err != nil {
		kub.logger.Errorf("revision on pod '%s' is not valid '%s'", pod, res.CombineOutput())
		return -1, err
	}
	return revi, nil
}

// ResourceLifeCycleAction represents an action performed upon objects in
// Kubernetes.
type ResourceLifeCycleAction string

// CiliumPolicyAction performs the specified action in Kubernetes for the policy
// stored in path filepath and waits up until timeout seconds for the policy
// to be applied in all Cilium endpoints. Returns an error if the policy is not
// imported before the timeout is exceeded.
func (kub *Kubectl) CiliumPolicyAction(namespace, filepath string, action ResourceLifeCycleAction, timeout time.Duration) (string, error) {
	numNodes := kub.GetNumNodes()

	// Test filter: https://jqplay.org/s/EgNzc06Cgn
	jqFilter := fmt.Sprintf(
		`[.items[]|{name:.metadata.name, enforcing: (.status|if has("nodes") then .nodes |to_entries|map_values(.value.enforcing) + [(.|length >= %d)]|all else true end)|tostring, status: has("status")|tostring}]`,
		numNodes)
	npFilter := fmt.Sprintf(
		`{range .items[*]}{"%s="}{.metadata.name}{" %s="}{.metadata.namespace}{"\n"}{end}`,
		KubectlPolicyNameLabel, KubectlPolicyNameSpaceLabel)
	kub.logger.Infof("Performing %s action on resource '%s'", action, filepath)

	if status := kub.Action(action, filepath, namespace); !status.WasSuccessful() {
		return "", status.GetErr(fmt.Sprintf("Cannot perform '%s' on resource '%s'", action, filepath))
	}

	if action == KubectlDelete {
		// Since the policy is being deleted, there is no need to validate
		// that it is enforced.
1545 return "", nil 1546 } 1547 1548 body := func() bool { 1549 var data []map[string]string 1550 cmd := fmt.Sprintf("%s get cnp --all-namespaces -o json | jq '%s'", 1551 KubectlCmd, jqFilter) 1552 1553 res := kub.ExecShort(cmd) 1554 if !res.WasSuccessful() { 1555 kub.logger.WithError(res.GetErr("")).Error("cannot get cnp status") 1556 return false 1557 1558 } 1559 1560 err := res.Unmarshal(&data) 1561 if err != nil { 1562 kub.logger.WithError(err).Error("Cannot unmarshal json") 1563 return false 1564 } 1565 1566 for _, item := range data { 1567 if item["enforcing"] != "true" || item["status"] != "true" { 1568 kub.logger.Errorf("Policy '%s' is not enforcing yet", item["name"]) 1569 return false 1570 } 1571 } 1572 return true 1573 } 1574 1575 err := WithTimeout( 1576 body, 1577 "cannot change state of resource correctly; command timed out", 1578 &TimeoutConfig{Timeout: timeout}) 1579 1580 if err != nil { 1581 return "", err 1582 } 1583 1584 knpBody := func() bool { 1585 knp := kub.ExecShort(fmt.Sprintf("%s get --all-namespaces netpol -o jsonpath='%s'", 1586 KubectlCmd, npFilter)) 1587 result := knp.ByLines() 1588 if len(result) == 0 { 1589 return true 1590 } 1591 1592 pods, err := kub.GetCiliumPods(KubeSystemNamespace) 1593 if err != nil { 1594 kub.logger.WithError(err).Error("cannot retrieve cilium pods") 1595 return false 1596 } 1597 for _, item := range result { 1598 for _, ciliumPod := range pods { 1599 if !kub.CiliumIsPolicyLoaded(ciliumPod, item) { 1600 kub.logger.Infof("Policy '%s' is not ready on Cilium pod '%s'", item, ciliumPod) 1601 return false 1602 } 1603 } 1604 } 1605 return true 1606 } 1607 1608 err = WithTimeout( 1609 knpBody, 1610 "cannot change state of Kubernetes network policies correctly; command timed out", 1611 &TimeoutConfig{Timeout: timeout}) 1612 return "", err 1613 } 1614 1615 // CiliumReport report the cilium pod to the log and appends the logs for the 1616 // given commands. 1617 func (kub *Kubectl) CiliumReport(namespace string, commands ...string) { 1618 if config.CiliumTestConfig.SkipLogGathering { 1619 ginkgoext.GinkgoPrint("Skipped gathering logs (-cilium.skipLogs=true)\n") 1620 return 1621 } 1622 1623 // Log gathering for Cilium should take at most 5 minutes. This ensures that 1624 // the CiliumReport stage doesn't cause the entire CI to hang. 
1625 
1626 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
1627 	defer cancel()
1628 
1629 	var wg sync.WaitGroup
1630 	wg.Add(1)
1631 
1632 	go func() {
1633 		defer wg.Done()
1634 		kub.DumpCiliumCommandOutput(ctx, namespace)
1635 		kub.GatherLogs(ctx)
1636 	}()
1637 
1638 	kub.CiliumCheckReport(ctx)
1639 
1640 	pods, err := kub.GetCiliumPodsContext(ctx, namespace)
1641 	if err != nil {
1642 		kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump")
1643 	}
1644 	res := kub.ExecContextShort(ctx, fmt.Sprintf("%s get pods -o wide --all-namespaces", KubectlCmd))
1645 	ginkgoext.GinkgoPrint(res.GetDebugMessage())
1646 
1647 	results := make([]*CmdRes, 0, len(pods)*len(commands))
1648 	ginkgoext.GinkgoPrint("Fetching command output from pods %s", pods)
1649 	for _, pod := range pods {
1650 		for _, cmd := range commands {
1651 			res = kub.ExecPodCmdBackground(ctx, namespace, pod, cmd, ExecOptions{SkipLog: true})
1652 			results = append(results, res)
1653 		}
1654 	}
1655 
1656 	wg.Wait()
1657 
1658 	for _, res := range results {
1659 		res.WaitUntilFinish()
1660 		ginkgoext.GinkgoPrint(res.GetDebugMessage())
1661 	}
1662 }
1663 
1664 // EtcdOperatorReport dumps etcd pod data into the report directory so that
1665 // the etcd operator status can be debugged when a test fails.
1666 func (kub *Kubectl) EtcdOperatorReport(ctx context.Context, reportCmds map[string]string) {
1667 	if reportCmds == nil {
1668 		reportCmds = make(map[string]string)
1669 	}
1670 
1671 	pods, err := kub.GetPodNamesContext(ctx, KubeSystemNamespace, "etcd_cluster=cilium-etcd")
1672 	if err != nil {
1673 		kub.logger.WithError(err).Error("No etcd pods")
1674 		return
1675 	}
1676 
1677 	etcdctl := "etcdctl --endpoints=https://%s.cilium-etcd.kube-system.svc:2379 " +
1678 		"--cert-file /etc/etcdtls/member/peer-tls/peer.crt " +
1679 		"--key-file /etc/etcdtls/member/peer-tls/peer.key " +
1680 		"--ca-file /etc/etcdtls/member/peer-tls/peer-ca.crt " +
1681 		" %s"
1682 
1683 	etcdDumpCommands := map[string]string{
1684 		"member list":    "etcd_%s_member_list",
1685 		"cluster-health": "etcd_%s_cluster_health",
1686 	}
1687 
1688 	for _, pod := range pods {
1689 		for cmd, reportFile := range etcdDumpCommands {
1690 			etcdCmd := fmt.Sprintf(etcdctl, pod, cmd)
1691 			command := fmt.Sprintf("%s -n %s exec -ti %s -- %s",
1692 				KubectlCmd, KubeSystemNamespace, pod, etcdCmd)
1693 			reportCmds[command] = fmt.Sprintf(reportFile, pod)
1694 		}
1695 	}
1696 }
1697 
1698 // CiliumCheckReport prints a few checks to the JUnit output to provide more
1699 // context to users. The checks printed are the following:
1700 // - Number of Kubernetes and Cilium policies installed.
1701 // - Policy enforcement status by endpoint.
1702 // - Controller, health, kvstore status.
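//
// A minimal usage sketch (kub is an assumed *Kubectl value; the 2-minute
// timeout is only an example):
//
//	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
//	defer cancel()
//	kub.CiliumCheckReport(ctx)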
1703 func (kub *Kubectl) CiliumCheckReport(ctx context.Context) {
1704 	pods, _ := kub.GetCiliumPods(KubeSystemNamespace)
1705 	fmt.Fprintf(CheckLogs, "Cilium pods: %v\n", pods)
1706 
1707 	var policiesFilter = `{range .items[*]}{.metadata.namespace}{"::"}{.metadata.name}{" "}{end}`
1708 	netpols := kub.ExecContextShort(ctx, fmt.Sprintf(
1709 		"%s get netpol -o jsonpath='%s' --all-namespaces",
1710 		KubectlCmd, policiesFilter))
1711 	fmt.Fprintf(CheckLogs, "Netpols loaded: %v\n", netpols.Output())
1712 
1713 	cnp := kub.ExecContextShort(ctx, fmt.Sprintf(
1714 		"%s get cnp -o jsonpath='%s' --all-namespaces",
1715 		KubectlCmd, policiesFilter))
1716 	fmt.Fprintf(CheckLogs, "CiliumNetworkPolicies loaded: %v\n", cnp.Output())
1717 
1718 	cepFilter := `{range .items[*]}{.metadata.name}{"="}{.status.policy.ingress.enforcing}{":"}{.status.policy.egress.enforcing}{"\n"}{end}`
1719 	cepStatus := kub.ExecContextShort(ctx, fmt.Sprintf(
1720 		"%s get cep -o jsonpath='%s' --all-namespaces",
1721 		KubectlCmd, cepFilter))
1722 
1723 	fmt.Fprintf(CheckLogs, "Endpoint Policy Enforcement:\n")
1724 
1725 	table := tabwriter.NewWriter(CheckLogs, 5, 0, 3, ' ', 0)
1726 	fmt.Fprintf(table, "Pod\tIngress\tEgress\n")
1727 	for pod, policy := range cepStatus.KVOutput() {
1728 		data := strings.SplitN(policy, ":", 2)
1729 		if len(data) != 2 {
1730 			// Guard against malformed output: SplitN may return a single element.
1731 			data = []string{"invalid value", "invalid value"}
1732 		}
1733 		fmt.Fprintf(table, "%s\t%s\t%s\n", pod, data[0], data[1])
1734 	}
1735 	table.Flush()
1736 
1737 	var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}::{.status.last-failure-msg}{"\n"}{end}`
1738 	var failedControllers string
1739 	for _, pod := range pods {
1740 		var prefix = ""
1741 		status := kub.CiliumExecContext(ctx, pod, "cilium status --all-controllers -o json")
1742 		result, err := status.Filter(controllersFilter)
1743 		if err != nil {
1744 			kub.logger.WithError(err).Error("Cannot filter controller status output")
1745 			continue
1746 		}
1747 		var total = 0
1748 		var failed = 0
1749 		for name, data := range result.KVOutput() {
1750 			total++
1751 			status := strings.SplitN(data, "::", 2)
1752 			if len(status) != 2 {
1753 				// Just make sure that the len of the output is 2 to not
1754 				// fail on index error in the following lines.
1755 				continue
1756 			}
1757 			if status[0] != "" {
1758 				failed++
1759 				prefix = "⚠️ "
1760 				failedControllers += fmt.Sprintf("controller %s failure '%s'\n", name, status[1])
1761 			}
1762 		}
1763 		statusFilter := `Status: {.cilium.state} Health: {.cluster.ciliumHealth.state}` +
1764 			` Nodes "{.cluster.nodes[*].name}" ContainerRuntime: {.container-runtime.state}` +
1765 			` Kubernetes: {.kubernetes.state} KVstore: {.kvstore.state}`
1766 		data, _ := status.Filter(statusFilter)
1767 		fmt.Fprintf(CheckLogs, "%sCilium agent '%s': %s Controllers: Total %d Failed %d\n",
1768 			prefix, pod, data, total, failed)
1769 		if failedControllers != "" {
1770 			fmt.Fprintf(CheckLogs, "Failed controllers:\n %s", failedControllers)
1771 		}
1772 	}
1773 }
1774 
1775 // ValidateNoErrorsInLogs checks that the Cilium logs since the given duration
1776 // (by default `CurrentGinkgoTestDescription().Duration`) do not contain `panic`,
1777 // `deadlock` or `segmentation fault` messages. If any of these messages are
1778 // found, it marks the test as failed.
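//
// A typical call site looks like the following sketch, where kub is an assumed
// *Kubectl for the running test and ginkgo is the upstream Ginkgo v1 package:
//
//	duration := ginkgo.CurrentGinkgoTestDescription().Duration
//	kub.ValidateNoErrorsInLogs(duration)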
1779 func (kub *Kubectl) ValidateNoErrorsInLogs(duration time.Duration) {
1780 
1781 	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
1782 	defer cancel()
1783 
1784 	var logs string
1785 	cmd := fmt.Sprintf("%s -n %s logs --timestamps=true -l k8s-app=cilium --since=%vs",
1786 		KubectlCmd, KubeSystemNamespace, duration.Seconds())
1787 	res := kub.ExecContext(ctx, fmt.Sprintf("%s --previous", cmd), ExecOptions{SkipLog: true})
1788 	if res.WasSuccessful() {
1789 		logs += res.Output().String()
1790 	}
1791 	res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
1792 	if res.WasSuccessful() {
1793 		logs += res.Output().String()
1794 	}
1795 	defer func() {
1796 		// Keep the cilium logs for the given test in a separate file.
1797 		testPath, err := CreateReportDirectory()
1798 		if err != nil {
1799 			kub.logger.WithError(err).Error("Cannot create report directory")
1800 			return
1801 		}
1802 		err = ioutil.WriteFile(
1803 			fmt.Sprintf("%s/%s", testPath, CiliumTestLog),
1804 			[]byte(logs), LogPerm)
1805 
1806 		if err != nil {
1807 			kub.logger.WithError(err).Errorf("Cannot create %s", CiliumTestLog)
1808 		}
1809 	}()
1810 
1811 	failIfContainsBadLogMsg(logs)
1812 
1813 	fmt.Fprint(CheckLogs, logutils.LogErrorsSummary(logs)) // the summary is not a format string
1814 }
1815 
1816 // GatherCiliumCoreDumps copies any core dumps present in the /tmp folder of
1817 // the pod into the test report folder for further analysis.
1818 func (kub *Kubectl) GatherCiliumCoreDumps(ctx context.Context, ciliumPod string) {
1819 	log := kub.logger.WithField("pod", ciliumPod)
1820 
1821 	cores := kub.CiliumExecContext(ctx, ciliumPod, "ls /tmp/ | grep core")
1822 	if !cores.WasSuccessful() {
1823 		log.Debug("There are no core dumps in the pod")
1824 		return
1825 	}
1826 
1827 	testPath, err := CreateReportDirectory()
1828 	if err != nil {
1829 		log.WithError(err).Errorf("cannot create test result path '%s'", testPath)
1830 		return
1831 	}
1832 	resultPath := filepath.Join(BasePath, testPath)
1833 
1834 	for _, core := range cores.ByLines() {
1835 		dst := filepath.Join(resultPath, core)
1836 		src := filepath.Join("/tmp/", core)
1837 		cmd := fmt.Sprintf("%s -n %s cp %s:%s %s",
1838 			KubectlCmd, KubeSystemNamespace,
1839 			ciliumPod, src, dst)
1840 		res := kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true})
1841 		if !res.WasSuccessful() {
1842 			log.WithField("output", res.CombineOutput()).Error("Cannot get core from pod")
1843 		}
1844 	}
1845 }
1846 
1847 // GetCiliumHostIPv4 retrieves the cilium_host IPv4 address of the given node.
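//
// Example sketch (the node name "k8s1" and the kub variable are assumptions
// made for illustration):
//
//	ctx, cancel := context.WithTimeout(context.Background(), ShortCommandTimeout)
//	defer cancel()
//	hostIP, err := kub.GetCiliumHostIPv4(ctx, "k8s1")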
1848 func (kub *Kubectl) GetCiliumHostIPv4(ctx context.Context, node string) (string, error) { 1849 pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, node) 1850 if err != nil { 1851 return "", fmt.Errorf("unable to retrieve cilium pod: %s", err) 1852 } 1853 1854 cmd := "ip -4 -o a show dev cilium_host | grep -o -e 'inet [0-9.]*' | cut -d' ' -f2" 1855 res := kub.ExecPodCmd(KubeSystemNamespace, pod, cmd) 1856 if !res.WasSuccessful() { 1857 return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr: %s", res.GetError()) 1858 } 1859 addr := res.SingleOut() 1860 if addr == "" { 1861 return "", fmt.Errorf("unable to retrieve cilium_host ipv4 addr") 1862 } 1863 1864 return addr, nil 1865 } 1866 1867 // DumpCiliumCommandOutput runs a variety of commands (CiliumKubCLICommands) and writes the results to 1868 // TestResultsPath 1869 func (kub *Kubectl) DumpCiliumCommandOutput(ctx context.Context, namespace string) { 1870 ReportOnPod := func(pod string) { 1871 logger := kub.logger.WithField("CiliumPod", pod) 1872 1873 testPath, err := CreateReportDirectory() 1874 if err != nil { 1875 logger.WithError(err).Errorf("cannot create test result path '%s'", testPath) 1876 return 1877 } 1878 1879 genReportCmds := func(cliCmds map[string]string) map[string]string { 1880 reportCmds := map[string]string{} 1881 for cmd, logfile := range cliCmds { 1882 command := fmt.Sprintf("%s exec -n %s %s -- %s", KubectlCmd, namespace, pod, cmd) 1883 reportCmds[command] = fmt.Sprintf("%s_%s", pod, logfile) 1884 } 1885 return reportCmds 1886 } 1887 1888 reportCmds := genReportCmds(ciliumKubCLICommands) 1889 reportMapContext(ctx, testPath, reportCmds, kub.SSHMeta) 1890 1891 logsPath := filepath.Join(BasePath, testPath) 1892 1893 // Get bugtool output. Since bugtool output is dumped in the pod's filesystem, 1894 // copy it over with `kubectl cp`. 1895 bugtoolCmd := fmt.Sprintf("%s exec -n %s %s -- %s", 1896 KubectlCmd, namespace, pod, CiliumBugtool) 1897 res := kub.ExecContext(ctx, bugtoolCmd, ExecOptions{SkipLog: true}) 1898 if !res.WasSuccessful() { 1899 logger.Errorf("%s failed: %s", bugtoolCmd, res.CombineOutput().String()) 1900 return 1901 } 1902 // Default output directory is /tmp for bugtool. 1903 res = kub.ExecContext(ctx, fmt.Sprintf("%s exec -n %s %s -- ls /tmp/", KubectlCmd, namespace, pod)) 1904 tmpList := res.ByLines() 1905 for _, line := range tmpList { 1906 // Only copy over bugtool output to directory. 
1907 if !strings.Contains(line, CiliumBugtool) { 1908 continue 1909 } 1910 1911 res = kub.ExecContext(ctx, fmt.Sprintf("%[1]s cp %[2]s/%[3]s:/tmp/%[4]s /tmp/%[4]s", 1912 KubectlCmd, namespace, pod, line), 1913 ExecOptions{SkipLog: true}) 1914 if !res.WasSuccessful() { 1915 logger.Errorf("'%s' failed: %s", res.GetCmd(), res.CombineOutput()) 1916 continue 1917 } 1918 1919 archiveName := filepath.Join(logsPath, fmt.Sprintf("bugtool-%s", pod)) 1920 res = kub.ExecContext(ctx, fmt.Sprintf("mkdir -p %s", archiveName)) 1921 if !res.WasSuccessful() { 1922 logger.WithField("cmd", res.GetCmd()).Errorf( 1923 "cannot create bugtool archive folder: %s", res.CombineOutput()) 1924 continue 1925 } 1926 1927 cmd := fmt.Sprintf("tar -xf /tmp/%s -C %s --strip-components=1", line, archiveName) 1928 res = kub.ExecContext(ctx, cmd, ExecOptions{SkipLog: true}) 1929 if !res.WasSuccessful() { 1930 logger.WithField("cmd", cmd).Errorf( 1931 "Cannot untar bugtool output: %s", res.CombineOutput()) 1932 continue 1933 } 1934 //Remove bugtool artifact, so it'll be not used if any other fail test 1935 _ = kub.ExecPodCmdBackground(ctx, KubeSystemNamespace, pod, fmt.Sprintf("rm /tmp/%s", line)) 1936 } 1937 1938 // Finally, get kvstore output - this is best effort; we do this last 1939 // because if connectivity to the kvstore is broken from a cilium pod, 1940 // we don't want the context above to timeout and as a result, get none 1941 // of the other logs from the tests. 1942 1943 // Use a shorter context for kvstore-related commands to avoid having 1944 // further log-gathering fail as well if the first Cilium pod fails to 1945 // gather kvstore logs. 1946 kvstoreCmdCtx, cancel := context.WithTimeout(ctx, MidCommandTimeout) 1947 defer cancel() 1948 reportCmds = genReportCmds(ciliumKubCLICommandsKVStore) 1949 reportMapContext(kvstoreCmdCtx, testPath, reportCmds, kub.SSHMeta) 1950 } 1951 1952 pods, err := kub.GetCiliumPodsContext(ctx, namespace) 1953 if err != nil { 1954 kub.logger.WithError(err).Error("cannot retrieve cilium pods on ReportDump") 1955 return 1956 } 1957 for _, pod := range pods { 1958 ReportOnPod(pod) 1959 kub.GatherCiliumCoreDumps(ctx, pod) 1960 } 1961 } 1962 1963 // GatherLogs dumps kubernetes pods, services, DaemonSet to the testResultsPath 1964 // directory 1965 func (kub *Kubectl) GatherLogs(ctx context.Context) { 1966 reportCmds := map[string]string{ 1967 "kubectl get pods --all-namespaces -o json": "pods.txt", 1968 "kubectl get services --all-namespaces -o json": "svc.txt", 1969 "kubectl get nodes -o json": "nodes.txt", 1970 "kubectl get ds --all-namespaces -o json": "ds.txt", 1971 "kubectl get cnp --all-namespaces -o json": "cnp.txt", 1972 "kubectl get cep --all-namespaces -o json": "cep.txt", 1973 "kubectl get netpol --all-namespaces -o json": "netpol.txt", 1974 "kubectl describe pods --all-namespaces": "pods_status.txt", 1975 "kubectl get replicationcontroller --all-namespaces -o json": "replicationcontroller.txt", 1976 "kubectl get deployment --all-namespaces -o json": "deployment.txt", 1977 } 1978 1979 kub.GeneratePodLogGatheringCommands(ctx, reportCmds) 1980 kub.EtcdOperatorReport(ctx, reportCmds) 1981 1982 res := kub.ExecContext(ctx, fmt.Sprintf(`%s api-resources | grep -v "^NAME" | awk '{print $1}'`, KubectlCmd)) 1983 if res.WasSuccessful() { 1984 for _, line := range res.ByLines() { 1985 key := fmt.Sprintf("%s get %s --all-namespaces -o wide", KubectlCmd, line) 1986 reportCmds[key] = fmt.Sprintf("api-resource-%s.txt", line) 1987 } 1988 } else { 1989 kub.logger.Errorf("Cannot get 
api-resources: %s", res.GetDebugMessage())
1990 	}
1991 
1992 	testPath, err := CreateReportDirectory()
1993 	if err != nil {
1994 		kub.logger.WithError(err).Errorf(
1995 			"cannot create test results path '%s'", testPath)
1996 		return
1997 	}
1998 	reportMap(testPath, reportCmds, kub.SSHMeta)
1999 
2000 	for _, node := range []string{K8s1VMName(), K8s2VMName()} {
2001 		vm := GetVagrantSSHMeta(node)
2002 		reportCmds := map[string]string{
2003 			"journalctl --no-pager -au kubelet": fmt.Sprintf("kubelet-%s.log", node),
2004 			"sudo top -n 1 -b":                  fmt.Sprintf("top-%s.log", node),
2005 			"sudo ps aux":                       fmt.Sprintf("ps-%s.log", node),
2006 		}
2007 		reportMapContext(ctx, testPath, reportCmds, vm)
2008 	}
2009 }
2010 
2011 // GeneratePodLogGatheringCommands generates the commands to gather logs for
2012 // all pods in the Kubernetes cluster, and maps the commands to the filename
2013 // in which they will be stored in reportCmds.
2014 func (kub *Kubectl) GeneratePodLogGatheringCommands(ctx context.Context, reportCmds map[string]string) {
2015 	if reportCmds == nil {
2016 		reportCmds = make(map[string]string)
2017 	}
2018 	pods, err := kub.GetAllPods(ctx, ExecOptions{SkipLog: true})
2019 	if err != nil {
2020 		kub.logger.WithError(err).Error("Unable to get pods from Kubernetes via kubectl")
2021 	}
2022 
2023 	for _, pod := range pods {
2024 		for _, containerStatus := range pod.Status.ContainerStatuses {
2025 			logCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
2026 			logfileName := fmt.Sprintf("pod-%s-%s-%s.log", pod.Namespace, pod.Name, containerStatus.Name)
2027 			reportCmds[logCmd] = logfileName
2028 
2029 			if containerStatus.RestartCount > 0 {
2030 				previousLogCmd := fmt.Sprintf("%s -n %s logs --timestamps %s -c %s --previous", KubectlCmd, pod.Namespace, pod.Name, containerStatus.Name)
2031 				previousLogfileName := fmt.Sprintf("pod-%s-%s-%s-previous.log", pod.Namespace, pod.Name, containerStatus.Name)
2032 				reportCmds[previousLogCmd] = previousLogfileName
2033 			}
2034 		}
2035 	}
2036 }
2037 
2038 // GetCiliumPodOnNode returns the name of the Cilium pod that is running on
2039 // the specified node in the specified namespace.
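//
// Example sketch (the node name "k8s1" and the kub variable are assumptions
// made for illustration):
//
//	pod, err := kub.GetCiliumPodOnNode(KubeSystemNamespace, "k8s1")
//	if err != nil {
//		return err
//	}
//	res := kub.ExecPodCmd(KubeSystemNamespace, pod, "cilium status")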
2040 func (kub *Kubectl) GetCiliumPodOnNode(namespace string, node string) (string, error) {
2041 	filter := fmt.Sprintf(
2042 		"-o jsonpath='{.items[?(@.spec.nodeName == \"%s\")].metadata.name}'", node)
2043 
2044 	res := kub.ExecShort(fmt.Sprintf(
2045 		"%s -n %s get pods -l k8s-app=cilium %s", KubectlCmd, namespace, filter))
2046 	if !res.WasSuccessful() {
2047 		return "", fmt.Errorf("Cilium pod not found on node '%s'", node)
2048 	}
2049 
2050 	return res.Output().String(), nil
2051 }
2052 
2053 func (kub *Kubectl) ciliumPreFlightCheck() error {
2054 	err := kub.ciliumStatusPreFlightCheck()
2055 	if err != nil {
2056 		return fmt.Errorf("status is unhealthy: %s", err)
2057 	}
2058 
2059 	err = kub.ciliumControllersPreFlightCheck()
2060 	if err != nil {
2061 		return fmt.Errorf("controllers are failing: %s", err)
2062 	}
2063 
2064 	switch GetCurrentIntegration() {
2065 	case CIIntegrationFlannel:
2066 	default:
2067 		err = kub.ciliumHealthPreFlightCheck()
2068 		if err != nil {
2069 			return fmt.Errorf("connectivity health is failing: %s", err)
2070 		}
2071 	}
2072 	err = kub.fillServiceCache()
2073 	if err != nil {
2074 		return fmt.Errorf("unable to fill service cache: %s", err)
2075 	}
2076 	err = kub.ciliumServicePreFlightCheck()
2077 	if err != nil {
2078 		return fmt.Errorf("cilium services are not set up correctly: %s", err)
2079 	}
2080 	err = kub.servicePreFlightCheck("kubernetes", "default")
2081 	if err != nil {
2082 		return fmt.Errorf("kubernetes service is not ready: %s", err)
2083 	}
2084 
2085 	return nil
2086 }
2087 
2088 // CiliumPreFlightCheck checks that various subsystems within Cilium are in a
2089 // good state. If any of the multiple preflight checks fails, it returns an
2090 // error.
2091 func (kub *Kubectl) CiliumPreFlightCheck() error {
2092 	ginkgoext.By("Performing Cilium preflight check")
2093 	// This is wrapped in WithTimeout because the status can be ready while the
2094 	// other nodes have not shown up yet, and cilium-health can then fail as a false positive.
2095 var ( 2096 lastError string 2097 consecutiveFailures int 2098 ) 2099 2100 body := func() bool { 2101 if err := kub.ciliumPreFlightCheck(); err != nil { 2102 newError := err.Error() 2103 if lastError != newError || consecutiveFailures >= 5 { 2104 ginkgoext.GinkgoPrint("Cilium is not ready yet: %s", newError) 2105 lastError = newError 2106 consecutiveFailures = 0 2107 } else { 2108 consecutiveFailures++ 2109 } 2110 return false 2111 } 2112 return true 2113 2114 } 2115 timeoutErr := WithTimeout(body, "PreflightCheck failed", &TimeoutConfig{Timeout: HelperTimeout}) 2116 if timeoutErr != nil { 2117 return fmt.Errorf("CiliumPreFlightCheck error: %s: Last polled error: %s", timeoutErr, lastError) 2118 } 2119 return nil 2120 } 2121 2122 func (kub *Kubectl) ciliumStatusPreFlightCheck() error { 2123 ginkgoext.By("Performing Cilium status preflight check") 2124 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2125 if err != nil { 2126 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2127 } 2128 for _, pod := range ciliumPods { 2129 status := kub.CiliumExec(pod, "cilium status --all-health --all-nodes") 2130 if !status.WasSuccessful() { 2131 return fmt.Errorf("cilium-agent '%s' is unhealthy: %s", pod, status.OutputPrettyPrint()) 2132 } 2133 noQuorum, err := regexp.Match(`^.*KVStore:.*has-quorum=false.*$`, status.Output().Bytes()) 2134 if err != nil { 2135 return fmt.Errorf("Failed to check for kvstore quorum: %s", err.Error()) 2136 } 2137 if noQuorum { 2138 return fmt.Errorf("KVStore doesn't have quorum: %s", status.OutputPrettyPrint()) 2139 } 2140 } 2141 2142 return nil 2143 } 2144 2145 func (kub *Kubectl) ciliumControllersPreFlightCheck() error { 2146 ginkgoext.By("Performing Cilium controllers preflight check") 2147 var controllersFilter = `{range .controllers[*]}{.name}{"="}{.status.consecutive-failure-count}{"\n"}{end}` 2148 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2149 if err != nil { 2150 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2151 } 2152 for _, pod := range ciliumPods { 2153 status := kub.CiliumExec(pod, fmt.Sprintf( 2154 "cilium status --all-controllers -o jsonpath='%s'", controllersFilter)) 2155 if !status.WasSuccessful() { 2156 return fmt.Errorf("cilium-agent '%s': Cannot run cilium status: %s", 2157 pod, status.OutputPrettyPrint()) 2158 } 2159 for controller, status := range status.KVOutput() { 2160 if status != "0" { 2161 failmsg := kub.CiliumExec(pod, "cilium status --all-controllers") 2162 return fmt.Errorf("cilium-agent '%s': controller %s is failing: %s", 2163 pod, controller, failmsg.OutputPrettyPrint()) 2164 } 2165 } 2166 } 2167 2168 return nil 2169 } 2170 2171 func (kub *Kubectl) ciliumHealthPreFlightCheck() error { 2172 ginkgoext.By("Performing Cilium health check") 2173 var nodesFilter = `{.nodes[*].name}` 2174 var statusFilter = `{range .nodes[*]}{.name}{"="}{.host.primary-address.http.status}{"\n"}{end}` 2175 2176 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2177 if err != nil { 2178 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2179 } 2180 for _, pod := range ciliumPods { 2181 status := kub.CiliumExec(pod, "cilium-health status -o json --probe") 2182 if !status.WasSuccessful() { 2183 return fmt.Errorf( 2184 "Cluster connectivity is unhealthy on '%s': %s", 2185 pod, status.OutputPrettyPrint()) 2186 } 2187 2188 // By Checking that the node list is the same 2189 nodes, err := status.Filter(nodesFilter) 2190 if err != nil { 2191 return fmt.Errorf("Cannot unmarshal health status: %s", err) 2192 } 
2193 2194 nodeCount := strings.Split(nodes.String(), " ") 2195 if len(ciliumPods) != len(nodeCount) { 2196 return fmt.Errorf( 2197 "cilium-agent '%s': Only %d/%d nodes appeared in cilium-health status. nodes = '%+v'", 2198 pod, len(nodeCount), len(ciliumPods), nodeCount) 2199 } 2200 2201 healthStatus, err := status.Filter(statusFilter) 2202 if err != nil { 2203 return fmt.Errorf("Cannot unmarshal health status: %s", err) 2204 } 2205 2206 for node, status := range healthStatus.KVOutput() { 2207 if status != "" { 2208 return fmt.Errorf("cilium-agent '%s': connectivity to node '%s' is unhealthy: '%s'", 2209 pod, node, status) 2210 } 2211 } 2212 } 2213 return nil 2214 } 2215 2216 // serviceCache keeps service information from 2217 // k8s, Cilium services and Cilium bpf load balancer map 2218 type serviceCache struct { 2219 services v1.ServiceList 2220 endpoints v1.EndpointsList 2221 pods []ciliumPodServiceCache 2222 } 2223 2224 // ciliumPodServiceCache 2225 type ciliumPodServiceCache struct { 2226 name string 2227 services []models.Service 2228 loadBalancers map[string][]string 2229 } 2230 2231 func (kub *Kubectl) fillServiceCache() error { 2232 cache := serviceCache{} 2233 2234 svcRes := kub.GetFromAllNS("service") 2235 err := svcRes.GetErr("Unable to get k8s services") 2236 if err != nil { 2237 return err 2238 } 2239 err = svcRes.Unmarshal(&cache.services) 2240 2241 if err != nil { 2242 return fmt.Errorf("Unable to unmarshal K8s services: %s", err.Error()) 2243 } 2244 2245 epRes := kub.GetFromAllNS("endpoints") 2246 err = epRes.GetErr("Unable to get k8s endpoints") 2247 if err != nil { 2248 return err 2249 } 2250 err = epRes.Unmarshal(&cache.endpoints) 2251 if err != nil { 2252 return fmt.Errorf("Unable to unmarshal K8s endpoints: %s", err.Error()) 2253 } 2254 2255 ciliumPods, err := kub.GetCiliumPods(KubeSystemNamespace) 2256 if err != nil { 2257 return fmt.Errorf("cannot retrieve cilium pods: %s", err) 2258 } 2259 ciliumSvcCmd := "cilium service list -o json" 2260 ciliumBpfLbCmd := "cilium bpf lb list -o json" 2261 2262 cache.pods = make([]ciliumPodServiceCache, 0, len(ciliumPods)) 2263 for _, pod := range ciliumPods { 2264 podCache := ciliumPodServiceCache{name: pod} 2265 2266 ciliumServicesRes := kub.CiliumExec(pod, ciliumSvcCmd) 2267 err := ciliumServicesRes.GetErr( 2268 fmt.Sprintf("Unable to retrieve Cilium services on %s", pod)) 2269 if err != nil { 2270 return err 2271 } 2272 2273 err = ciliumServicesRes.Unmarshal(&podCache.services) 2274 if err != nil { 2275 return fmt.Errorf("Unable to unmarshal Cilium services: %s", err.Error()) 2276 } 2277 2278 ciliumLbRes := kub.CiliumExec(pod, ciliumBpfLbCmd) 2279 err = ciliumLbRes.GetErr( 2280 fmt.Sprintf("Unable to retrieve Cilium bpf lb list on %s", pod)) 2281 if err != nil { 2282 return err 2283 } 2284 2285 err = ciliumLbRes.Unmarshal(&podCache.loadBalancers) 2286 if err != nil { 2287 return fmt.Errorf("Unable to unmarshal Cilium bpf lb list: %s", err.Error()) 2288 } 2289 cache.pods = append(cache.pods, podCache) 2290 } 2291 kub.serviceCache = &cache 2292 return nil 2293 } 2294 2295 // KubeDNSPreFlightCheck makes sure that kube-dns is plumbed into Cilium. 
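//
// It is typically called before DNS-dependent test steps, for example (kub is
// an assumed *Kubectl value):
//
//	if err := kub.KubeDNSPreFlightCheck(); err != nil {
//		ginkgoext.Fail(fmt.Sprintf("kube-dns is not plumbed into Cilium: %s", err), 1)
//	}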
2296 func (kub *Kubectl) KubeDNSPreFlightCheck() error { 2297 err := kub.fillServiceCache() 2298 if err != nil { 2299 return err 2300 } 2301 return kub.servicePreFlightCheck("kube-dns", "kube-system") 2302 } 2303 2304 // servicePreFlightCheck makes sure that k8s service with given name and 2305 // namespace is properly plumbed in Cilium 2306 func (kub *Kubectl) servicePreFlightCheck(serviceName, serviceNamespace string) error { 2307 ginkgoext.By("Performing K8s service preflight check") 2308 var service *v1.Service 2309 for _, s := range kub.serviceCache.services.Items { 2310 if s.Name == serviceName && s.Namespace == serviceNamespace { 2311 service = &s 2312 break 2313 } 2314 } 2315 2316 if service == nil { 2317 return fmt.Errorf("%s/%s service not found in service cache", serviceName, serviceNamespace) 2318 } 2319 2320 for _, pod := range kub.serviceCache.pods { 2321 2322 err := validateK8sService(*service, kub.serviceCache.endpoints.Items, pod.services, pod.loadBalancers) 2323 if err != nil { 2324 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2325 } 2326 } 2327 return nil 2328 } 2329 2330 func validateK8sService(k8sService v1.Service, k8sEndpoints []v1.Endpoints, ciliumSvcs []models.Service, ciliumLB map[string][]string) error { 2331 var ciliumService *models.Service 2332 CILIUM_SERVICES: 2333 for _, cSvc := range ciliumSvcs { 2334 if cSvc.Status.Realized.FrontendAddress.IP == k8sService.Spec.ClusterIP { 2335 for _, port := range k8sService.Spec.Ports { 2336 if int32(cSvc.Status.Realized.FrontendAddress.Port) == port.Port { 2337 ciliumService = &cSvc 2338 break CILIUM_SERVICES 2339 } 2340 } 2341 } 2342 } 2343 2344 if ciliumService == nil { 2345 return fmt.Errorf("Failed to find Cilium service corresponding to %s/%s k8s service", k8sService.Namespace, k8sService.Name) 2346 } 2347 2348 temp := map[string]bool{} 2349 err := validateCiliumSvc(*ciliumService, []v1.Service{k8sService}, k8sEndpoints, temp) 2350 if err != nil { 2351 return err 2352 } 2353 return validateCiliumSvcLB(*ciliumService, ciliumLB) 2354 } 2355 2356 // ciliumServicePreFlightCheck checks that k8s service is plumbed correctly 2357 func (kub *Kubectl) ciliumServicePreFlightCheck() error { 2358 ginkgoext.By("Performing Cilium service preflight check") 2359 for _, pod := range kub.serviceCache.pods { 2360 k8sServicesFound := map[string]bool{} 2361 2362 for _, cSvc := range pod.services { 2363 err := validateCiliumSvc(cSvc, kub.serviceCache.services.Items, kub.serviceCache.endpoints.Items, k8sServicesFound) 2364 if err != nil { 2365 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2366 } 2367 } 2368 2369 notFoundServices := make([]string, 0, len(kub.serviceCache.services.Items)) 2370 for _, k8sSvc := range kub.serviceCache.services.Items { 2371 key := serviceKey(k8sSvc) 2372 // ignore headless services 2373 if k8sSvc.Spec.Type == v1.ServiceTypeClusterIP && 2374 k8sSvc.Spec.ClusterIP == v1.ClusterIPNone { 2375 continue 2376 } 2377 // TODO(brb) check NodePort services 2378 if k8sSvc.Spec.Type == v1.ServiceTypeNodePort { 2379 continue 2380 } 2381 if _, ok := k8sServicesFound[key]; !ok { 2382 notFoundServices = append(notFoundServices, key) 2383 } 2384 } 2385 2386 if len(notFoundServices) > 0 { 2387 return fmt.Errorf("Failed to find Cilium service corresponding to k8s services %s on pod %v", 2388 strings.Join(notFoundServices, ", "), pod) 2389 } 2390 2391 for _, cSvc := range pod.services { 2392 err := validateCiliumSvcLB(cSvc, pod.loadBalancers) 2393 if 
err != nil { 2394 return fmt.Errorf("Error validating Cilium service on pod %v: %s", pod, err.Error()) 2395 } 2396 } 2397 if len(pod.services) != len(pod.loadBalancers) { 2398 return fmt.Errorf("Length of Cilium services doesn't match length of bpf LB map on pod %v", pod) 2399 } 2400 } 2401 return nil 2402 } 2403 2404 // DeleteETCDOperator delete the etcd-operator from the cluster pointed by kub. 2405 func (kub *Kubectl) DeleteETCDOperator() { 2406 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete crd etcdclusters.etcd.database.coreos.com", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2407 log.Warningf("Unable to delete etcdclusters.etcd.database.coreos.com CRD: %s", res.OutputPrettyPrint()) 2408 } 2409 2410 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete deployment cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2411 log.Warningf("Unable to delete cilium-etcd-operator Deployment: %s", res.OutputPrettyPrint()) 2412 } 2413 2414 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2415 log.Warningf("Unable to delete cilium-etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint()) 2416 } 2417 2418 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole cilium-etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2419 log.Warningf("Unable to delete cilium-etcd-operator ClusterRole: %s", res.OutputPrettyPrint()) 2420 } 2421 2422 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-operator", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2423 log.Warningf("Unable to delete cilium-etcd-operator ServiceAccount: %s", res.OutputPrettyPrint()) 2424 } 2425 2426 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrolebinding etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2427 log.Warningf("Unable to delete etcd-operator ClusterRoleBinding: %s", res.OutputPrettyPrint()) 2428 } 2429 2430 if res := kub.ExecShort(fmt.Sprintf("%s delete clusterrole etcd-operator", KubectlCmd)); !res.WasSuccessful() { 2431 log.Warningf("Unable to delete etcd-operator ClusterRole: %s", res.OutputPrettyPrint()) 2432 } 2433 2434 if res := kub.ExecShort(fmt.Sprintf("%s -n %s delete serviceaccount cilium-etcd-sa", KubectlCmd, KubeSystemNamespace)); !res.WasSuccessful() { 2435 log.Warningf("Unable to delete cilium-etcd-sa ServiceAccount: %s", res.OutputPrettyPrint()) 2436 } 2437 } 2438 2439 func serviceKey(s v1.Service) string { 2440 return s.Namespace + "/" + s.Name 2441 } 2442 2443 // validateCiliumSvc checks if given Cilium service has corresponding k8s services and endpoints in given slices 2444 func validateCiliumSvc(cSvc models.Service, k8sSvcs []v1.Service, k8sEps []v1.Endpoints, k8sServicesFound map[string]bool) error { 2445 var k8sService *v1.Service 2446 2447 // TODO(brb) validate NodePort services 2448 if cSvc.Status.Realized.Flags != nil && cSvc.Status.Realized.Flags.NodePort { 2449 return nil 2450 } 2451 2452 for _, k8sSvc := range k8sSvcs { 2453 if k8sSvc.Spec.ClusterIP == cSvc.Status.Realized.FrontendAddress.IP { 2454 k8sService = &k8sSvc 2455 break 2456 } 2457 } 2458 if k8sService == nil { 2459 return fmt.Errorf("Could not find Cilium service with ip %s in k8s", cSvc.Spec.FrontendAddress.IP) 2460 } 2461 2462 var k8sServicePort *v1.ServicePort 2463 for _, k8sPort := range k8sService.Spec.Ports { 2464 if k8sPort.Port == int32(cSvc.Status.Realized.FrontendAddress.Port) { 2465 k8sServicePort = &k8sPort 2466 
k8sServicesFound[serviceKey(*k8sService)] = true 2467 break 2468 } 2469 } 2470 if k8sServicePort == nil { 2471 return fmt.Errorf("Could not find Cilium service with address %s:%d in k8s", cSvc.Spec.FrontendAddress.IP, cSvc.Spec.FrontendAddress.Port) 2472 } 2473 2474 for _, backAddr := range cSvc.Status.Realized.BackendAddresses { 2475 foundEp := false 2476 for _, k8sEp := range k8sEps { 2477 for _, epAddr := range getK8sEndpointAddresses(k8sEp) { 2478 if addrsEqual(backAddr, epAddr) { 2479 foundEp = true 2480 } 2481 } 2482 } 2483 if !foundEp { 2484 return fmt.Errorf( 2485 "Could not match cilium service backend address %s:%d with k8s endpoint", 2486 *backAddr.IP, backAddr.Port) 2487 } 2488 } 2489 return nil 2490 } 2491 2492 func validateCiliumSvcLB(cSvc models.Service, lbMap map[string][]string) error { 2493 frontendAddress := cSvc.Status.Realized.FrontendAddress.IP + ":" + strconv.Itoa(int(cSvc.Status.Realized.FrontendAddress.Port)) 2494 bpfBackends, ok := lbMap[frontendAddress] 2495 if !ok { 2496 return fmt.Errorf("%s bpf lb map entry not found", frontendAddress) 2497 } 2498 2499 BACKENDS: 2500 for _, addr := range cSvc.Status.Realized.BackendAddresses { 2501 backend := *addr.IP + ":" + strconv.Itoa(int(addr.Port)) 2502 for _, bpfAddr := range bpfBackends { 2503 if strings.Contains(bpfAddr, backend) { 2504 continue BACKENDS 2505 } 2506 } 2507 return fmt.Errorf("%s not found in bpf map", backend) 2508 } 2509 return nil 2510 } 2511 2512 func getK8sEndpointAddresses(ep v1.Endpoints) []*models.BackendAddress { 2513 result := []*models.BackendAddress{} 2514 for _, subset := range ep.Subsets { 2515 for _, addr := range subset.Addresses { 2516 ip := addr.IP 2517 for _, port := range subset.Ports { 2518 ba := &models.BackendAddress{ 2519 IP: &ip, 2520 Port: uint16(port.Port), 2521 } 2522 result = append(result, ba) 2523 } 2524 } 2525 } 2526 return result 2527 } 2528 2529 func addrsEqual(addr1, addr2 *models.BackendAddress) bool { 2530 return *addr1.IP == *addr2.IP && addr1.Port == addr2.Port 2531 } 2532 2533 // GenerateNamespaceForTest generates a namespace based off of the current test 2534 // which is running. 2535 func GenerateNamespaceForTest() string { 2536 lowered := strings.ToLower(ginkgoext.CurrentGinkgoTestDescription().FullTestText) 2537 // K8s namespaces cannot have spaces. 2538 replaced := strings.Replace(lowered, " ", "", -1) 2539 return replaced 2540 }
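
// exampleCiliumPolicyFlow is an illustrative sketch, not part of the helper
// API, showing how the helpers above are typically combined in a test: run the
// preflight checks, import a policy, and remove it again. The kube-system
// namespace and the KubectlApply action are assumptions made for this example;
// policyPath is expected to point at a CiliumNetworkPolicy manifest.
func exampleCiliumPolicyFlow(kub *Kubectl, policyPath string) error {
	// Make sure every Cilium agent is healthy before touching policies.
	if err := kub.CiliumPreFlightCheck(); err != nil {
		return err
	}

	// Import the policy and wait until every Cilium endpoint enforces it.
	if _, err := kub.CiliumPolicyAction(KubeSystemNamespace, policyPath, KubectlApply, HelperTimeout); err != nil {
		return err
	}

	// Remove the policy again and wait until it is gone from all agents.
	if _, err := kub.CiliumPolicyAction(KubeSystemNamespace, policyPath, KubectlDelete, HelperTimeout); err != nil {
		return err
	}
	return nil
}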