k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e_node/util.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2enode

import (
	"context"
	"crypto/tls"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/util/procfs"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"

	"go.opentelemetry.io/otel/trace/noop"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/component-base/featuregate"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
	remote "k8s.io/cri-client/pkg"
	"k8s.io/klog/v2"
	kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
	kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
	stats "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
	"k8s.io/kubelet/pkg/types"
	"k8s.io/kubernetes/pkg/cluster/ports"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/util"

	"github.com/coreos/go-systemd/v22/dbus"
	"k8s.io/kubernetes/test/e2e/framework"
	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2enodekubelet "k8s.io/kubernetes/test/e2e_node/kubeletconfig"
	imageutils "k8s.io/kubernetes/test/utils/image"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
)

var startServices = flag.Bool("start-services", true, "If true, start local node services")
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
var busyboxImage = imageutils.GetE2EImage(imageutils.BusyBox)
var agnhostImage = imageutils.GetE2EImage(imageutils.Agnhost)

const (
	// Kubelet internal cgroup name for node allocatable cgroup.
	defaultNodeAllocatableCgroup = "kubepods"
	// defaultPodResourcesPath is the path to the local endpoint serving the podresources GRPC service.
	defaultPodResourcesPath    = "/var/lib/kubelet/pod-resources"
	defaultPodResourcesTimeout = 10 * time.Second
	defaultPodResourcesMaxSize = 1024 * 1024 * 16 // 16 MiB
	// state files
	cpuManagerStateFile    = "/var/lib/kubelet/cpu_manager_state"
	memoryManagerStateFile = "/var/lib/kubelet/memory_manager_state"
)

var (
	kubeletHealthCheckURL    = fmt.Sprintf("http://127.0.0.1:%d/healthz", ports.KubeletHealthzPort)
	containerRuntimeUnitName = ""
	// kubeletCfg is the kubelet configuration the test is running against.
	kubeletCfg *kubeletconfig.KubeletConfiguration
)

func getNodeSummary(ctx context.Context) (*stats.Summary, error) {
	kubeletConfig, err := getCurrentKubeletConfig(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get current kubelet config: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://%s/stats/summary", net.JoinHostPort(kubeletConfig.Address, strconv.Itoa(int(kubeletConfig.ReadOnlyPort)))), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to build http request: %w", err)
	}
	req.Header.Add("Accept", "application/json")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to get /stats/summary: %w", err)
	}

	defer resp.Body.Close()
	contentsBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read /stats/summary: %w", err)
	}

	decoder := json.NewDecoder(strings.NewReader(string(contentsBytes)))
	summary := stats.Summary{}
	err = decoder.Decode(&summary)
	if err != nil {
		return nil, fmt.Errorf("failed to parse /stats/summary into stats.Summary: %w", err)
	}
	return &summary, nil
}

func getV1alpha1NodeDevices(ctx context.Context) (*kubeletpodresourcesv1alpha1.ListPodResourcesResponse, error) {
	endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
	if err != nil {
		return nil, fmt.Errorf("error getting local endpoint: %w", err)
	}
	client, conn, err := podresources.GetV1alpha1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
	if err != nil {
		return nil, fmt.Errorf("error getting gRPC client: %w", err)
	}
	defer conn.Close()
	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	resp, err := client.List(ctx, &kubeletpodresourcesv1alpha1.ListPodResourcesRequest{})
	if err != nil {
		return nil, fmt.Errorf("%v.List(_) = _, %v", client, err)
	}
	return resp, nil
}

func getV1NodeDevices(ctx context.Context) (*kubeletpodresourcesv1.ListPodResourcesResponse, error) {
	endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
	if err != nil {
		return nil, fmt.Errorf("error getting local endpoint: %w", err)
	}
	client, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
	if err != nil {
		return nil, fmt.Errorf("error getting gRPC client: %w", err)
	}
	defer conn.Close()
	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	resp, err := client.List(ctx, &kubeletpodresourcesv1.ListPodResourcesRequest{})
	if err != nil {
		return nil, fmt.Errorf("%v.List(_) = _, %v", client, err)
	}
	return resp, nil
}

// getCurrentKubeletConfig returns the current KubeletConfiguration.
func getCurrentKubeletConfig(ctx context.Context) (*kubeletconfig.KubeletConfiguration, error) {
	// namespace only relevant if useProxy==true, so we don't bother
	return e2enodekubelet.GetCurrentKubeletConfig(ctx, framework.TestContext.NodeName, "", false, framework.TestContext.StandaloneMode)
}

// cleanupPods registers an AfterEach that deletes any pods the test created in its namespace.
func cleanupPods(f *framework.Framework) {
	ginkgo.AfterEach(func(ctx context.Context) {
		ginkgo.By("Deleting any Pods created by the test in namespace: " + f.Namespace.Name)
		l, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{})
		framework.ExpectNoError(err)
		for _, p := range l.Items {
			if p.Namespace != f.Namespace.Name {
				continue
			}
			framework.Logf("Deleting pod: %s", p.Name)
			e2epod.NewPodClient(f).DeleteSync(ctx, p.Name, metav1.DeleteOptions{}, 2*time.Minute)
		}
	})
}

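// Illustrative usage of cleanupPods (a sketch, not taken from a real suite; the Describe
// text and framework base name are placeholders): it registers a ginkgo AfterEach, so it
// must be called from inside a Describe/Context body, not from within an It.
//
//	var _ = ginkgo.Describe("some placeholder feature", func() {
//		f := framework.NewDefaultFramework("placeholder-feature")
//		cleanupPods(f)
//		// Its created here may leave pods behind; the AfterEach above deletes them.
//	})
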
// tempSetCurrentKubeletConfig must be called within a Context. It allows the given function to
// modify the KubeletConfiguration during the BeforeEach of the context.
// The change is reverted in the AfterEach of the context.
func tempSetCurrentKubeletConfig(f *framework.Framework, updateFunction func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration)) {
	var oldCfg *kubeletconfig.KubeletConfiguration

	ginkgo.BeforeEach(func(ctx context.Context) {
		var err error
		oldCfg, err = getCurrentKubeletConfig(ctx)
		framework.ExpectNoError(err)

		newCfg := oldCfg.DeepCopy()
		updateFunction(ctx, newCfg)
		if apiequality.Semantic.DeepEqual(*newCfg, *oldCfg) {
			return
		}

		updateKubeletConfig(ctx, f, newCfg, true)
	})

	ginkgo.AfterEach(func(ctx context.Context) {
		if oldCfg != nil {
			// Update the Kubelet configuration.
			updateKubeletConfig(ctx, f, oldCfg, true)
		}
	})
}

func updateKubeletConfig(ctx context.Context, f *framework.Framework, kubeletConfig *kubeletconfig.KubeletConfiguration, deleteStateFiles bool) {
	// Update the Kubelet configuration.
	ginkgo.By("Stopping the kubelet")
	startKubelet := stopKubelet()

	// wait until the kubelet health check fails
	gomega.Eventually(ctx, func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, time.Minute, time.Second).Should(gomega.BeFalse())

	// Delete the CPU and memory manager state files to be sure they will not prevent the kubelet from restarting.
	if deleteStateFiles {
		deleteStateFile(cpuManagerStateFile)
		deleteStateFile(memoryManagerStateFile)
	}

	framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(kubeletConfig))

	ginkgo.By("Starting the kubelet")
	startKubelet()
	waitForKubeletToStart(ctx, f)
}

func waitForKubeletToStart(ctx context.Context, f *framework.Framework) {
	// wait until the kubelet health check succeeds
	gomega.Eventually(ctx, func() bool {
		return kubeletHealthCheck(kubeletHealthCheckURL)
	}, 2*time.Minute, 5*time.Second).Should(gomega.BeTrue())

	// Wait for the Kubelet to be ready.
	gomega.Eventually(ctx, func(ctx context.Context) bool {
		nodes, err := e2enode.TotalReady(ctx, f.ClientSet)
		framework.ExpectNoError(err)
		return nodes == 1
	}, time.Minute, time.Second).Should(gomega.BeTrue())
}

func deleteStateFile(stateFileName string) {
	err := exec.Command("/bin/sh", "-c", fmt.Sprintf("rm -f %s", stateFileName)).Run()
	framework.ExpectNoError(err, "failed to delete the state file")
}

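// Illustrative usage of tempSetCurrentKubeletConfig above (a sketch; the feature gate name is
// a placeholder): the callback mutates a copy of the current config, and the kubelet is
// restarted with the new config before each spec and restored afterwards.
//
//	tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
//		if initialConfig.FeatureGates == nil {
//			initialConfig.FeatureGates = map[string]bool{}
//		}
//		initialConfig.FeatureGates["SomePlaceholderFeature"] = true
//	})
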
// listNamespaceEvents lists the events in the given namespace.
func listNamespaceEvents(ctx context.Context, c clientset.Interface, ns string) error {
	ls, err := c.CoreV1().Events(ns).List(ctx, metav1.ListOptions{})
	if err != nil {
		return err
	}
	for _, event := range ls.Items {
		klog.Infof("Event(%#v): type: '%v' reason: '%v' %v", event.InvolvedObject, event.Type, event.Reason, event.Message)
	}
	return nil
}

func logPodEvents(ctx context.Context, f *framework.Framework) {
	framework.Logf("Summary of pod events during the test:")
	err := listNamespaceEvents(ctx, f.ClientSet, f.Namespace.Name)
	framework.ExpectNoError(err)
}

func logNodeEvents(ctx context.Context, f *framework.Framework) {
	framework.Logf("Summary of node events during the test:")
	err := listNamespaceEvents(ctx, f.ClientSet, "")
	framework.ExpectNoError(err)
}

func getLocalNode(ctx context.Context, f *framework.Framework) *v1.Node {
	nodeList, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
	framework.ExpectNoError(err)
	gomega.Expect(nodeList.Items).Should(gomega.HaveLen(1), "Unexpected number of node objects for node e2e. Expects only one node.")
	return &nodeList.Items[0]
}

// getLocalTestNode fetches the node object describing the local worker node set up by the e2e_node infra, along with its ready state.
// getLocalTestNode is a variant of `getLocalNode` which reports, but does not enforce, the node readiness state, letting
// the caller decide. The check is intentionally done like `getLocalNode` does.
// Note `getLocalNode` aborts (as in ginkgo.Expect) the test implicitly if the worker node is not ready.
func getLocalTestNode(ctx context.Context, f *framework.Framework) (*v1.Node, bool) {
	node, err := f.ClientSet.CoreV1().Nodes().Get(ctx, framework.TestContext.NodeName, metav1.GetOptions{})
	framework.ExpectNoError(err)
	ready := e2enode.IsNodeReady(node)
	schedulable := e2enode.IsNodeSchedulable(node)
	framework.Logf("node %q ready=%v schedulable=%v", node.Name, ready, schedulable)
	return node, ready && schedulable
}

// logKubeletLatencyMetrics logs KubeletLatencyMetrics computed from the Prometheus
// metrics exposed on the current node and identified by the metricNames.
// The Kubelet subsystem prefix is automatically prepended to these metric names.
func logKubeletLatencyMetrics(ctx context.Context, metricNames ...string) {
	metricSet := sets.NewString()
	for _, key := range metricNames {
		metricSet.Insert(kubeletmetrics.KubeletSubsystem + "_" + key)
	}
	metric, err := e2emetrics.GrabKubeletMetricsWithoutProxy(ctx, fmt.Sprintf("%s:%d", nodeNameOrIP(), ports.KubeletReadOnlyPort), "/metrics")
	if err != nil {
		framework.Logf("Error getting kubelet metrics: %v", err)
	} else {
		framework.Logf("Kubelet Metrics: %+v", e2emetrics.GetKubeletLatencyMetrics(metric, metricSet))
	}
}

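// Illustrative call to logKubeletLatencyMetrics above (a sketch): metric names are passed
// without the "kubelet_" subsystem prefix, which the function prepends itself.
//
//	logKubeletLatencyMetrics(ctx, kubeletmetrics.PodStartDurationKey)
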
// getCRIClient connects to the CRI and returns the CRI runtime service client and image service client.
func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) {
	logger := klog.Background()
	// connection timeout for CRI service connection
	const connectionTimeout = 2 * time.Minute
	runtimeEndpoint := framework.TestContext.ContainerRuntimeEndpoint
	r, err := remote.NewRemoteRuntimeService(runtimeEndpoint, connectionTimeout, noop.NewTracerProvider(), &logger)
	if err != nil {
		return nil, nil, err
	}
	imageManagerEndpoint := runtimeEndpoint
	if framework.TestContext.ImageServiceEndpoint != "" {
		// ImageServiceEndpoint is the same as ContainerRuntimeEndpoint if not
		// explicitly specified
		imageManagerEndpoint = framework.TestContext.ImageServiceEndpoint
	}
	i, err := remote.NewRemoteImageService(imageManagerEndpoint, connectionTimeout, noop.NewTracerProvider(), &logger)
	if err != nil {
		return nil, nil, err
	}
	return r, i, nil
}

// findKubeletServiceName searches for the kubelet unit name among the services known to systemd.
// If the `running` parameter is true, the search is restricted to currently running services;
// otherwise stopped, failed, exited (non-running in general) services are also considered.
// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
func findKubeletServiceName(running bool) string {
	cmdLine := []string{
		"systemctl", "list-units", "*kubelet*",
	}
	if running {
		cmdLine = append(cmdLine, "--state=running")
	}
	stdout, err := exec.Command("sudo", cmdLine...).CombinedOutput()
	framework.ExpectNoError(err)
	regex := regexp.MustCompile("(kubelet-\\w+)")
	matches := regex.FindStringSubmatch(string(stdout))
	gomega.Expect(matches).ToNot(gomega.BeEmpty(), "Found no matching kubelet service: %q", stdout)
	kubeletServiceName := matches[0]
	framework.Logf("Get running kubelet with systemctl: %v, %v", string(stdout), kubeletServiceName)
	return kubeletServiceName
}

func findContainerRuntimeServiceName() (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	conn, err := dbus.NewWithContext(ctx)
	framework.ExpectNoError(err, "Failed to setup dbus connection")
	defer conn.Close()

	runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
	framework.ExpectNoError(err, "failed to get list of container runtime pids")
	gomega.Expect(runtimePids).To(gomega.HaveLen(1), "Unexpected number of container runtime pids. Expected 1 but got %v", len(runtimePids))

	containerRuntimePid := runtimePids[0]

	unitName, err := conn.GetUnitNameByPID(ctx, uint32(containerRuntimePid))
	framework.ExpectNoError(err, "Failed to get container runtime unit name")

	return unitName, nil
}

type containerRuntimeUnitOp int

const (
	startContainerRuntimeUnitOp containerRuntimeUnitOp = iota
	stopContainerRuntimeUnitOp
)

func performContainerRuntimeUnitOp(op containerRuntimeUnitOp) error {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	conn, err := dbus.NewWithContext(ctx)
	framework.ExpectNoError(err, "Failed to setup dbus connection")
	defer conn.Close()

	if containerRuntimeUnitName == "" {
		containerRuntimeUnitName, err = findContainerRuntimeServiceName()
		framework.ExpectNoError(err, "Failed to find container runtime name")
	}

	reschan := make(chan string)

	switch op {
	case startContainerRuntimeUnitOp:
		_, err = conn.StartUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
	case stopContainerRuntimeUnitOp:
		_, err = conn.StopUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
	default:
		framework.Failf("Unexpected container runtime op: %v", op)
	}
	framework.ExpectNoError(err, "dbus connection error")

	job := <-reschan
	gomega.Expect(job).To(gomega.Equal("done"), "Expected job to complete with done")

	return nil
}

func stopContainerRuntime() error {
	return performContainerRuntimeUnitOp(stopContainerRuntimeUnitOp)
}

func startContainerRuntime() error {
	return performContainerRuntimeUnitOp(startContainerRuntimeUnitOp)
}

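// Illustrative pattern for the helpers above (a sketch; error handling kept minimal):
// stop the container runtime unit, exercise the behaviour under test, then bring it back.
//
//	framework.ExpectNoError(stopContainerRuntime())
//	// ... observe how the kubelet reacts while the runtime is down ...
//	framework.ExpectNoError(startContainerRuntime())
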
// restartKubelet restarts the current kubelet service.
// The "current" kubelet service is the instance managed by the current e2e_node test run.
// If `running` is true, restarts only if the current kubelet is actually running. In some cases,
// the kubelet may have exited or been stopped, typically because it was intentionally stopped
// earlier during a test, or, sometimes, because it just crashed.
// Warning: the "current" kubelet is poorly defined. The "current" kubelet is assumed to be the most
// recent kubelet service unit; IOW there is no unique ID we use to bind a kubelet
// instance explicitly to a test run.
func restartKubelet(running bool) {
	kubeletServiceName := findKubeletServiceName(running)
	// reset the kubelet service start-limit-hit
	stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))

	stdout, err = exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
}

// stopKubelet kills the running kubelet and returns a func that will restart it again.
func stopKubelet() func() {
	kubeletServiceName := findKubeletServiceName(true)

	// reset the kubelet service start-limit-hit
	stdout, err := exec.Command("sudo", "systemctl", "reset-failed", kubeletServiceName).CombinedOutput()
	framework.ExpectNoError(err, "Failed to reset kubelet start-limit-hit with systemctl: %v, %s", err, string(stdout))

	stdout, err = exec.Command("sudo", "systemctl", "kill", kubeletServiceName).CombinedOutput()
	framework.ExpectNoError(err, "Failed to stop kubelet with systemctl: %v, %s", err, string(stdout))

	return func() {
		// we should restart the service, otherwise the transient service start will fail
		stdout, err := exec.Command("sudo", "systemctl", "restart", kubeletServiceName).CombinedOutput()
		framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %s", err, string(stdout))
	}
}

func kubeletHealthCheck(url string) bool {
	insecureTransport := http.DefaultTransport.(*http.Transport).Clone()
	insecureTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	insecureHTTPClient := &http.Client{
		Transport: insecureTransport,
	}

	req, err := http.NewRequest("HEAD", url, nil)
	if err != nil {
		return false
	}
	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", framework.TestContext.BearerToken))
	resp, err := insecureHTTPClient.Do(req)
	if err != nil {
		klog.Warningf("Health check on %q failed, error=%v", url, err)
	} else if resp.StatusCode != http.StatusOK {
		klog.Warningf("Health check on %q failed, status=%d", url, resp.StatusCode)
	}
	return err == nil && resp.StatusCode == http.StatusOK
}

func toCgroupFsName(cgroupName cm.CgroupName) string {
	if kubeletCfg.CgroupDriver == "systemd" {
		return cgroupName.ToSystemd()
	}
	return cgroupName.ToCgroupfs()
}

// reduceAllocatableMemoryUsageIfCgroupv1 uses memory.force_empty (https://lwn.net/Articles/432224/)
// to make the kernel reclaim memory in the allocatable cgroup.
// The time to reduce pressure may be unbounded, but usually finishes within a second.
// memory.force_empty is not supported in cgroup v2.
func reduceAllocatableMemoryUsageIfCgroupv1() {
	if !IsCgroup2UnifiedMode() {
		cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(cm.NewCgroupName(cm.RootCgroupName, defaultNodeAllocatableCgroup)))
		_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
		framework.ExpectNoError(err)
	}
}

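// Illustrative use of stopKubelet above (a sketch, mirroring what updateKubeletConfig does):
// the returned func restarts the same systemd unit that was killed.
//
//	startKubelet := stopKubelet()
//	// ... mutate on-disk kubelet state while it is down ...
//	startKubelet()
//	waitForKubeletToStart(ctx, f)
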
// withFeatureGate sets the given feature gate to the desired value and returns a func that restores the original value.
// It is the equivalent of featuregatetesting.SetFeatureGateDuringTest,
// which can't be used here because we're not in a Testing context.
// This must be in a non-"_test" file to pass
// make verify WHAT=test-featuregates
func withFeatureGate(feature featuregate.Feature, desired bool) func() {
	current := utilfeature.DefaultFeatureGate.Enabled(feature)
	utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), desired))
	return func() {
		utilfeature.DefaultMutableFeatureGate.Set(fmt.Sprintf("%s=%v", string(feature), current))
	}
}

// waitForAllContainerRemoval waits until all the containers of a given pod are really gone.
// This is needed by the e2e tests which involve exclusive resource allocation (cpu, topology manager; podresources; etc.).
// In these cases, we need to make sure the tests clean up after themselves so that each test runs in
// a pristine environment. The only way known so far to do that is to introduce this wait.
// Worth noting, however, that this makes the test runtime much longer.
func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
	rs, _, err := getCRIClient()
	framework.ExpectNoError(err)
	gomega.Eventually(ctx, func(ctx context.Context) error {
		containers, err := rs.ListContainers(ctx, &runtimeapi.ContainerFilter{
			LabelSelector: map[string]string{
				types.KubernetesPodNameLabel:      podName,
				types.KubernetesPodNamespaceLabel: podNS,
			},
		})
		if err != nil {
			return fmt.Errorf("got error waiting for all containers to be removed from CRI: %v", err)
		}

		if len(containers) > 0 {
			return fmt.Errorf("expected all containers to be removed from CRI but %v containers still remain. Containers: %+v", len(containers), containers)
		}
		return nil
	}, 2*time.Minute, 1*time.Second).Should(gomega.Succeed())
}

func getPidsForProcess(name, pidFile string) ([]int, error) {
	if len(pidFile) > 0 {
		pid, err := getPidFromPidFile(pidFile)
		if err == nil {
			return []int{pid}, nil
		}
		// log the error and fall back to pidof
		runtime.HandleError(err)
	}
	return procfs.PidOf(name)
}

func getPidFromPidFile(pidFile string) (int, error) {
	file, err := os.Open(pidFile)
	if err != nil {
		return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
	}
	defer file.Close()

	data, err := io.ReadAll(file)
	if err != nil {
		return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
	}

	pid, err := strconv.Atoi(string(data))
	if err != nil {
		return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
	}

	return pid, nil
}

// WaitForPodInitContainerRestartCount waits for the given Pod init container
// to achieve at least a given restartCount.
// TODO: eventually look at moving to test/e2e/framework/pod
func WaitForPodInitContainerRestartCount(ctx context.Context, c clientset.Interface, namespace, podName string, initContainerIndex int, desiredRestartCount int32, timeout time.Duration) error {
	conditionDesc := fmt.Sprintf("init container %d started", initContainerIndex)
	return e2epod.WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
		if initContainerIndex > len(pod.Status.InitContainerStatuses)-1 {
			return false, nil
		}
		containerStatus := pod.Status.InitContainerStatuses[initContainerIndex]
		return containerStatus.RestartCount >= desiredRestartCount, nil
	})
}

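// Illustrative use of withFeatureGate above (a sketch; the gate name is a placeholder):
// the returned func restores the previous value, so it is typically deferred.
//
//	cleanup := withFeatureGate("SomePlaceholderFeature", true)
//	defer cleanup()
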
// WaitForPodContainerRestartCount waits for the given Pod container to achieve at least a given restartCount.
// TODO: eventually look at moving to test/e2e/framework/pod
func WaitForPodContainerRestartCount(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, desiredRestartCount int32, timeout time.Duration) error {
	conditionDesc := fmt.Sprintf("container %d started", containerIndex)
	return e2epod.WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
		if containerIndex > len(pod.Status.ContainerStatuses)-1 {
			return false, nil
		}
		containerStatus := pod.Status.ContainerStatuses[containerIndex]
		return containerStatus.RestartCount >= desiredRestartCount, nil
	})
}

// WaitForPodInitContainerToFail waits for the given Pod init container to fail with the given reason, specifically due to
// invalid container configuration. In this case, the container will remain in a waiting state with a specific
// reason set, which should match the given reason.
// TODO: eventually look at moving to test/e2e/framework/pod
func WaitForPodInitContainerToFail(ctx context.Context, c clientset.Interface, namespace, podName string, containerIndex int, reason string, timeout time.Duration) error {
	conditionDesc := fmt.Sprintf("container %d failed with reason %s", containerIndex, reason)
	return e2epod.WaitForPodCondition(ctx, c, namespace, podName, conditionDesc, timeout, func(pod *v1.Pod) (bool, error) {
		switch pod.Status.Phase {
		case v1.PodPending:
			if len(pod.Status.InitContainerStatuses) == 0 {
				return false, nil
			}
			containerStatus := pod.Status.InitContainerStatuses[containerIndex]
			if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == reason {
				return true, nil
			}
			return false, nil
		case v1.PodFailed, v1.PodRunning, v1.PodSucceeded:
			return false, fmt.Errorf("pod was expected to be pending, but it is in the state: %s", pod.Status.Phase)
		}
		return false, nil
	})
}

func nodeNameOrIP() string {
	return "localhost"
}

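// Illustrative call to the Wait* helpers above (a sketch; the pod name is a placeholder):
// wait up to two minutes for the first container of the pod to restart at least once.
//
//	err := WaitForPodContainerRestartCount(ctx, f.ClientSet, f.Namespace.Name, "placeholder-pod", 0, 1, 2*time.Minute)
//	framework.ExpectNoError(err)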