istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/test/kube/dump.go (about)

     1  //  Copyright Istio Authors
     2  //
     3  //  Licensed under the Apache License, Version 2.0 (the "License");
     4  //  you may not use this file except in compliance with the License.
     5  //  You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  //  Unless required by applicable law or agreed to in writing, software
    10  //  distributed under the License is distributed on an "AS IS" BASIS,
    11  //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  //  See the License for the specific language governing permissions and
    13  //  limitations under the License.
    14  
    15  package kube
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"os"
    25  	"path"
    26  	"path/filepath"
    27  	"strings"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/hashicorp/go-multierror"
    32  	"go.uber.org/atomic"
    33  	"golang.org/x/sync/errgroup"
    34  	corev1 "k8s.io/api/core/v1"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	"sigs.k8s.io/yaml"
    37  
    38  	"istio.io/api/annotation"
    39  	"istio.io/istio/pkg/kube"
    40  	"istio.io/istio/pkg/test/framework/components/cluster"
    41  	"istio.io/istio/pkg/test/framework/components/istioctl"
    42  	"istio.io/istio/pkg/test/framework/resource"
    43  	"istio.io/istio/pkg/test/prow"
    44  	"istio.io/istio/pkg/test/scopes"
    45  	"istio.io/istio/pkg/test/util/retry"
    46  )
    47  
// wellKnownContainer is the name of a container that the dump helpers in this file
// look for by name (see the constants below).
type wellKnownContainer string
    49  
    50  func (n wellKnownContainer) IsContainer(c corev1.Container) bool {
    51  	return c.Name == n.Name()
    52  }
    53  
    54  func (n wellKnownContainer) Name() string {
    55  	return string(n)
    56  }
    57  
// maxCoreDumpedPods is the limit that DumpCoreDumps compares coreDumpedPods against
// before collecting core dumps from another pod. The remaining constants are the
// names of the Istio containers that the dumpers in this file match on.
const (
	maxCoreDumpedPods                      = 5
	proxyContainer      wellKnownContainer = "istio-proxy"
	discoveryContainer  wellKnownContainer = "discovery"
	initContainer       wellKnownContainer = "istio-init"
	validationContainer wellKnownContainer = "istio-validation"
)
    65  
// coreDumpedPods counts the pods for which DumpCoreDumps has written at least one
// core dump file. It is package-level, so the count is shared by every call.
var coreDumpedPods = atomic.NewInt32(0)
    67  
// PodDumper dumps information about the given pods of one cluster into workDir.
// If no pods are provided, the dumper is expected to fetch all the pods in the
// namespace itself; most dumpers in this file do so through podsOrFetch.
type PodDumper func(ctx resource.Context, cluster cluster.Cluster, workDir string, namespace string, pods ...corev1.Pod)
    71  
    72  func podOutputPath(workDir string, cluster cluster.Cluster, pod corev1.Pod, dumpName string) string {
    73  	return outputPath(workDir, cluster, pod.Name, dumpName)
    74  }
    75  
    76  // outputPath gives a path in the form of workDir/cluster/<prefix>_<suffix>
    77  func outputPath(workDir string, cluster cluster.Cluster, prefix, suffix string) string {
    78  	dir := path.Join(workDir, cluster.StableName())
    79  	if err := os.MkdirAll(dir, os.ModeDir|0o700); err != nil {
    80  		scopes.Framework.Warnf("failed creating directory: %s", dir)
    81  	}
    82  	return path.Join(dir, fmt.Sprintf("%s_%s", prefix, suffix))
    83  }
    84  
    85  func DumpDeployments(ctx resource.Context, workDir, namespace string) {
    86  	errG := multierror.Group{}
    87  	for _, c := range ctx.AllClusters().Kube() {
    88  		deps, err := c.Kube().AppsV1().Deployments(namespace).List(context.TODO(), metav1.ListOptions{})
    89  		if err != nil {
    90  			scopes.Framework.Warnf("Error getting deployments for cluster %s: %v", c.Name(), err)
    91  			return
    92  		}
    93  		for _, deployment := range deps.Items {
    94  			deployment := deployment
    95  			errG.Go(func() error {
    96  				out, err := yaml.Marshal(deployment)
    97  				if err != nil {
    98  					return err
    99  				}
   100  				return os.WriteFile(outputPath(workDir, c, deployment.Name, "deployment.yaml"), out, os.ModePerm)
   101  			})
   102  		}
   103  	}
   104  	_ = errG.Wait()
   105  }
   106  
   107  func DumpWebhooks(ctx resource.Context, workDir string) {
   108  	errG := multierror.Group{}
   109  	for _, c := range ctx.AllClusters().Kube() {
   110  		mwhs, err := c.Kube().AdmissionregistrationV1().MutatingWebhookConfigurations().List(context.TODO(), metav1.ListOptions{})
   111  		if err != nil {
   112  			scopes.Framework.Warnf("Error getting mutating webhook configurations for cluster %s: %v", c.Name(), err)
   113  			return
   114  		}
   115  		for _, mwh := range mwhs.Items {
   116  			mwh := mwh
   117  			errG.Go(func() error {
   118  				out, err := yaml.Marshal(mwh)
   119  				if err != nil {
   120  					return err
   121  				}
   122  				return os.WriteFile(outputPath(workDir, c, mwh.Name, "mutatingwebhook.yaml"), out, os.ModePerm)
   123  			})
   124  		}
   125  		vwhs, err := c.Kube().AdmissionregistrationV1().ValidatingWebhookConfigurations().List(context.TODO(), metav1.ListOptions{})
   126  		if err != nil {
   127  			scopes.Framework.Warnf("Error getting validating webhook configurations for cluster %s: %v", c.Name(), err)
   128  			return
   129  		}
   130  		for _, vwh := range vwhs.Items {
   131  			vwh := vwh
   132  			errG.Go(func() error {
   133  				out, err := yaml.Marshal(vwh)
   134  				if err != nil {
   135  					return err
   136  				}
   137  				return os.WriteFile(outputPath(workDir, c, vwh.Name, "validatingwebhook.yaml"), out, os.ModePerm)
   138  			})
   139  		}
   140  	}
   141  	_ = errG.Wait()
   142  }
   143  
   144  // DumpPods runs each dumper with the selected pods in the given namespace.
   145  // If selectors is empty, all pods in the namespace will be dumpped.
   146  // If no dumpers are provided, their resource state, events, container logs and Envoy information will be dumped.
   147  func DumpPods(ctx resource.Context, workDir, namespace string, selectors []string, dumpers ...PodDumper) {
   148  	if len(dumpers) == 0 {
   149  		dumpers = []PodDumper{
   150  			DumpPodState,
   151  			DumpPodEvents,
   152  			DumpPodLogs,
   153  			DumpPodEnvoy,
   154  			DumpPodAgent,
   155  			DumpCoreDumps,
   156  		}
   157  	}
   158  
   159  	wg := sync.WaitGroup{}
   160  	for _, c := range ctx.AllClusters().Kube() {
   161  		pods, err := c.PodsForSelector(context.TODO(), namespace, selectors...)
   162  		if err != nil {
   163  			scopes.Framework.Warnf("Error getting pods list for cluster %s via kubectl: %v", c.Name(), err)
   164  			return
   165  		}
   166  		if len(pods.Items) == 0 {
   167  			continue
   168  		}
   169  		for _, dump := range dumpers {
   170  			c, dump := c, dump
   171  			wg.Add(1)
   172  			go func() {
   173  				dump(ctx, c, workDir, namespace, pods.Items...)
   174  				wg.Done()
   175  			}()
   176  		}
   177  	}
   178  	wg.Wait()
   179  }
   180  
// coredumpDir is the directory inside the proxy container that DumpCoreDumps searches
// for files named core.*.
const coredumpDir = "/var/lib/istio"
   182  
   183  func DumpCoreDumps(_ resource.Context, c cluster.Cluster, workDir string, namespace string, pods ...corev1.Pod) {
   184  	if coreDumpedPods.Load() >= maxCoreDumpedPods {
   185  		return
   186  	}
   187  	pods = podsOrFetch(c, pods, namespace)
   188  	for _, pod := range pods {
   189  		if coreDumpedPods.Load() >= maxCoreDumpedPods {
   190  			return
   191  		}
   192  		isVM := checkIfVM(pod)
   193  		wroteDumpsForPod := false
   194  		containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
   195  		for _, container := range containers {
   196  			if !proxyContainer.IsContainer(container) {
   197  				continue
   198  			}
   199  			restarts := containerRestarts(pod, proxyContainer.Name())
   200  			crashed, _ := containerCrashed(pod, proxyContainer.Name())
   201  			if !crashed && restarts == 0 {
   202  				// no need to store this dump
   203  				continue
   204  			}
   205  
   206  			findDumps := fmt.Sprintf("find %s -name core.*", coredumpDir)
   207  			stdout, _, err := c.PodExec(pod.Name, pod.Namespace, container.Name, findDumps)
   208  			if err != nil {
   209  				scopes.Framework.Warnf("Unable to get core dumps for cluster/pod: %s/%s/%s: %v",
   210  					c.Name(), pod.Namespace, pod.Name, err)
   211  				continue
   212  			}
   213  			for _, cd := range strings.Split(stdout, "\n") {
   214  				if strings.TrimSpace(cd) == "" {
   215  					continue
   216  				}
   217  				cmd := "cat " + cd
   218  				if isVM {
   219  					cmd = "sudo " + cmd
   220  				}
   221  				stdout, _, err := c.PodExec(pod.Name, pod.Namespace, container.Name, cmd)
   222  				if err != nil {
   223  					scopes.Framework.Warnf("Unable to get core dumps %v for cluster/pod: %s/%s/%s: %v",
   224  						cd, c.Name(), pod.Namespace, pod.Name, err)
   225  					continue
   226  				}
   227  				fname := podOutputPath(workDir, c, pod, filepath.Base(cd))
   228  				if err = os.WriteFile(fname, []byte(stdout), os.ModePerm); err != nil {
   229  					scopes.Framework.Warnf("Unable to write envoy core dump log for cluster/pod: %s/%s/%s: %v",
   230  						c.Name(), pod.Namespace, pod.Name, err)
   231  				} else {
   232  					wroteDumpsForPod = true
   233  				}
   234  			}
   235  		}
   236  		if wroteDumpsForPod {
   237  			coreDumpedPods.Inc()
   238  		}
   239  	}
   240  }
   241  
   242  func podsOrFetch(c cluster.Cluster, pods []corev1.Pod, namespace string) []corev1.Pod {
   243  	if len(pods) == 0 {
   244  		podList, err := c.Kube().CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
   245  		if err != nil {
   246  			scopes.Framework.Warnf("Error getting pods list in cluster %s via kubectl: %v", c.Name(), err)
   247  			return nil
   248  		}
   249  		pods = podList.Items
   250  	}
   251  	return pods
   252  }
   253  
   254  // DumpPodState dumps the pod state for either the provided pods or all pods in the namespace if none are provided.
   255  func DumpPodState(_ resource.Context, c cluster.Cluster, workDir string, namespace string, pods ...corev1.Pod) {
   256  	pods = podsOrFetch(c, pods, namespace)
   257  
   258  	for _, pod := range pods {
   259  		out, err := yaml.Marshal(&pod)
   260  		if err != nil {
   261  			scopes.Framework.Warnf("Error marshaling pod state for output: %v", err)
   262  			continue
   263  		}
   264  
   265  		outPath := podOutputPath(workDir, c, pod, "pod-state.yaml")
   266  		if err := os.WriteFile(outPath, out, os.ModePerm); err != nil {
   267  			scopes.Framework.Infof("Error writing out pod state to file: %v", err)
   268  		}
   269  	}
   270  }
   271  
   272  // DumpPodEvents dumps the pod events for either the provided pods or all pods in the namespace if none are provided.
   273  func DumpPodEvents(_ resource.Context, c cluster.Cluster, workDir, namespace string, pods ...corev1.Pod) {
   274  	pods = podsOrFetch(c, pods, namespace)
   275  
   276  	for _, pod := range pods {
   277  		list, err := c.Kube().CoreV1().Events(namespace).List(context.TODO(),
   278  			metav1.ListOptions{
   279  				FieldSelector: "involvedObject.name=" + pod.Name,
   280  			})
   281  		if err != nil {
   282  			scopes.Framework.Warnf("Error getting events list for cluster/pod %s/%s/%s via kubectl: %v",
   283  				c.Name(), namespace, pod.Name, err)
   284  			return
   285  		}
   286  
   287  		for i := range list.Items {
   288  			e := list.Items[i]
   289  			e.ManagedFields = nil
   290  			list.Items[i] = e
   291  		}
   292  
   293  		out, err := yaml.Marshal(list.Items)
   294  		if err != nil {
   295  			scopes.Framework.Warnf("Error marshaling pod event for output: %v", err)
   296  			continue
   297  		}
   298  
   299  		outPath := podOutputPath(workDir, c, pod, "pod-events.yaml")
   300  		if err := os.WriteFile(outPath, out, os.ModePerm); err != nil {
   301  			scopes.Framework.Infof("Error writing out pod events to file: %v", err)
   302  		}
   303  	}
   304  }
   305  
   306  // containerRestarts checks how many times container has ever restarted
   307  func containerRestarts(pod corev1.Pod, container string) int {
   308  	for _, cs := range pod.Status.ContainerStatuses {
   309  		if cs.Name == container {
   310  			return int(cs.RestartCount)
   311  		}
   312  	}
   313  	// No match - assume that means no restart
   314  	return 0
   315  }
   316  
   317  func containerCrashed(pod corev1.Pod, container string) (bool, *corev1.ContainerStateTerminated) {
   318  	for _, cs := range pod.Status.ContainerStatuses {
   319  		if cs.Name == container && cs.State.Terminated != nil && cs.State.Terminated.ExitCode != 0 {
   320  			return true, cs.State.Terminated
   321  		}
   322  	}
   323  	return false, nil
   324  }
   325  
   326  // DumpPodLogs will dump logs from each container in each of the provided pods
   327  // or all pods in the namespace if none are provided.
   328  func DumpPodLogs(_ resource.Context, c cluster.Cluster, workDir, namespace string, pods ...corev1.Pod) {
   329  	pods = podsOrFetch(c, pods, namespace)
   330  
   331  	for _, pod := range pods {
   332  		isVM := checkIfVM(pod)
   333  		containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
   334  		for _, container := range containers {
   335  			l, err := c.PodLogs(context.TODO(), pod.Name, pod.Namespace, container.Name, false /* previousLog */)
   336  			if err != nil {
   337  				scopes.Framework.Warnf("Unable to get logs for cluster/pod/container: %s/%s/%s/%s for: %v",
   338  					c.Name(), pod.Namespace, pod.Name, container.Name, err)
   339  			}
   340  
   341  			fname := podOutputPath(workDir, c, pod, fmt.Sprintf("%s.log", container.Name))
   342  			if err = os.WriteFile(fname, []byte(l), os.ModePerm); err != nil {
   343  				scopes.Framework.Warnf("Unable to write logs for cluster/pod/container: %s/%s/%s/%s: %v",
   344  					c.Name(), pod.Namespace, pod.Name, container.Name, err)
   345  			}
   346  
   347  			// Get previous container logs, if applicable
   348  			if restarts := containerRestarts(pod, container.Name); restarts > 0 {
   349  				fname := podOutputPath(workDir, c, pod, fmt.Sprintf("%s.previous.log", container.Name))
   350  				// only care about istio components restart
   351  				if proxyContainer.IsContainer(container) || discoveryContainer.IsContainer(container) || initContainer.IsContainer(container) ||
   352  					validationContainer.IsContainer(container) || strings.HasPrefix(pod.Name, "istio-cni-node") {
   353  					// This is only called if the test failed, so we cannot mark it as "failed" again. Instead, output
   354  					// a log which will get highlighted in the test logs
   355  					// TODO proper analysis of restarts to ensure we do not miss crashes when tests still pass.
   356  					scopes.Framework.Errorf("FAIL: cluster/pod/container %s/%s/%s/%s crashed/restarted %d times. Logs: %v",
   357  						c.Name(), pod.Namespace, pod.Name, container.Name, restarts, prow.ArtifactsURL(fname))
   358  				}
   359  				l, err := c.PodLogs(context.TODO(), pod.Name, pod.Namespace, container.Name, true /* previousLog */)
   360  				if err != nil {
   361  					scopes.Framework.Warnf("Unable to get previous logs for cluster/pod/container: %s/%s/%s/%s: %v",
   362  						c.Name(), pod.Namespace, pod.Name, container.Name, err)
   363  				}
   364  
   365  				if err = os.WriteFile(fname, []byte(l), os.ModePerm); err != nil {
   366  					scopes.Framework.Warnf("Unable to write previous logs for cluster/pod/container: %s/%s/%s/%s: %v",
   367  						c.Name(), pod.Namespace, pod.Name, container.Name, err)
   368  				}
   369  			}
   370  
   371  			if crashed, terminateState := containerCrashed(pod, container.Name); crashed {
   372  				scopes.Framework.Errorf("FAIL: cluster/pod/container: %s/%s/%s/%s crashed with status: %+v. Logs: %v",
   373  					c.Name(), pod.Namespace, pod.Name, container.Name, terminateState, prow.ArtifactsURL(fname))
   374  			}
   375  
   376  			// Get envoy logs if the pod is a VM, since kubectl logs only shows the logs from iptables for VMs
   377  			if isVM && proxyContainer.IsContainer(container) {
   378  				if stdout, stderr, err := c.PodExec(pod.Name, pod.Namespace, container.Name, "cat /var/log/istio/istio.err.log"); err == nil {
   379  					fname := podOutputPath(workDir, c, pod, fmt.Sprintf("%s.envoy.err.log", container.Name))
   380  					stdAll := stdout + stderr
   381  					if err = os.WriteFile(fname, []byte(stdAll), os.ModePerm); err != nil {
   382  						scopes.Framework.Warnf("Unable to write envoy err log for VM cluster/pod/container: %s/%s/%s/%s: %v",
   383  							c.Name(), pod.Namespace, pod.Name, container.Name, err)
   384  					}
   385  					if strings.Contains(stdout, "envoy backtrace") {
   386  						scopes.Framework.Errorf("FAIL: VM envoy crashed in cluster/pod/container: %s/%s/%s/%s. See log: %s",
   387  							c.Name(), pod.Namespace, pod.Name, container.Name, prow.ArtifactsURL(fname))
   388  
   389  						if strings.Contains(stdAll, "Too many open files") {
   390  							// Run netstat on the container with the crashed proxy to debug socket creation issues.
   391  							if stdout, stderr, err := c.PodExec(pod.Name, pod.Namespace, container.Name, "netstat -at"); err != nil {
   392  								scopes.Framework.Errorf("Unable to run `netstat -at` for crashed VM cluster/pod/container: %s/%s/%s/%s: %v",
   393  									c.Name(), pod.Namespace, pod.Name, container.Name, err)
   394  							} else {
   395  								fname := podOutputPath(workDir, c, pod, fmt.Sprintf("%s.netstat.txt", container.Name))
   396  								if err = os.WriteFile(fname, []byte(stdout+stderr), os.ModePerm); err != nil {
   397  									scopes.Framework.Warnf("Unable to write netstat log for crashed VM cluster/pod/container: %s/%s/%s/%s: %v",
   398  										c.Name(), pod.Namespace, pod.Name, container.Name, err)
   399  								} else {
   400  									scopes.Framework.Errorf("Results of `netstat -at` for crashed VM cluster/pod/container: %s/%s/%s/%s: %s",
   401  										c.Name(), pod.Namespace, pod.Name, container.Name, fname)
   402  								}
   403  							}
   404  						}
   405  					}
   406  				} else {
   407  					scopes.Framework.Warnf("Unable to get envoy err log for VM cluster/pod/container: %s/%s/%s/%s: %v",
   408  						c.Name(), pod.Namespace, pod.Name, container.Name, err)
   409  				}
   410  
   411  				if stdout, stderr, err := c.PodExec(pod.Name, pod.Namespace, container.Name, "cat /var/log/istio/istio.log"); err == nil {
   412  					fname := podOutputPath(workDir, c, pod, fmt.Sprintf("%s.envoy.log", container.Name))
   413  					if err = os.WriteFile(fname, []byte(stdout+stderr), os.ModePerm); err != nil {
   414  						scopes.Framework.Warnf("Unable to write envoy log for VM cluster/pod/container: %s/%s/%s/%s: %v",
   415  							c.Name(), pod.Namespace, pod.Name, container.Name, err)
   416  					}
   417  				} else {
   418  					scopes.Framework.Warnf("Unable to get envoy log for VM cluster/pod: %s/%s/%s: %v",
   419  						c.Name(), pod.Namespace, pod.Name, err)
   420  				}
   421  			}
   422  		}
   423  	}
   424  }
   425  
   426  // DumpPodEnvoy will dump Envoy proxy config and clusters in each of the provided pods
   427  // or all pods in the namespace if none are provided.
   428  func DumpPodEnvoy(ctx resource.Context, c cluster.Cluster, workDir, namespace string, pods ...corev1.Pod) {
   429  	pods = podsOrFetch(c, pods, namespace)
   430  	g := errgroup.Group{}
   431  	for _, pod := range pods {
   432  		pod := pod
   433  		if !hasEnvoy(pod) {
   434  			continue
   435  		}
   436  
   437  		g.Go(func() error {
   438  			fw, err := newPortForward(c, pod, 15000)
   439  			if err != nil {
   440  				return err
   441  			}
   442  			defer fw.Close()
   443  			dumpProxyCommand(c, fw, pod, workDir, "proxy-config.json", "config_dump?include_eds=true")
   444  			dumpProxyCommand(c, fw, pod, workDir, "proxy-clusters.txt", "clusters")
   445  			return nil
   446  		})
   447  	}
   448  	if err := g.Wait(); err != nil {
   449  		scopes.Framework.Errorf("dump failed: %v", err)
   450  	}
   451  }
   452  
   453  func newPortForward(c cluster.Cluster, pod corev1.Pod, port int) (kube.PortForwarder, error) {
   454  	var fw kube.PortForwarder
   455  	// add a retry loop since sometimes reserving a port fails
   456  	err := retry.UntilSuccess(func() error {
   457  		var err error
   458  		fw, err = c.NewPortForwarder(pod.Name, pod.Namespace, "", 0, port)
   459  		if err != nil {
   460  			return err
   461  		}
   462  		if err = fw.Start(); err != nil {
   463  			return err
   464  		}
   465  		return nil
   466  	}, retry.MaxAttempts(5), retry.Delay(time.Millisecond*10))
   467  	return fw, err
   468  }
   469  
// dumpClient is the shared HTTP client used by portForwardRequest.
// NOTE(review): no Timeout is configured, so a request to an unresponsive endpoint is
// not bounded by the client — confirm whether that is intended.
var dumpClient = &http.Client{}
   471  
   472  func portForwardRequest(fw kube.PortForwarder, method, path string) ([]byte, error) {
   473  	req, err := http.NewRequest(method, fmt.Sprintf("http://%s/%s", fw.Address(), path), nil)
   474  	if err != nil {
   475  		return nil, err
   476  	}
   477  	resp, err := dumpClient.Do(req)
   478  	if err != nil {
   479  		return nil, err
   480  	}
   481  	defer func() {
   482  		_ = resp.Body.Close()
   483  	}()
   484  	out, err := io.ReadAll(resp.Body)
   485  	if err != nil {
   486  		return nil, err
   487  	}
   488  
   489  	return out, nil
   490  }
   491  
   492  func dumpProxyCommand(c cluster.Cluster, fw kube.PortForwarder, pod corev1.Pod, workDir, filename, path string) {
   493  	containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
   494  	for _, container := range containers {
   495  		if !proxyContainer.IsContainer(container) {
   496  			// The pilot-agent is only available in the proxy container
   497  			continue
   498  		}
   499  
   500  		if cfgDump, err := portForwardRequest(fw, "GET", path); err == nil {
   501  			fname := podOutputPath(workDir, c, pod, filename)
   502  			if err = os.WriteFile(fname, cfgDump, os.ModePerm); err != nil {
   503  				scopes.Framework.Errorf("Unable to write output for command %q on cluster/pod/container: %s/%s/%s/%s: %v",
   504  					path, c.Name(), pod.Namespace, pod.Name, container.Name, err)
   505  			}
   506  			if filename == "proxy-config.json" {
   507  				// Add extra logs if we have anything warming. FAIL syntax is import to make prow highlight
   508  				// it. Note: this doesn't make the test fail, just adds logging; if we hit this code the test
   509  				// already failed.
   510  				// We add backoff because we may see transient warming errors during cleanup of resources.
   511  				attempts := 0
   512  				backoff := time.Second * 1 // Try after 0s, 1s, 2s, 4s, 8s, or 7s total
   513  				for {
   514  					attempts++
   515  					warming := isWarming(cfgDump)
   516  					if warming == "" {
   517  						// Not warming
   518  						break
   519  					}
   520  					if attempts > 3 {
   521  						scopes.Framework.Warnf("FAIL: cluster/pod %s/%s/%s found warming resources (%v) on final attempt. Config: %v",
   522  							c.Name(), pod.Namespace, pod.Name, warming, prow.ArtifactsURL(fname))
   523  						break
   524  					}
   525  					scopes.Framework.Warnf("cluster/pod %s/%s/%s found warming resources (%v) on attempt %d",
   526  						c.Name(), pod.Namespace, pod.Name, warming, attempts)
   527  					time.Sleep(backoff)
   528  					backoff *= 2
   529  					cfgDump, err = portForwardRequest(fw, "GET", path)
   530  					if err != nil {
   531  						scopes.Framework.Errorf("FAIL: Unable to get execute command %q on cluster/pod: %s/%s/%s for: %v",
   532  							path, c.Name(), pod.Namespace, pod.Name, err)
   533  					}
   534  				}
   535  				if warming := isWarming(cfgDump); warming != "" {
   536  					scopes.Framework.Warnf("FAIL: cluster/pod %s/%s/%s found warming resources (%v)",
   537  						c.Name(), pod.Namespace, pod.Name, warming)
   538  				}
   539  			}
   540  		} else {
   541  			scopes.Framework.Errorf("Unable to get execute command %q on cluster/pod: %s/%s/%s for: %v",
   542  				path, c.Name(), pod.Namespace, pod.Name, err)
   543  		}
   544  	}
   545  }
   546  
   547  func isWarming(dump []byte) string {
   548  	if bytes.Contains(dump, []byte("dynamic_warming_clusters")) {
   549  		return "dynamic_warming_clusters"
   550  	}
   551  	if bytes.Contains(dump, []byte("dynamic_warming_secrets")) {
   552  		return "dynamic_warming_secrets"
   553  	}
   554  	if bytes.Contains(dump, []byte("warming_state")) {
   555  		return "warming_state (listeners)"
   556  	}
   557  	return ""
   558  }
   559  
   560  func hasEnvoy(pod corev1.Pod) bool {
   561  	if checkIfVM(pod) {
   562  		// assume VMs run Envoy
   563  		return true
   564  	}
   565  	f := false
   566  	for _, c := range pod.Spec.Containers {
   567  		if proxyContainer.IsContainer(c) {
   568  			f = true
   569  			break
   570  		}
   571  	}
   572  	if !f {
   573  		// no proxy container
   574  		return false
   575  	}
   576  	for k, v := range pod.ObjectMeta.Annotations {
   577  		if k == annotation.InjectTemplates.Name && strings.HasPrefix(v, "grpc-") {
   578  			// proxy container may run only agent for proxyless gRPC
   579  			return false
   580  		}
   581  	}
   582  	return true
   583  }
   584  
   585  func checkIfVM(pod corev1.Pod) bool {
   586  	for k := range pod.ObjectMeta.Labels {
   587  		if strings.Contains(k, "test-vm") {
   588  			return true
   589  		}
   590  	}
   591  	return false
   592  }
   593  
   594  func DumpDebug(ctx resource.Context, c cluster.Cluster, workDir, endpoint, namespace string) {
   595  	ik, err := istioctl.New(ctx, istioctl.Config{Cluster: c})
   596  	if err != nil {
   597  		scopes.Framework.Warnf("failed dumping %s (cluster %s): %v", endpoint, c.Name(), err)
   598  		return
   599  	}
   600  	args := []string{"x", "internal-debug", "--all", endpoint}
   601  	if ctx.Settings().Revisions.Default() != "" {
   602  		args = append(args, "--revision", ctx.Settings().Revisions.Default())
   603  	}
   604  	if namespace != "" && namespace != "istio-system" {
   605  		args = append(args, "--istioNamespace", namespace)
   606  	}
   607  	scopes.Framework.Debugf("dump %s (cluster %s): %v", endpoint, c.Name(), args)
   608  	stdout, _, err := ik.Invoke(args)
   609  	if err != nil {
   610  		scopes.Framework.Warnf("failed dumping %s (cluster %s): %v", endpoint, c.Name(), err)
   611  		return
   612  	}
   613  	outputs := map[string]string{}
   614  	if err := json.Unmarshal([]byte(stdout), &outputs); err != nil {
   615  		scopes.Framework.Warnf("failed dumping %s (cluster %s): %v", endpoint, c.Name(), err)
   616  		return
   617  	}
   618  	for istiod, out := range outputs {
   619  		outPath := outputPath(workDir, c, istiod, endpoint)
   620  		if err := os.WriteFile(outPath, []byte(out), 0o644); err != nil {
   621  			scopes.Framework.Warnf("failed dumping %s (cluster %s): %v", endpoint, c.Name(), err)
   622  			return
   623  		}
   624  	}
   625  }
   626  
   627  func DumpPodAgent(ctx resource.Context, c cluster.Cluster, workDir string, _ string, pods ...corev1.Pod) {
   628  	g := errgroup.Group{}
   629  	for _, pod := range pods {
   630  		pod := pod
   631  		g.Go(func() error {
   632  			fw, err := newPortForward(c, pod, 15020)
   633  			if err != nil {
   634  				return err
   635  			}
   636  			defer fw.Close()
   637  			dumpProxyCommand(c, fw, pod, workDir, "ndsz.json", "debug/ndsz")
   638  			dumpProxyCommand(c, fw, pod, workDir, "proxy-stats.txt", "stats/prometheus")
   639  			return nil
   640  		})
   641  	}
   642  	if err := g.Wait(); err != nil {
   643  		scopes.Framework.Errorf("failed to dump ndsz: %v", err)
   644  	}
   645  }