k8s.io/kubernetes@v1.29.3/test/e2e/framework/debug/dump.go (about)

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package debug
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sort"
    23  	"time"
    24  
    25  	"github.com/onsi/ginkgo/v2"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/fields"
    30  	clientset "k8s.io/client-go/kubernetes"
    31  	restclient "k8s.io/client-go/rest"
    32  	"k8s.io/kubernetes/test/e2e/framework"
    33  	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
    34  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    35  )
    36  
    37  // EventsLister is a func that lists events.
    38  type EventsLister func(opts metav1.ListOptions, ns string) (*v1.EventList, error)
    39  
    40  // dumpEventsInNamespace dumps events in the given namespace.
    41  func dumpEventsInNamespace(eventsLister EventsLister, namespace string) {
    42  	ginkgo.By(fmt.Sprintf("Collecting events from namespace %q.", namespace))
    43  	events, err := eventsLister(metav1.ListOptions{}, namespace)
    44  	framework.ExpectNoError(err, "failed to list events in namespace %q", namespace)
    45  
    46  	ginkgo.By(fmt.Sprintf("Found %d events.", len(events.Items)))
    47  	// Sort events by their first timestamp
    48  	sortedEvents := events.Items
    49  	if len(sortedEvents) > 1 {
    50  		sort.Sort(byFirstTimestamp(sortedEvents))
    51  	}
    52  	for _, e := range sortedEvents {
    53  		framework.Logf("At %v - event for %v: %v %v: %v", e.FirstTimestamp, e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
    54  	}
    55  	// Note that we don't wait for any Cleanup to propagate, which means
    56  	// that if you delete a bunch of pods right before ending your test,
    57  	// you may or may not see the killing/deletion/Cleanup events.
    58  }
    59  
    60  // DumpAllNamespaceInfo dumps events, pods and nodes information in the given namespace.
    61  func DumpAllNamespaceInfo(ctx context.Context, c clientset.Interface, namespace string) {
    62  	dumpEventsInNamespace(func(opts metav1.ListOptions, ns string) (*v1.EventList, error) {
    63  		return c.CoreV1().Events(ns).List(ctx, opts)
    64  	}, namespace)
    65  
    66  	e2epod.DumpAllPodInfoForNamespace(ctx, c, namespace, framework.TestContext.ReportDir)
    67  
    68  	// If cluster is large, then the following logs are basically useless, because:
    69  	// 1. it takes tens of minutes or hours to grab all of them
    70  	// 2. there are so many of them that working with them are mostly impossible
    71  	// So we dump them only if the cluster is relatively small.
    72  	maxNodesForDump := framework.TestContext.MaxNodesToGather
    73  	nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
    74  	if err != nil {
    75  		framework.Logf("unable to fetch node list: %v", err)
    76  		return
    77  	}
    78  	if len(nodes.Items) <= maxNodesForDump {
    79  		dumpAllNodeInfo(ctx, c, nodes)
    80  	} else {
    81  		framework.Logf("skipping dumping cluster info - cluster too large")
    82  	}
    83  }
    84  
    85  // byFirstTimestamp sorts a slice of events by first timestamp, using their involvedObject's name as a tie breaker.
    86  type byFirstTimestamp []v1.Event
    87  
    88  func (o byFirstTimestamp) Len() int      { return len(o) }
    89  func (o byFirstTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
    90  
    91  func (o byFirstTimestamp) Less(i, j int) bool {
    92  	if o[i].FirstTimestamp.Equal(&o[j].FirstTimestamp) {
    93  		return o[i].InvolvedObject.Name < o[j].InvolvedObject.Name
    94  	}
    95  	return o[i].FirstTimestamp.Before(&o[j].FirstTimestamp)
    96  }
    97  
    98  func dumpAllNodeInfo(ctx context.Context, c clientset.Interface, nodes *v1.NodeList) {
    99  	names := make([]string, len(nodes.Items))
   100  	for ix := range nodes.Items {
   101  		names[ix] = nodes.Items[ix].Name
   102  	}
   103  	DumpNodeDebugInfo(ctx, c, names, framework.Logf)
   104  }
   105  
   106  // DumpNodeDebugInfo dumps debug information of the given nodes.
   107  func DumpNodeDebugInfo(ctx context.Context, c clientset.Interface, nodeNames []string, logFunc func(fmt string, args ...interface{})) {
   108  	for _, n := range nodeNames {
   109  		logFunc("\nLogging node info for node %v", n)
   110  		node, err := c.CoreV1().Nodes().Get(ctx, n, metav1.GetOptions{})
   111  		if err != nil {
   112  			logFunc("Error getting node info %v", err)
   113  		}
   114  		logFunc("Node Info: %v", node)
   115  
   116  		logFunc("\nLogging kubelet events for node %v", n)
   117  		for _, e := range getNodeEvents(ctx, c, n) {
   118  			logFunc("source %v type %v message %v reason %v first ts %v last ts %v, involved obj %+v",
   119  				e.Source, e.Type, e.Message, e.Reason, e.FirstTimestamp, e.LastTimestamp, e.InvolvedObject)
   120  		}
   121  		logFunc("\nLogging pods the kubelet thinks is on node %v", n)
   122  		podList, err := getKubeletPods(ctx, c, n)
   123  		if err != nil {
   124  			logFunc("Unable to retrieve kubelet pods for node %v: %v", n, err)
   125  			continue
   126  		}
   127  		for _, p := range podList.Items {
   128  			logFunc("%v started at %v (%d+%d container statuses recorded)", p.Name, p.Status.StartTime, len(p.Status.InitContainerStatuses), len(p.Status.ContainerStatuses))
   129  			for _, c := range p.Status.InitContainerStatuses {
   130  				logFunc("\tInit container %v ready: %v, restart count %v",
   131  					c.Name, c.Ready, c.RestartCount)
   132  			}
   133  			for _, c := range p.Status.ContainerStatuses {
   134  				logFunc("\tContainer %v ready: %v, restart count %v",
   135  					c.Name, c.Ready, c.RestartCount)
   136  			}
   137  		}
   138  		_, err = e2emetrics.HighLatencyKubeletOperations(ctx, c, 10*time.Second, n, logFunc)
   139  		framework.ExpectNoError(err)
   140  		// TODO: Log node resource info
   141  	}
   142  }
   143  
   144  // getKubeletPods retrieves the list of pods on the kubelet.
   145  func getKubeletPods(ctx context.Context, c clientset.Interface, node string) (*v1.PodList, error) {
   146  	var client restclient.Result
   147  	finished := make(chan struct{}, 1)
   148  	go func() {
   149  		// call chain tends to hang in some cases when Node is not ready. Add an artificial timeout for this call. #22165
   150  		client = c.CoreV1().RESTClient().Get().
   151  			Resource("nodes").
   152  			SubResource("proxy").
   153  			Name(fmt.Sprintf("%v:%v", node, framework.KubeletPort)).
   154  			Suffix("pods").
   155  			Do(ctx)
   156  
   157  		finished <- struct{}{}
   158  	}()
   159  	select {
   160  	case <-finished:
   161  		result := &v1.PodList{}
   162  		if err := client.Into(result); err != nil {
   163  			return &v1.PodList{}, err
   164  		}
   165  		return result, nil
   166  	case <-time.After(framework.PodGetTimeout):
   167  		return &v1.PodList{}, fmt.Errorf("Waiting up to %v for getting the list of pods", framework.PodGetTimeout)
   168  	}
   169  }
   170  
   171  // logNodeEvents logs kubelet events from the given node. This includes kubelet
   172  // restart and node unhealthy events. Note that listing events like this will mess
   173  // with latency metrics, beware of calling it during a test.
   174  func getNodeEvents(ctx context.Context, c clientset.Interface, nodeName string) []v1.Event {
   175  	selector := fields.Set{
   176  		"involvedObject.kind":      "Node",
   177  		"involvedObject.name":      nodeName,
   178  		"involvedObject.namespace": metav1.NamespaceAll,
   179  		"source":                   "kubelet",
   180  	}.AsSelector().String()
   181  	options := metav1.ListOptions{FieldSelector: selector}
   182  	events, err := c.CoreV1().Events(metav1.NamespaceSystem).List(ctx, options)
   183  	if err != nil {
   184  		framework.Logf("Unexpected error retrieving node events %v", err)
   185  		return []v1.Event{}
   186  	}
   187  	return events.Items
   188  }