github.com/verrazzano/verrazzano@v1.7.0/tools/vz/pkg/bugreport/reportgen.go (about)

     1  // Copyright (c) 2022, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package bugreport
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"sync"
    12  
    13  	vzconstants "github.com/verrazzano/verrazzano/pkg/constants"
    14  	"github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1"
    15  	"github.com/verrazzano/verrazzano/tools/vz/pkg/constants"
    16  	pkghelpers "github.com/verrazzano/verrazzano/tools/vz/pkg/helpers"
    17  	corev1 "k8s.io/api/core/v1"
    18  	"k8s.io/apimachinery/pkg/api/errors"
    19  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    20  	"k8s.io/apimachinery/pkg/runtime/schema"
    21  	"k8s.io/client-go/dynamic"
    22  	"k8s.io/client-go/kubernetes"
    23  	clipkg "sigs.k8s.io/controller-runtime/pkg/client"
    24  )
    25  
    26  // The bug-report command captures the following resources from the cluster by default
    27  // - Verrazzano resource
    28  // - Logs from verrazzano-platform-operator, verrazzano-monitoring-operator and verrazzano-application-operator pods
    29  // - Workloads (Deployment and ReplicaSet, StatefulSet, Daemonset), pods, events, ingress and services from the namespaces of
    30  //   installed verrazzano components and namespaces specified by flag --include-namespaces
    31  // - OAM resources like ApplicationConfiguration, Component, IngressTrait, MetricsTrait from namespaces specified by flag --include-namespaces
    32  // - VerrazzanoManagedCluster, VerrazzanoProject and MultiClusterApplicationConfiguration in a multi-clustered environment
    33  
// ErrorsChannelLogs is the message sent on the error channel by the log-capturing
// goroutines (captureLogs and captureLogsAllPods) when capturing a pod log fails.
type ErrorsChannelLogs struct {
	// PodName is the name of the pod whose log could not be captured.
	PodName string `json:"podName"`
	// ErrorMessage is the text of the error returned while capturing the log.
	ErrorMessage string `json:"errorMessage"`
}
    38  
// ErrorsChannel is the message sent on the error channel by the resource-capturing
// goroutines (captureVZResource and captureK8SResources) when a capture fails.
type ErrorsChannel struct {
	// ErrorMessage is the text of the error returned by the failed capture.
	ErrorMessage string `json:"errorMessage"`
}
    42  
// PodLogs carries the pod-log options applied to the additional namespaces.
type PodLogs struct {
	// IsPodLog enables capturing the logs of the pods in the additional namespaces.
	IsPodLog bool
	// Duration is forwarded unchanged to the pod log capture helper.
	// NOTE(review): presumably a look-back window in seconds — confirm against pkghelpers.CapturePodLog.
	Duration int64
}
// Pods groups a list of pods with the namespace they were listed from.
type Pods struct {
	// Namespace is the namespace passed along with each pod when capturing its log.
	Namespace string
	// PodList is the list of pods whose logs are candidates for capture.
	PodList []corev1.Pod
}
    51  
    52  // CaptureClusterSnapshot selectively captures the resources from the cluster, useful to analyze the issue.
    53  func CaptureClusterSnapshot(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, client clipkg.Client, vzHelper pkghelpers.VZHelper, podLogs PodLogs, clusterSnapshotCtx pkghelpers.ClusterSnapshotCtx) error {
    54  
    55  	// Create a file to capture the standard out to a file
    56  	stdOutFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportOut), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
    57  	if err != nil {
    58  		return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error())
    59  	}
    60  	defer stdOutFile.Close()
    61  
    62  	// Create a file to capture the standard err to a file
    63  	stdErrFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportErr), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
    64  	if err != nil {
    65  		return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error())
    66  	}
    67  	defer stdErrFile.Close()
    68  
    69  	// Create MultiWriters for standard out and err
    70  	pkghelpers.SetMultiWriterOut(vzHelper.GetOutputStream(), stdOutFile)
    71  	pkghelpers.SetMultiWriterErr(vzHelper.GetErrorStream(), stdErrFile)
    72  
    73  	// Find the Verrazzano resource to analyze.
    74  	vz, err := pkghelpers.FindVerrazzanoResource(client)
    75  	if err != nil {
    76  		pkghelpers.LogMessage(fmt.Sprintf("Verrazzano is not installed: %s", err.Error()))
    77  	}
    78  
    79  	// Get the list of namespaces based on the failed components and value specified by flag --include-namespaces
    80  	nsList, additionalNS, err := collectNamespaces(kubeClient, dynamicClient, clusterSnapshotCtx.MoreNS, vz, vzHelper)
    81  	if err != nil {
    82  		return err
    83  	}
    84  	var msgPrefix string
    85  	if pkghelpers.GetIsLiveCluster() {
    86  		msgPrefix = constants.AnalysisMsgPrefix
    87  	} else {
    88  		msgPrefix = constants.BugReportMsgPrefix
    89  	}
    90  	if clusterSnapshotCtx.PrintReportToConsole {
    91  		// Print initial message to console output only
    92  		fmt.Fprintf(vzHelper.GetOutputStream(), "\n"+msgPrefix+"resources from the cluster ...\n")
    93  	}
    94  	// Capture list of resources from verrazzano-install and verrazzano-system namespaces
    95  	err = captureResources(client, kubeClient, dynamicClient, clusterSnapshotCtx.BugReportDir, vz, vzHelper, nsList)
    96  	if err != nil {
    97  		pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the Verrazzano resources: %s", err.Error()))
    98  	}
    99  
   100  	// Capture OAM resources from the namespaces specified using --include-namespaces
   101  	if len(additionalNS) > 0 {
   102  		captureAdditionalResources(client, kubeClient, dynamicClient, vzHelper, clusterSnapshotCtx.BugReportDir, additionalNS, podLogs)
   103  	}
   104  
   105  	// Capture Verrazzano Projects and VerrazzanoManagedCluster
   106  	if err = captureMultiClusterResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil {
   107  		return err
   108  	}
   109  
   110  	// Capture global CAPI resources
   111  	if err = pkghelpers.CaptureGlobalCapiResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil {
   112  		return err
   113  	}
   114  
   115  	// Capture global Rancher resources
   116  	if err = pkghelpers.CaptureGlobalRancherResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil {
   117  		return err
   118  	}
   119  	return nil
   120  }
   121  
   122  func captureResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, bugReportDir string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper, namespaces []string) error {
   123  	// List of pods to collect the logs
   124  	vpoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoPlatformOperator, vzconstants.VerrazzanoInstallNamespace)
   125  	vaoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoApplicationOperator, vzconstants.VerrazzanoSystemNamespace)
   126  	vcoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoClusterOperator, vzconstants.VerrazzanoSystemNamespace)
   127  	vmoPod, _ := pkghelpers.GetPodList(client, constants.K8SAppLabel, constants.VerrazzanoMonitoringOperator, vzconstants.VerrazzanoSystemNamespace)
   128  	vpoWebHookPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoPlatformOperatorWebhook, vzconstants.VerrazzanoInstallNamespace)
   129  	externalDNSPod, _ := pkghelpers.GetPodList(client, constants.K8sAppLabelExternalDNS, vzconstants.ExternalDNS, vzconstants.CertManager)
   130  	wgCount := 5 + len(namespaces)
   131  	wgCount++ // increment for the verrrazzano resource
   132  	if len(externalDNSPod) > 0 {
   133  		wgCount++
   134  	}
   135  	wg := &sync.WaitGroup{}
   136  	wg.Add(wgCount)
   137  
   138  	// Define channels to get the errors
   139  	evr := make(chan ErrorsChannel, 1)
   140  	ecr := make(chan ErrorsChannel, 1)
   141  	ecl := make(chan ErrorsChannelLogs, 1)
   142  
   143  	go captureVZResource(wg, evr, vz, bugReportDir)
   144  
   145  	go captureLogs(wg, ecl, kubeClient, Pods{PodList: vpoPod, Namespace: vzconstants.VerrazzanoInstallNamespace}, bugReportDir, vzHelper, 0)
   146  	go captureLogs(wg, ecl, kubeClient, Pods{PodList: vpoWebHookPod, Namespace: vzconstants.VerrazzanoInstallNamespace}, bugReportDir, vzHelper, 0)
   147  	go captureLogs(wg, ecl, kubeClient, Pods{PodList: vmoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0)
   148  	go captureLogs(wg, ecl, kubeClient, Pods{PodList: vaoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0)
   149  	go captureLogs(wg, ecl, kubeClient, Pods{PodList: vcoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0)
   150  
   151  	if len(externalDNSPod) > 0 {
   152  		go captureLogs(wg, ecl, kubeClient, Pods{PodList: externalDNSPod, Namespace: vzconstants.CertManager}, bugReportDir, vzHelper, 0)
   153  	}
   154  	for _, ns := range namespaces {
   155  		go captureK8SResources(wg, ecr, client, kubeClient, dynamicClient, ns, bugReportDir, vzHelper)
   156  	}
   157  
   158  	wg.Wait()
   159  	close(ecl)
   160  	close(ecr)
   161  	close(evr)
   162  	// Report errors (if any), in capturing the verrazzano resource
   163  	for err := range evr {
   164  		return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage)
   165  	}
   166  
   167  	// Report errors (if any), in collecting the logs from various pods
   168  	for err := range ecl {
   169  		return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage)
   170  	}
   171  
   172  	// Report errors (if any), in collecting resources from various namespaces
   173  	for err := range ecr {
   174  		return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage)
   175  	}
   176  	return nil
   177  }
   178  
   179  // captureAdditionalLogs will be used for capture logs from additional namespace.
   180  func captureAdditionalLogs(client clipkg.Client, kubeClient kubernetes.Interface, bugReportDir string, vzHelper pkghelpers.VZHelper, namespaces []string, duration int64) error {
   181  	wgCount := len(namespaces)
   182  	wg := &sync.WaitGroup{}
   183  	wg.Add(wgCount)
   184  	// Define channels to get the errors
   185  	evr := make(chan ErrorsChannel, 1)
   186  	ecr := make(chan ErrorsChannel, 1)
   187  	ecl := make(chan ErrorsChannelLogs, 1)
   188  	for _, ns := range namespaces {
   189  		podList, _ := pkghelpers.GetPodListAll(client, ns)
   190  		go captureLogsAllPods(wg, ecl, kubeClient, Pods{PodList: podList, Namespace: ns}, bugReportDir, vzHelper, duration)
   191  	}
   192  
   193  	wg.Wait()
   194  	close(ecl)
   195  	close(ecr)
   196  	close(evr)
   197  	// Report errors (if any), in collecting the logs from various pods
   198  	for err := range evr {
   199  		return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage)
   200  	}
   201  
   202  	// Report errors (if any), in collecting the logs from various pods
   203  	for err := range ecl {
   204  		return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage)
   205  	}
   206  
   207  	// Report errors (if any), in collecting resources from various namespaces
   208  	for err := range ecr {
   209  		return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage)
   210  	}
   211  	return nil
   212  }
   213  
   214  // captureVZResource collects the Verrazzano resource as a JSON, in parallel
   215  func captureVZResource(wg *sync.WaitGroup, ec chan ErrorsChannel, vz *v1beta1.Verrazzano, bugReportDir string) {
   216  	defer wg.Done()
   217  	err := pkghelpers.CaptureVZResource(bugReportDir, vz)
   218  	if err != nil {
   219  		ec <- ErrorsChannel{ErrorMessage: err.Error()}
   220  	}
   221  }
   222  
   223  // captureLogs collects the logs from platform operator, application operator and monitoring operator in parallel
   224  func captureLogs(wg *sync.WaitGroup, ec chan ErrorsChannelLogs, kubeClient kubernetes.Interface, pod Pods, bugReportDir string, vzHelper pkghelpers.VZHelper, duration int64) {
   225  	defer wg.Done()
   226  	if len(pod.PodList) == 0 {
   227  		return
   228  	}
   229  	// This won't work when there are more than one pods for the same app label
   230  	pkghelpers.LogMessage(fmt.Sprintf("log from pod %s in %s namespace ...\n", pod.PodList[0].Name, pod.Namespace))
   231  	err := pkghelpers.CapturePodLog(kubeClient, pod.PodList[0], pod.Namespace, bugReportDir, vzHelper, duration)
   232  	if err != nil {
   233  		ec <- ErrorsChannelLogs{PodName: pod.PodList[0].Name, ErrorMessage: err.Error()}
   234  	}
   235  
   236  }
   237  
   238  // captureK8SResources captures Kubernetes workloads, pods, events, ingresses and services from the list of namespaces in parallel
   239  func captureK8SResources(wg *sync.WaitGroup, ec chan ErrorsChannel, client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, namespace, bugReportDir string, vzHelper pkghelpers.VZHelper) {
   240  	defer wg.Done()
   241  	if err := pkghelpers.CaptureK8SResources(client, kubeClient, dynamicClient, namespace, bugReportDir, vzHelper); err != nil {
   242  		ec <- ErrorsChannel{ErrorMessage: err.Error()}
   243  	}
   244  }
   245  
   246  // collectNamespaces gathers list of unique namespaces, to be considered to collect the information
   247  func collectNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, includedNS []string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper) ([]string, []string, error) {
   248  
   249  	var nsList []string
   250  
   251  	// Include namespaces for all the vz components
   252  	allCompNS := pkghelpers.GetNamespacesForAllComponents(vz)
   253  	nsList = append(nsList, allCompNS...)
   254  
   255  	// Verify and Include verrazzano-install namespace
   256  	if pkghelpers.VerifyVzInstallNamespaceExists(kubeClient) {
   257  		nsList = append(nsList, vzconstants.VerrazzanoInstallNamespace)
   258  	}
   259  
   260  	// Add any namespaces that have CAPI clusters
   261  	capiNSList, err := getCAPIClusterNamespaces(kubeClient, dynamicClient)
   262  	if err != nil {
   263  		return nil, nil, err
   264  	}
   265  	nsList = append(nsList, capiNSList...)
   266  
   267  	// Add Rancher namespaces
   268  	rancherNSList, err := getRancherNamespaces(kubeClient, dynamicClient)
   269  	if err != nil {
   270  		return nil, nil, err
   271  	}
   272  	nsList = append(nsList, rancherNSList...)
   273  
   274  	// Include the namespaces specified by flag --include-namespaces
   275  	var additionalNS []string
   276  	if len(includedNS) > 0 {
   277  		includedList := pkghelpers.RemoveDuplicate(includedNS)
   278  		for _, ns := range includedList {
   279  			nsExists, _ := pkghelpers.DoesNamespaceExist(kubeClient, ns, vzHelper)
   280  			if nsExists {
   281  				additionalNS = append(additionalNS, ns)
   282  			}
   283  		}
   284  		nsList = append(nsList, additionalNS...)
   285  	}
   286  
   287  	// Remove the duplicates from nsList
   288  	nsList = pkghelpers.RemoveDuplicate(nsList)
   289  	return nsList, additionalNS, nil
   290  }
   291  
   292  // This function returns a list of namespaces that have a CAPI cluster resource.
   293  // We want to always capture these resources.
   294  func getCAPIClusterNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) {
   295  	namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  
   300  	nsList := []string{}
   301  	gvr := schema.GroupVersionResource{Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "clusters"}
   302  	for _, namespace := range namespaces.Items {
   303  		list, err := dynamicClient.Resource(gvr).Namespace(namespace.Name).List(context.TODO(), metav1.ListOptions{})
   304  		// Resource type does not exist, return here since there will be no "cluster" resources.
   305  		// This will be the case if the cluster-api component is not installed.
   306  		if errors.IsNotFound(err) {
   307  			return nil, nil
   308  		}
   309  		if err != nil {
   310  			return nil, err
   311  		}
   312  		if len(list.Items) > 0 {
   313  			nsList = append(nsList, namespace.Name)
   314  		}
   315  	}
   316  	return nsList, nil
   317  }
   318  
   319  // This function returns a list of namespaces that have a Rancher annotation.
   320  // We want to always capture these resources.
   321  func getRancherNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) {
   322  	namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{})
   323  	if err != nil {
   324  		return nil, err
   325  	}
   326  
   327  	nsList := []string{}
   328  	for _, namespace := range namespaces.Items {
   329  		if namespace.Annotations["lifecycle.cattle.io/create.namespace-auth"] == "true" {
   330  			nsList = append(nsList, namespace.Name)
   331  		}
   332  	}
   333  	return nsList, nil
   334  }
   335  
   336  // captureLogsAllPods captures logs from all pods without filtering in given namespace.
   337  func captureLogsAllPods(wg *sync.WaitGroup, ec chan ErrorsChannelLogs, kubeClient kubernetes.Interface, pods Pods, bugReportDir string, vzHelper pkghelpers.VZHelper, duration int64) {
   338  
   339  	defer wg.Done()
   340  	if len(pods.PodList) == 0 {
   341  		return
   342  	}
   343  	for index := range pods.PodList {
   344  		pkghelpers.LogMessage(fmt.Sprintf("log from pod %s in %s namespace ...\n", pods.PodList[index].Name, pods.Namespace))
   345  		err := pkghelpers.CapturePodLog(kubeClient, pods.PodList[index], pods.Namespace, bugReportDir, vzHelper, duration)
   346  		if err != nil {
   347  			ec <- ErrorsChannelLogs{PodName: pods.PodList[index].Name, ErrorMessage: err.Error()}
   348  		}
   349  	}
   350  }
   351  
   352  // captureAdditionalResources will capture additional resources from additional namespaces
   353  func captureAdditionalResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, vzHelper pkghelpers.VZHelper, bugReportDir string, additionalNS []string, podLogs PodLogs) {
   354  	if err := pkghelpers.CaptureOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil {
   355  		pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the resources : %s", err.Error()))
   356  	}
   357  	if podLogs.IsPodLog {
   358  		if err := captureAdditionalLogs(client, kubeClient, bugReportDir, vzHelper, additionalNS, podLogs.Duration); err != nil {
   359  			pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the logs: %s", err.Error()))
   360  		}
   361  	}
   362  	if err := pkghelpers.CaptureMultiClusterOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil {
   363  		pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the multi-cluster resources : %s", err.Error()))
   364  	}
   365  }
   366  
   367  // captureMultiClusterResources captures Projects and VerrazzanoManagedCluster resource
   368  func captureMultiClusterResources(dynamicClient dynamic.Interface, captureDir string, vzHelper pkghelpers.VZHelper) error {
   369  	// Return nil when dynamicClient is nil, useful to get clean unit tests
   370  	if dynamicClient == nil {
   371  		return nil
   372  	}
   373  
   374  	// Capture Verrazzano projects in verrazzano-mc namespace
   375  	if err := pkghelpers.CaptureVerrazzanoProjects(dynamicClient, captureDir, vzHelper); err != nil {
   376  		return err
   377  	}
   378  
   379  	// Capture Verrazzano projects in verrazzano-mc namespace
   380  	if err := pkghelpers.CaptureVerrazzanoManagedCluster(dynamicClient, captureDir, vzHelper); err != nil {
   381  		return err
   382  	}
   383  	return nil
   384  }