github.com/verrazzano/verrazzano@v1.7.0/tools/vz/pkg/bugreport/reportgen.go (about) 1 // Copyright (c) 2022, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package bugreport 5 6 import ( 7 "context" 8 "fmt" 9 "os" 10 "path/filepath" 11 "sync" 12 13 vzconstants "github.com/verrazzano/verrazzano/pkg/constants" 14 "github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1" 15 "github.com/verrazzano/verrazzano/tools/vz/pkg/constants" 16 pkghelpers "github.com/verrazzano/verrazzano/tools/vz/pkg/helpers" 17 corev1 "k8s.io/api/core/v1" 18 "k8s.io/apimachinery/pkg/api/errors" 19 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 20 "k8s.io/apimachinery/pkg/runtime/schema" 21 "k8s.io/client-go/dynamic" 22 "k8s.io/client-go/kubernetes" 23 clipkg "sigs.k8s.io/controller-runtime/pkg/client" 24 ) 25 26 // The bug-report command captures the following resources from the cluster by default 27 // - Verrazzano resource 28 // - Logs from verrazzano-platform-operator, verrazzano-monitoring-operator and verrazzano-application-operator pods 29 // - Workloads (Deployment and ReplicaSet, StatefulSet, Daemonset), pods, events, ingress and services from the namespaces of 30 // installed verrazzano components and namespaces specified by flag --include-namespaces 31 // - OAM resources like ApplicationConfiguration, Component, IngressTrait, MetricsTrait from namespaces specified by flag --include-namespaces 32 // - VerrazzanoManagedCluster, VerrazzanoProject and MultiClusterApplicationConfiguration in a multi-clustered environment 33 34 type ErrorsChannelLogs struct { 35 PodName string `json:"podName"` 36 ErrorMessage string `json:"errorMessage"` 37 } 38 39 type ErrorsChannel struct { 40 ErrorMessage string `json:"errorMessage"` 41 } 42 43 type PodLogs struct { 44 IsPodLog bool 45 Duration int64 46 } 47 type Pods struct { 48 Namespace string 49 PodList []corev1.Pod 50 } 51 52 // CaptureClusterSnapshot selectively captures the resources from the cluster, useful to analyze the issue. 53 func CaptureClusterSnapshot(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, client clipkg.Client, vzHelper pkghelpers.VZHelper, podLogs PodLogs, clusterSnapshotCtx pkghelpers.ClusterSnapshotCtx) error { 54 55 // Create a file to capture the standard out to a file 56 stdOutFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportOut), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) 57 if err != nil { 58 return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error()) 59 } 60 defer stdOutFile.Close() 61 62 // Create a file to capture the standard err to a file 63 stdErrFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportErr), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) 64 if err != nil { 65 return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error()) 66 } 67 defer stdErrFile.Close() 68 69 // Create MultiWriters for standard out and err 70 pkghelpers.SetMultiWriterOut(vzHelper.GetOutputStream(), stdOutFile) 71 pkghelpers.SetMultiWriterErr(vzHelper.GetErrorStream(), stdErrFile) 72 73 // Find the Verrazzano resource to analyze. 74 vz, err := pkghelpers.FindVerrazzanoResource(client) 75 if err != nil { 76 pkghelpers.LogMessage(fmt.Sprintf("Verrazzano is not installed: %s", err.Error())) 77 } 78 79 // Get the list of namespaces based on the failed components and value specified by flag --include-namespaces 80 nsList, additionalNS, err := collectNamespaces(kubeClient, dynamicClient, clusterSnapshotCtx.MoreNS, vz, vzHelper) 81 if err != nil { 82 return err 83 } 84 var msgPrefix string 85 if pkghelpers.GetIsLiveCluster() { 86 msgPrefix = constants.AnalysisMsgPrefix 87 } else { 88 msgPrefix = constants.BugReportMsgPrefix 89 } 90 if clusterSnapshotCtx.PrintReportToConsole { 91 // Print initial message to console output only 92 fmt.Fprintf(vzHelper.GetOutputStream(), "\n"+msgPrefix+"resources from the cluster ...\n") 93 } 94 // Capture list of resources from verrazzano-install and verrazzano-system namespaces 95 err = captureResources(client, kubeClient, dynamicClient, clusterSnapshotCtx.BugReportDir, vz, vzHelper, nsList) 96 if err != nil { 97 pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the Verrazzano resources: %s", err.Error())) 98 } 99 100 // Capture OAM resources from the namespaces specified using --include-namespaces 101 if len(additionalNS) > 0 { 102 captureAdditionalResources(client, kubeClient, dynamicClient, vzHelper, clusterSnapshotCtx.BugReportDir, additionalNS, podLogs) 103 } 104 105 // Capture Verrazzano Projects and VerrazzanoManagedCluster 106 if err = captureMultiClusterResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 107 return err 108 } 109 110 // Capture global CAPI resources 111 if err = pkghelpers.CaptureGlobalCapiResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 112 return err 113 } 114 115 // Capture global Rancher resources 116 if err = pkghelpers.CaptureGlobalRancherResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 117 return err 118 } 119 return nil 120 } 121 122 func captureResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, bugReportDir string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper, namespaces []string) error { 123 // List of pods to collect the logs 124 vpoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoPlatformOperator, vzconstants.VerrazzanoInstallNamespace) 125 vaoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoApplicationOperator, vzconstants.VerrazzanoSystemNamespace) 126 vcoPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoClusterOperator, vzconstants.VerrazzanoSystemNamespace) 127 vmoPod, _ := pkghelpers.GetPodList(client, constants.K8SAppLabel, constants.VerrazzanoMonitoringOperator, vzconstants.VerrazzanoSystemNamespace) 128 vpoWebHookPod, _ := pkghelpers.GetPodList(client, constants.AppLabel, constants.VerrazzanoPlatformOperatorWebhook, vzconstants.VerrazzanoInstallNamespace) 129 externalDNSPod, _ := pkghelpers.GetPodList(client, constants.K8sAppLabelExternalDNS, vzconstants.ExternalDNS, vzconstants.CertManager) 130 wgCount := 5 + len(namespaces) 131 wgCount++ // increment for the verrrazzano resource 132 if len(externalDNSPod) > 0 { 133 wgCount++ 134 } 135 wg := &sync.WaitGroup{} 136 wg.Add(wgCount) 137 138 // Define channels to get the errors 139 evr := make(chan ErrorsChannel, 1) 140 ecr := make(chan ErrorsChannel, 1) 141 ecl := make(chan ErrorsChannelLogs, 1) 142 143 go captureVZResource(wg, evr, vz, bugReportDir) 144 145 go captureLogs(wg, ecl, kubeClient, Pods{PodList: vpoPod, Namespace: vzconstants.VerrazzanoInstallNamespace}, bugReportDir, vzHelper, 0) 146 go captureLogs(wg, ecl, kubeClient, Pods{PodList: vpoWebHookPod, Namespace: vzconstants.VerrazzanoInstallNamespace}, bugReportDir, vzHelper, 0) 147 go captureLogs(wg, ecl, kubeClient, Pods{PodList: vmoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0) 148 go captureLogs(wg, ecl, kubeClient, Pods{PodList: vaoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0) 149 go captureLogs(wg, ecl, kubeClient, Pods{PodList: vcoPod, Namespace: vzconstants.VerrazzanoSystemNamespace}, bugReportDir, vzHelper, 0) 150 151 if len(externalDNSPod) > 0 { 152 go captureLogs(wg, ecl, kubeClient, Pods{PodList: externalDNSPod, Namespace: vzconstants.CertManager}, bugReportDir, vzHelper, 0) 153 } 154 for _, ns := range namespaces { 155 go captureK8SResources(wg, ecr, client, kubeClient, dynamicClient, ns, bugReportDir, vzHelper) 156 } 157 158 wg.Wait() 159 close(ecl) 160 close(ecr) 161 close(evr) 162 // Report errors (if any), in capturing the verrazzano resource 163 for err := range evr { 164 return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage) 165 } 166 167 // Report errors (if any), in collecting the logs from various pods 168 for err := range ecl { 169 return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage) 170 } 171 172 // Report errors (if any), in collecting resources from various namespaces 173 for err := range ecr { 174 return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage) 175 } 176 return nil 177 } 178 179 // captureAdditionalLogs will be used for capture logs from additional namespace. 180 func captureAdditionalLogs(client clipkg.Client, kubeClient kubernetes.Interface, bugReportDir string, vzHelper pkghelpers.VZHelper, namespaces []string, duration int64) error { 181 wgCount := len(namespaces) 182 wg := &sync.WaitGroup{} 183 wg.Add(wgCount) 184 // Define channels to get the errors 185 evr := make(chan ErrorsChannel, 1) 186 ecr := make(chan ErrorsChannel, 1) 187 ecl := make(chan ErrorsChannelLogs, 1) 188 for _, ns := range namespaces { 189 podList, _ := pkghelpers.GetPodListAll(client, ns) 190 go captureLogsAllPods(wg, ecl, kubeClient, Pods{PodList: podList, Namespace: ns}, bugReportDir, vzHelper, duration) 191 } 192 193 wg.Wait() 194 close(ecl) 195 close(ecr) 196 close(evr) 197 // Report errors (if any), in collecting the logs from various pods 198 for err := range evr { 199 return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage) 200 } 201 202 // Report errors (if any), in collecting the logs from various pods 203 for err := range ecl { 204 return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage) 205 } 206 207 // Report errors (if any), in collecting resources from various namespaces 208 for err := range ecr { 209 return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage) 210 } 211 return nil 212 } 213 214 // captureVZResource collects the Verrazzano resource as a JSON, in parallel 215 func captureVZResource(wg *sync.WaitGroup, ec chan ErrorsChannel, vz *v1beta1.Verrazzano, bugReportDir string) { 216 defer wg.Done() 217 err := pkghelpers.CaptureVZResource(bugReportDir, vz) 218 if err != nil { 219 ec <- ErrorsChannel{ErrorMessage: err.Error()} 220 } 221 } 222 223 // captureLogs collects the logs from platform operator, application operator and monitoring operator in parallel 224 func captureLogs(wg *sync.WaitGroup, ec chan ErrorsChannelLogs, kubeClient kubernetes.Interface, pod Pods, bugReportDir string, vzHelper pkghelpers.VZHelper, duration int64) { 225 defer wg.Done() 226 if len(pod.PodList) == 0 { 227 return 228 } 229 // This won't work when there are more than one pods for the same app label 230 pkghelpers.LogMessage(fmt.Sprintf("log from pod %s in %s namespace ...\n", pod.PodList[0].Name, pod.Namespace)) 231 err := pkghelpers.CapturePodLog(kubeClient, pod.PodList[0], pod.Namespace, bugReportDir, vzHelper, duration) 232 if err != nil { 233 ec <- ErrorsChannelLogs{PodName: pod.PodList[0].Name, ErrorMessage: err.Error()} 234 } 235 236 } 237 238 // captureK8SResources captures Kubernetes workloads, pods, events, ingresses and services from the list of namespaces in parallel 239 func captureK8SResources(wg *sync.WaitGroup, ec chan ErrorsChannel, client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, namespace, bugReportDir string, vzHelper pkghelpers.VZHelper) { 240 defer wg.Done() 241 if err := pkghelpers.CaptureK8SResources(client, kubeClient, dynamicClient, namespace, bugReportDir, vzHelper); err != nil { 242 ec <- ErrorsChannel{ErrorMessage: err.Error()} 243 } 244 } 245 246 // collectNamespaces gathers list of unique namespaces, to be considered to collect the information 247 func collectNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, includedNS []string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper) ([]string, []string, error) { 248 249 var nsList []string 250 251 // Include namespaces for all the vz components 252 allCompNS := pkghelpers.GetNamespacesForAllComponents(vz) 253 nsList = append(nsList, allCompNS...) 254 255 // Verify and Include verrazzano-install namespace 256 if pkghelpers.VerifyVzInstallNamespaceExists(kubeClient) { 257 nsList = append(nsList, vzconstants.VerrazzanoInstallNamespace) 258 } 259 260 // Add any namespaces that have CAPI clusters 261 capiNSList, err := getCAPIClusterNamespaces(kubeClient, dynamicClient) 262 if err != nil { 263 return nil, nil, err 264 } 265 nsList = append(nsList, capiNSList...) 266 267 // Add Rancher namespaces 268 rancherNSList, err := getRancherNamespaces(kubeClient, dynamicClient) 269 if err != nil { 270 return nil, nil, err 271 } 272 nsList = append(nsList, rancherNSList...) 273 274 // Include the namespaces specified by flag --include-namespaces 275 var additionalNS []string 276 if len(includedNS) > 0 { 277 includedList := pkghelpers.RemoveDuplicate(includedNS) 278 for _, ns := range includedList { 279 nsExists, _ := pkghelpers.DoesNamespaceExist(kubeClient, ns, vzHelper) 280 if nsExists { 281 additionalNS = append(additionalNS, ns) 282 } 283 } 284 nsList = append(nsList, additionalNS...) 285 } 286 287 // Remove the duplicates from nsList 288 nsList = pkghelpers.RemoveDuplicate(nsList) 289 return nsList, additionalNS, nil 290 } 291 292 // This function returns a list of namespaces that have a CAPI cluster resource. 293 // We want to always capture these resources. 294 func getCAPIClusterNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) { 295 namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{}) 296 if err != nil { 297 return nil, err 298 } 299 300 nsList := []string{} 301 gvr := schema.GroupVersionResource{Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "clusters"} 302 for _, namespace := range namespaces.Items { 303 list, err := dynamicClient.Resource(gvr).Namespace(namespace.Name).List(context.TODO(), metav1.ListOptions{}) 304 // Resource type does not exist, return here since there will be no "cluster" resources. 305 // This will be the case if the cluster-api component is not installed. 306 if errors.IsNotFound(err) { 307 return nil, nil 308 } 309 if err != nil { 310 return nil, err 311 } 312 if len(list.Items) > 0 { 313 nsList = append(nsList, namespace.Name) 314 } 315 } 316 return nsList, nil 317 } 318 319 // This function returns a list of namespaces that have a Rancher annotation. 320 // We want to always capture these resources. 321 func getRancherNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) { 322 namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{}) 323 if err != nil { 324 return nil, err 325 } 326 327 nsList := []string{} 328 for _, namespace := range namespaces.Items { 329 if namespace.Annotations["lifecycle.cattle.io/create.namespace-auth"] == "true" { 330 nsList = append(nsList, namespace.Name) 331 } 332 } 333 return nsList, nil 334 } 335 336 // captureLogsAllPods captures logs from all pods without filtering in given namespace. 337 func captureLogsAllPods(wg *sync.WaitGroup, ec chan ErrorsChannelLogs, kubeClient kubernetes.Interface, pods Pods, bugReportDir string, vzHelper pkghelpers.VZHelper, duration int64) { 338 339 defer wg.Done() 340 if len(pods.PodList) == 0 { 341 return 342 } 343 for index := range pods.PodList { 344 pkghelpers.LogMessage(fmt.Sprintf("log from pod %s in %s namespace ...\n", pods.PodList[index].Name, pods.Namespace)) 345 err := pkghelpers.CapturePodLog(kubeClient, pods.PodList[index], pods.Namespace, bugReportDir, vzHelper, duration) 346 if err != nil { 347 ec <- ErrorsChannelLogs{PodName: pods.PodList[index].Name, ErrorMessage: err.Error()} 348 } 349 } 350 } 351 352 // captureAdditionalResources will capture additional resources from additional namespaces 353 func captureAdditionalResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, vzHelper pkghelpers.VZHelper, bugReportDir string, additionalNS []string, podLogs PodLogs) { 354 if err := pkghelpers.CaptureOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil { 355 pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the resources : %s", err.Error())) 356 } 357 if podLogs.IsPodLog { 358 if err := captureAdditionalLogs(client, kubeClient, bugReportDir, vzHelper, additionalNS, podLogs.Duration); err != nil { 359 pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the logs: %s", err.Error())) 360 } 361 } 362 if err := pkghelpers.CaptureMultiClusterOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil { 363 pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the multi-cluster resources : %s", err.Error())) 364 } 365 } 366 367 // captureMultiClusterResources captures Projects and VerrazzanoManagedCluster resource 368 func captureMultiClusterResources(dynamicClient dynamic.Interface, captureDir string, vzHelper pkghelpers.VZHelper) error { 369 // Return nil when dynamicClient is nil, useful to get clean unit tests 370 if dynamicClient == nil { 371 return nil 372 } 373 374 // Capture Verrazzano projects in verrazzano-mc namespace 375 if err := pkghelpers.CaptureVerrazzanoProjects(dynamicClient, captureDir, vzHelper); err != nil { 376 return err 377 } 378 379 // Capture Verrazzano projects in verrazzano-mc namespace 380 if err := pkghelpers.CaptureVerrazzanoManagedCluster(dynamicClient, captureDir, vzHelper); err != nil { 381 return err 382 } 383 return nil 384 }