github.com/verrazzano/verrazzano@v1.7.1/tools/vz/pkg/bugreport/reportgen.go (about) 1 // Copyright (c) 2022, 2024, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package bugreport 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "os" 11 "path/filepath" 12 "sync" 13 14 vzconstants "github.com/verrazzano/verrazzano/pkg/constants" 15 "github.com/verrazzano/verrazzano/platform-operator/apis/verrazzano/v1beta1" 16 "github.com/verrazzano/verrazzano/tools/vz/pkg/constants" 17 pkghelpers "github.com/verrazzano/verrazzano/tools/vz/pkg/helpers" 18 "github.com/verrazzano/verrazzano/tools/vz/pkg/internal/util/cluster" 19 corev1 "k8s.io/api/core/v1" 20 "k8s.io/apimachinery/pkg/api/errors" 21 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 "k8s.io/apimachinery/pkg/labels" 23 "k8s.io/apimachinery/pkg/runtime/schema" 24 "k8s.io/client-go/dynamic" 25 "k8s.io/client-go/kubernetes" 26 clipkg "sigs.k8s.io/controller-runtime/pkg/client" 27 ) 28 29 // The bug-report command captures the following resources from the cluster by default 30 // - Verrazzano resource 31 // - Logs from verrazzano-platform-operator, verrazzano-monitoring-operator and verrazzano-application-operator pods 32 // - Workloads (Deployment and ReplicaSet, StatefulSet, Daemonset), pods, events, ingress and services from the namespaces of 33 // installed verrazzano components and namespaces specified by flag --include-namespaces 34 // - OAM resources like ApplicationConfiguration, Component, IngressTrait, MetricsTrait from namespaces specified by flag --include-namespaces 35 // - VerrazzanoManagedCluster, VerrazzanoProject and MultiClusterApplicationConfiguration in a multi-clustered environment 36 37 type VzComponentNamespaces struct { 38 Name string 39 Namespace string 40 Label string 41 PodList pkghelpers.Pods 42 } 43 44 var vpoPod = VzComponentNamespaces{ 45 Name: constants.VerrazzanoPlatformOperator, 46 Namespace: vzconstants.VerrazzanoInstallNamespace, 47 Label: constants.AppLabel, 48 PodList: pkghelpers.Pods{ 49 Namespace: vzconstants.VerrazzanoInstallNamespace, 50 PodList: nil, 51 }, 52 } 53 54 var vaoPod = VzComponentNamespaces{ 55 Name: constants.VerrazzanoApplicationOperator, 56 Namespace: vzconstants.VerrazzanoSystemNamespace, 57 Label: constants.AppLabel, 58 PodList: pkghelpers.Pods{ 59 Namespace: vzconstants.VerrazzanoSystemNamespace, 60 PodList: nil, 61 }, 62 } 63 64 var vcoPod = VzComponentNamespaces{ 65 Name: constants.VerrazzanoClusterOperator, 66 Namespace: vzconstants.VerrazzanoSystemNamespace, 67 Label: constants.AppLabel, 68 PodList: pkghelpers.Pods{ 69 Namespace: vzconstants.VerrazzanoSystemNamespace, 70 PodList: nil, 71 }, 72 } 73 74 var vmoPod = VzComponentNamespaces{ 75 Name: constants.VerrazzanoMonitoringOperator, 76 Namespace: vzconstants.VerrazzanoSystemNamespace, 77 Label: constants.K8SAppLabel, 78 PodList: pkghelpers.Pods{ 79 Namespace: vzconstants.VerrazzanoSystemNamespace, 80 PodList: nil, 81 }, 82 } 83 84 var vpoWebHookPod = VzComponentNamespaces{ 85 Name: constants.VerrazzanoPlatformOperatorWebhook, 86 Namespace: vzconstants.VerrazzanoInstallNamespace, 87 Label: constants.AppLabel, 88 PodList: pkghelpers.Pods{ 89 Namespace: vzconstants.VerrazzanoInstallNamespace, 90 PodList: nil, 91 }, 92 } 93 94 var externalDNSPod = VzComponentNamespaces{ 95 Name: vzconstants.ExternalDNS, 96 Namespace: vzconstants.CertManager, 97 Label: constants.K8sAppLabelExternalDNS, 98 PodList: pkghelpers.Pods{ 99 Namespace: vzconstants.CertManager, 100 PodList: nil, 101 }, 102 } 103 104 var DefaultPodLog = pkghelpers.PodLogs{ 105 IsPodLog: false, 106 IsPrevious: false, 107 Duration: 0, 108 } 109 110 const istioSidecarStatus = "sidecar.istio.io/status" 111 112 // CaptureClusterSnapshot selectively captures the resources from the cluster, useful to analyze the issue. 113 func CaptureClusterSnapshot(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, client clipkg.Client, vzHelper pkghelpers.VZHelper, podLogs pkghelpers.PodLogs, clusterSnapshotCtx pkghelpers.ClusterSnapshotCtx) error { 114 115 // Create a file to capture the standard out to a file 116 stdOutFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportOut), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) 117 if err != nil { 118 return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error()) 119 } 120 defer stdOutFile.Close() 121 122 // Create a file to capture the standard err to a file 123 stdErrFile, err := os.OpenFile(filepath.Join(clusterSnapshotCtx.BugReportDir, constants.BugReportErr), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) 124 if err != nil { 125 return fmt.Errorf("an error occurred while creating the file include the summary of the resources captured: %s", err.Error()) 126 } 127 defer stdErrFile.Close() 128 129 // Create MultiWriters for standard out and err 130 pkghelpers.SetMultiWriterOut(vzHelper.GetOutputStream(), stdOutFile) 131 pkghelpers.SetMultiWriterErr(vzHelper.GetErrorStream(), stdErrFile) 132 133 // Find the Verrazzano resource to analyze. 134 vz, err := pkghelpers.FindVerrazzanoResource(client) 135 if err != nil { 136 pkghelpers.LogMessage(fmt.Sprintf("Verrazzano is not installed: %s", err.Error())) 137 } 138 139 // Get the list of namespaces based on the failed components and value specified by flag --include-namespaces 140 nsList, _, err := collectNamespaces(kubeClient, dynamicClient, clusterSnapshotCtx.MoreNS, vz, vzHelper) 141 if err != nil { 142 return err 143 } 144 var msgPrefix string 145 if pkghelpers.GetIsLiveCluster() { 146 msgPrefix = constants.AnalysisMsgPrefix 147 } else { 148 msgPrefix = constants.BugReportMsgPrefix 149 } 150 if clusterSnapshotCtx.PrintReportToConsole { 151 // Print initial message to console output only 152 fmt.Fprintf(vzHelper.GetOutputStream(), "\n"+msgPrefix+"resources from the cluster ...\n") 153 } 154 // Capture list of resources from verrazzano-install and verrazzano-system namespaces 155 err = captureResources(client, kubeClient, dynamicClient, clusterSnapshotCtx.BugReportDir, vz, vzHelper, nsList) 156 if err != nil { 157 pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the Verrazzano resources: %s", err.Error())) 158 } 159 160 // Capture logs from resources when the --include-logs flag 161 captureAdditionalResources(client, kubeClient, dynamicClient, vzHelper, clusterSnapshotCtx.BugReportDir, nsList, podLogs) 162 163 // Capture Verrazzano Projects and VerrazzanoManagedCluster 164 if err = captureMultiClusterResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 165 return err 166 } 167 168 // Capture global CAPI resources 169 if err = pkghelpers.CaptureGlobalCapiResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 170 return err 171 } 172 173 if err := pkghelpers.CaptureMetadata(clusterSnapshotCtx.BugReportDir); err != nil { 174 return err 175 } 176 177 // Capture global Rancher resources 178 if err = pkghelpers.CaptureGlobalRancherResources(dynamicClient, clusterSnapshotCtx.BugReportDir, vzHelper); err != nil { 179 return err 180 } 181 182 // flag pods that are missing sidecar containers 183 err = flagMissingSidecarContainers(client, kubeClient) 184 185 // find problematic pods from captured resources 186 podNameNamespaces, err := cluster.FindProblematicPods(clusterSnapshotCtx.BugReportDir) 187 if err != nil { 188 return err 189 } 190 err = captureProblematicPodLogs(kubeClient, clusterSnapshotCtx.BugReportDir, vzHelper, podNameNamespaces) 191 if err != nil { 192 return err 193 } 194 195 return nil 196 } 197 198 func captureResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, bugReportDir string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper, namespaces []string) error { 199 // List of pods to collect the logs 200 podsToCollect := []VzComponentNamespaces{vpoPod, vaoPod, vcoPod, vmoPod, vpoWebHookPod} 201 for i, component := range podsToCollect { 202 podList, _ := pkghelpers.GetPodList(client, component.Label, component.Name, component.Namespace) 203 podsToCollect[i].PodList.PodList = podList 204 } 205 externalDNSPod.PodList.PodList, _ = pkghelpers.GetPodList(client, externalDNSPod.Label, externalDNSPod.Name, externalDNSPod.Namespace) 206 207 wgCount := 5 + len(namespaces) 208 wgCount++ // increment for the verrrazzano resource 209 if len(externalDNSPod.PodList.PodList) > 0 { 210 wgCount++ 211 } 212 wg := &sync.WaitGroup{} 213 wg.Add(wgCount) 214 215 // Define channels to get the errors 216 evr := make(chan pkghelpers.ErrorsChannel, 1) 217 ecr := make(chan pkghelpers.ErrorsChannel, 1) 218 ecl := make(chan pkghelpers.ErrorsChannelLogs, 1) 219 220 go captureVZResource(wg, evr, vz, bugReportDir) 221 222 for _, podList := range podsToCollect { 223 go pkghelpers.CaptureLogs(wg, ecl, kubeClient, pkghelpers.Pods{Namespace: podList.Namespace, PodList: podList.PodList.PodList}, bugReportDir, vzHelper, DefaultPodLog) 224 } 225 if len(externalDNSPod.PodList.PodList) > 0 { 226 go pkghelpers.CaptureLogs(wg, ecl, kubeClient, pkghelpers.Pods{Namespace: externalDNSPod.Namespace, PodList: externalDNSPod.PodList.PodList}, bugReportDir, vzHelper, DefaultPodLog) 227 } 228 229 for _, ns := range namespaces { 230 go captureK8SResources(wg, ecr, client, kubeClient, dynamicClient, ns, bugReportDir, vzHelper) 231 } 232 233 wg.Wait() 234 close(ecl) 235 close(ecr) 236 close(evr) 237 // Report errors (if any), in capturing the verrazzano resource 238 for err := range evr { 239 return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage) 240 } 241 242 // Report errors (if any), in collecting the logs from various pods 243 for err := range ecl { 244 return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage) 245 } 246 247 // Report errors (if any), in collecting resources from various namespaces 248 for err := range ecr { 249 return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage) 250 } 251 return nil 252 } 253 254 // captureAdditionalLogs will be used for capture logs from additional namespace. 255 func captureAdditionalLogs(client clipkg.Client, kubeClient kubernetes.Interface, bugReportDir string, vzHelper pkghelpers.VZHelper, namespaces []string, podLogs pkghelpers.PodLogs) error { 256 wgCount := len(namespaces) 257 wg := &sync.WaitGroup{} 258 wg.Add(wgCount) 259 // Define channels to get the errors 260 evr := make(chan pkghelpers.ErrorsChannel, 1) 261 ecr := make(chan pkghelpers.ErrorsChannel, 1) 262 ecl := make(chan pkghelpers.ErrorsChannelLogs, 1) 263 for _, ns := range namespaces { 264 podList, _ := pkghelpers.GetPodListAll(client, ns) 265 go captureLogsAllPods(wg, ecl, kubeClient, pkghelpers.Pods{PodList: podList, Namespace: ns}, bugReportDir, vzHelper, podLogs) 266 } 267 268 wg.Wait() 269 close(ecl) 270 close(ecr) 271 close(evr) 272 // Report errors (if any), in collecting the logs from various pods 273 for err := range evr { 274 return fmt.Errorf("an error occurred while capturing the Verrazzano resource, error: %s", err.ErrorMessage) 275 } 276 277 // Report errors (if any), in collecting the logs from various pods 278 for err := range ecl { 279 return fmt.Errorf("an error occurred while capturing the log for pod: %s, error: %s", err.PodName, err.ErrorMessage) 280 } 281 282 // Report errors (if any), in collecting resources from various namespaces 283 for err := range ecr { 284 return fmt.Errorf("an error occurred while capturing the resource, error: %s", err.ErrorMessage) 285 } 286 return nil 287 } 288 289 // captureVZResource collects the Verrazzano resource as a JSON, in parallel 290 func captureVZResource(wg *sync.WaitGroup, ec chan pkghelpers.ErrorsChannel, vz *v1beta1.Verrazzano, bugReportDir string) { 291 defer wg.Done() 292 err := pkghelpers.CaptureVZResource(bugReportDir, vz) 293 if err != nil { 294 ec <- pkghelpers.ErrorsChannel{ErrorMessage: err.Error()} 295 } 296 } 297 298 // captureK8SResources captures Kubernetes workloads, pods, events, ingresses and services from the list of namespaces in parallel 299 func captureK8SResources(wg *sync.WaitGroup, ec chan pkghelpers.ErrorsChannel, client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, namespace, bugReportDir string, vzHelper pkghelpers.VZHelper) { 300 defer wg.Done() 301 if err := pkghelpers.CaptureK8SResources(client, kubeClient, dynamicClient, namespace, bugReportDir, vzHelper); err != nil { 302 ec <- pkghelpers.ErrorsChannel{ErrorMessage: err.Error()} 303 } 304 } 305 306 // collectNamespaces gathers list of unique namespaces, to be considered to collect the information 307 func collectNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, includedNS []string, vz *v1beta1.Verrazzano, vzHelper pkghelpers.VZHelper) ([]string, []string, error) { 308 309 var nsList []string 310 311 // Include namespaces for all the vz components 312 allCompNS := pkghelpers.GetNamespacesForAllComponents(vz) 313 nsList = append(nsList, allCompNS...) 314 315 // Verify and Include verrazzano-install namespace 316 if pkghelpers.VerifyVzInstallNamespaceExists(kubeClient) { 317 nsList = append(nsList, vzconstants.VerrazzanoInstallNamespace) 318 } 319 320 // Add any namespaces that have CAPI clusters 321 capiNSList, err := getCAPIClusterNamespaces(kubeClient, dynamicClient) 322 if err != nil { 323 return nil, nil, err 324 } 325 nsList = append(nsList, capiNSList...) 326 327 // Add Rancher namespaces 328 rancherNSList, err := getRancherNamespaces(kubeClient, dynamicClient) 329 if err != nil { 330 return nil, nil, err 331 } 332 nsList = append(nsList, rancherNSList...) 333 334 // Include the namespaces specified by flag --include-namespaces 335 var additionalNS []string 336 if len(includedNS) > 0 { 337 includedList := pkghelpers.RemoveDuplicate(includedNS) 338 for _, ns := range includedList { 339 nsExists, _ := pkghelpers.DoesNamespaceExist(kubeClient, ns, vzHelper) 340 if nsExists { 341 additionalNS = append(additionalNS, ns) 342 } 343 } 344 nsList = append(nsList, additionalNS...) 345 } 346 347 // Remove the duplicates from nsList 348 nsList = pkghelpers.RemoveDuplicate(nsList) 349 return nsList, additionalNS, nil 350 } 351 352 // This function returns a list of namespaces that have a CAPI cluster resource. 353 // We want to always capture these resources. 354 func getCAPIClusterNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) { 355 namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{}) 356 if err != nil { 357 return nil, err 358 } 359 360 nsList := []string{} 361 gvr := schema.GroupVersionResource{Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "clusters"} 362 for _, namespace := range namespaces.Items { 363 list, err := dynamicClient.Resource(gvr).Namespace(namespace.Name).List(context.TODO(), metav1.ListOptions{}) 364 // Resource type does not exist, return here since there will be no "cluster" resources. 365 // This will be the case if the cluster-api component is not installed. 366 if errors.IsNotFound(err) { 367 return nil, nil 368 } 369 if err != nil { 370 return nil, err 371 } 372 if len(list.Items) > 0 { 373 nsList = append(nsList, namespace.Name) 374 } 375 } 376 return nsList, nil 377 } 378 379 // This function returns a list of namespaces that have a Rancher annotation. 380 // We want to always capture these resources. 381 func getRancherNamespaces(kubeClient kubernetes.Interface, dynamicClient dynamic.Interface) ([]string, error) { 382 namespaces, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), metav1.ListOptions{}) 383 if err != nil { 384 return nil, err 385 } 386 387 nsList := []string{} 388 for _, namespace := range namespaces.Items { 389 if namespace.Annotations["lifecycle.cattle.io/create.namespace-auth"] == "true" { 390 nsList = append(nsList, namespace.Name) 391 } 392 } 393 return nsList, nil 394 } 395 396 // captureLogsAllPods captures logs from all pods without filtering in given namespace. 397 func captureLogsAllPods(wg *sync.WaitGroup, ec chan pkghelpers.ErrorsChannelLogs, kubeClient kubernetes.Interface, pods pkghelpers.Pods, bugReportDir string, vzHelper pkghelpers.VZHelper, podLogs pkghelpers.PodLogs) { 398 399 defer wg.Done() 400 if len(pods.PodList) == 0 { 401 return 402 } 403 for index := range pods.PodList { 404 pkghelpers.LogMessage(fmt.Sprintf("log from pod %s in %s namespace ...\n", pods.PodList[index].Name, pods.Namespace)) 405 err := pkghelpers.CapturePodLog(kubeClient, pods.PodList[index], pods.Namespace, bugReportDir, vzHelper, podLogs.Duration, podLogs.IsPrevious) 406 if err != nil { 407 ec <- pkghelpers.ErrorsChannelLogs{PodName: pods.PodList[index].Name, ErrorMessage: err.Error()} 408 } 409 } 410 } 411 412 // captureAdditionalResources will capture additional resources from additional namespaces 413 func captureAdditionalResources(client clipkg.Client, kubeClient kubernetes.Interface, dynamicClient dynamic.Interface, vzHelper pkghelpers.VZHelper, bugReportDir string, additionalNS []string, podLogs pkghelpers.PodLogs) { 414 if err := pkghelpers.CaptureOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil { 415 pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the resources : %s", err.Error())) 416 } 417 if podLogs.IsPodLog { 418 if err := captureAdditionalLogs(client, kubeClient, bugReportDir, vzHelper, additionalNS, podLogs); err != nil { 419 pkghelpers.LogError(fmt.Sprintf("There is an error with capturing the logs: %s", err.Error())) 420 } 421 } 422 if err := pkghelpers.CaptureMultiClusterOAMResources(dynamicClient, additionalNS, bugReportDir, vzHelper); err != nil { 423 pkghelpers.LogError(fmt.Sprintf("There is an error in capturing the multi-cluster resources : %s", err.Error())) 424 } 425 } 426 427 // captureMultiClusterResources captures Projects and VerrazzanoManagedCluster resource 428 func captureMultiClusterResources(dynamicClient dynamic.Interface, captureDir string, vzHelper pkghelpers.VZHelper) error { 429 // Return nil when dynamicClient is nil, useful to get clean unit tests 430 if dynamicClient == nil { 431 return nil 432 } 433 434 // Capture Verrazzano projects in verrazzano-mc namespace 435 if err := pkghelpers.CaptureVerrazzanoProjects(dynamicClient, captureDir, vzHelper); err != nil { 436 return err 437 } 438 439 // Capture Verrazzano projects in verrazzano-mc namespace 440 if err := pkghelpers.CaptureVerrazzanoManagedCluster(dynamicClient, captureDir, vzHelper); err != nil { 441 return err 442 } 443 return nil 444 } 445 446 // flagMissingSidecarContainers identifies pods in namespaces with --label istio-injection=enabled that are missing sidecar containers 447 func flagMissingSidecarContainers(client clipkg.Client, kubeClient kubernetes.Interface) error { 448 labelSelector := metav1.LabelSelector{MatchLabels: map[string]string{vzconstants.LabelIstioInjection: "enabled"}} 449 listOptions := metav1.ListOptions{LabelSelector: labels.Set(labelSelector.MatchLabels).String()} 450 namespaceList, err := kubeClient.CoreV1().Namespaces().List(context.TODO(), listOptions) 451 if err != nil { 452 return err 453 } 454 for _, namespace := range namespaceList.Items { 455 podList, err := pkghelpers.GetPodListAll(client, namespace.Name) 456 if err != nil { 457 return err 458 } 459 err = getSidecarsFromAnnotations(podList) 460 if err != nil { 461 return err 462 } 463 } 464 return nil 465 } 466 467 // getSidecarsFromAnnotations parses istio sidecar containers from pod annotations identifies pods that are missing sidecar containers 468 func getSidecarsFromAnnotations(pods []corev1.Pod) error { 469 for _, pod := range pods { 470 if pod.Annotations[istioSidecarStatus] == "" { 471 continue 472 } 473 sidecarContainers := pod.Annotations[istioSidecarStatus] 474 var obj map[string]interface{} 475 err := json.Unmarshal([]byte(sidecarContainers), &obj) 476 if err != nil { 477 return err 478 } 479 sidecarContainersFromAnnotation := obj["containers"] 480 findMissingSidecarContainers(sidecarContainersFromAnnotation.([]interface{}), pod) 481 } 482 return nil 483 } 484 485 // findMissingSidecarContainers identifies missing sidecar containers based on its annotations and pod.Spec.Containers 486 func findMissingSidecarContainers(sidecars []interface{}, pod corev1.Pod) { 487 containersFromPod := pod.Spec.Containers 488 for _, sidecar := range sidecars { 489 for i, container := range containersFromPod { 490 if sidecar == container.Name { 491 continue 492 } 493 if i+1 == len(containersFromPod) && sidecar != container.Name { 494 pkghelpers.LogError(fmt.Sprintf("Sidecar container: %s, was not found for pod: %s, in namespace %s\n", sidecar, pod.Name, pod.Namespace)) 495 } 496 } 497 } 498 } 499 500 // captureProblematicPodLogs tries to capture previous logs for any problematic pods 501 func captureProblematicPodLogs(kubeClient kubernetes.Interface, bugReportDir string, vzHelper pkghelpers.VZHelper, podNameNamespaces map[string][]corev1.Pod) error { 502 if len(podNameNamespaces) != 0 { 503 for namespace := range podNameNamespaces { 504 for _, pod := range podNameNamespaces[namespace] { 505 _ = pkghelpers.CapturePodLog(kubeClient, pod, namespace, bugReportDir, vzHelper, 0, true) 506 } 507 } 508 } 509 return nil 510 }