github.com/verrazzano/verrazzano@v1.7.0/tools/vz/pkg/analysis/internal/util/report/issue.go (about) 1 // Copyright (c) 2021, 2023, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 // Package report handles reporting 5 package report 6 7 import ( 8 "errors" 9 "fmt" 10 "strings" 11 12 "github.com/verrazzano/verrazzano/tools/vz/pkg/analysis/internal/util/files" 13 "go.uber.org/zap" 14 ) 15 16 // NOTE: This is part of the contract with the analyzers however it is currently an initial stake in the ground and 17 // will be evolving rapidly initially as we add analysis cases 18 19 // An issue describes a specific problem that has been found and includes information such as 20 // A Summary of the issue 21 // A list of Actions which can be taken 22 // - Actions are reported in the order specified in this list (so actions more likely to mitigate an issue 23 // should be specified first). 24 // - Each action may have Steps to take and/or give a list of runbook Links 25 // A list of supporting data (TBD) 26 // - Source which helped identify the issue 27 // - Indicators that identified the issue (search matches, json elements) 28 // - etc... 29 // A Confidence level (TBD) 30 // This is an indication of how confident the analysis is that the issue is really causing 31 // problems. The analysis will attempt to weed out things that are not causing an issue and will 32 // not report them if it is certain. However there may be situations where something that is found 33 // could be causing problems but it is not certain. 
34 35 // JSONPath is a JSON path 36 type JSONPath struct { 37 File string // Json filename 38 Path string // Json Path 39 } 40 41 // SupportData is data which helps a user to further identify an issue TODO: Shake this out more as we add more types, see what we really end up needing here 42 type SupportData struct { 43 Messages []string // Optional, Messages and/or descriptions the supporting data 44 RelatedFiles []string // Optional, if present provides a list of related files that support the issue identification 45 TextMatches []files.TextMatch // Optional, if present provides search results that support the issue identification 46 JSONPaths []JSONPath // Optional, if present provides a list of Json paths that support the issue identification 47 } 48 49 // Issue holds the information about an issue, supporting data, and actions 50 type Issue struct { 51 Type string // Required, This identifies the type of issue. This is either a Known Issue type, or a custom type name 52 Source string // Required, This is the source of the analysis, It may be the root of the cluster analyzed (ie: there can be multiple) 53 Informational bool // Defaults to false, if this is not an issue but an Informational note (TBD: may separate these) 54 Summary string // Required, there must be a Summary of the issue included 55 Actions []Action // Optional, if Actions are known these are included. Actions will be reported in the order specified 56 57 SupportingData []SupportData // Optional but highly desirable for issues when possible. Data that helps support issue identification 58 Confidence int // Required if not informational 0-10 () 59 Impact int // Optional 0-10 (TBD: This is a swag at how broad the impact is, 0 low, 10 high, defaults to -1 unknown) 60 } 61 62 // Validate validates an issue. A zeroed Issue is not valid, there is some amount of information that must be specified for the Issue to 63 // be useful. 
Currently the report will validate that the issues contributed are valid at the point where they are 64 // being contributed. 65 func (issue *Issue) Validate(log *zap.SugaredLogger, mapSource string) (err error) { 66 if len(issue.Type) == 0 { 67 return errors.New("A Type is required for an Issue") 68 } 69 if len(issue.Source) == 0 { 70 return errors.New("A Source is required for an Issue") 71 } 72 // If there was a map source supplied, this means we are additionally checking that the source key 73 // for the map matches the issue source as well (ie: when handed a map/slice of issues and a source 74 // key, we check these here). If there is no mapSource supplied it just means the issue Source is used for 75 // map insertions. 76 if len(mapSource) != 0 && issue.Source != mapSource { 77 return fmt.Errorf("The issue source %s doesn't match the map source supplied %s", issue.Source, mapSource) 78 } 79 if len(issue.Summary) == 0 { 80 return errors.New("A Summary is required for an Issue") 81 } 82 if len(issue.Actions) > 0 { 83 for _, action := range issue.Actions { 84 err = action.Validate(log) 85 if err != nil { 86 log.Debugf("Action related to issue %s was invalid", issue.Summary, err) 87 return err 88 } 89 } 90 } 91 if issue.Confidence < 0 || issue.Confidence > 10 { 92 log.Debugf("Confidence %d is out of range, related to issue %s", issue.Confidence, issue.Summary) 93 return fmt.Errorf("Confidence %d is out of range, related to issue %s", issue.Confidence, issue.Summary) 94 } 95 return nil 96 } 97 98 // Known Issue Types. 
const (
	// Each constant's value is identical to its identifier. The constants are the keys of the
	// knownIssues template map, which is what the NewKnown*/AddKnown* helpers look up.
	ImagePullBackOff                                 = "ImagePullBackOff"
	ImagePullRateLimit                               = "ImagePullRateLimit"
	ImagePullNotFound                                = "ImagePullNotFound"
	ImagePullService                                 = "ImagePullService"
	InsufficientMemory                               = "InsufficientMemory"
	InsufficientCPU                                  = "InsufficientCPU"
	IngressInstallFailure                            = "IngressInstallFailure"
	IngressLBLimitExceeded                           = "IngressLBLimitExceeded"
	IngressNoLoadBalancerIP                          = "IngressNoLoadBalancerIP"
	IngressOciIPLimitExceeded                        = "IngressOciIPLimitExceeded"
	InstallFailure                                   = "InstallFailure"
	PendingPods                                      = "PendingPods"
	PodProblemsNotReported                           = "PodProblemsNotReported"
	ComponentsNotReady                               = "ComponentsNotReady"
	IngressNoIPFound                                 = "IngressNoIPFound"
	IstioIngressNoIP                                 = "IstioIngressNoIP"
	IngressShapeInvalid                              = "IngressShapeInvalid"
	IstioIngressPrivateSubnet                        = "IstioIngressPrivateSubnet"
	NginxIngressPrivateSubnet                        = "NginxIngressPrivateSubnet"
	ExternalDNSConfigureIssue                        = "ExternalDNSConfigureIssue"
	KeycloakDataMigrationFailure                     = "KeycloakDataMigrationFailure"
	RancherIssues                                    = "RancherIssues"
	VZClientHangingIssueDueToLongCertificateApproval = "VZClientHangingIssueDueToLongCertificateApproval"
	CertificateExpired                               = "CertificateExpired"
	CertificateExperiencingIssuesInCluster           = "CertificateExperiencingIssuesInCluster"
	ClusterAPIClusterIssues                          = "ClusterAPIClusterIssues"
	CaCrtExpiredInCluster                            = "CaCrtExpiredInCluster"
)

// NOTE: How we are handling the issues/actions/reporting is still very much evolving here. Currently supplying some
// helpers to reduce boilerplate when creating/reporting issues with common cases.

// Known Issue Templates. While analyzers are free to roll their own custom Issues, the preference for well-known issues is to capture them
// here so they are more generally available.
134 var knownIssues = map[string]Issue{ 135 ImagePullBackOff: {Type: ImagePullBackOff, Summary: "Failure(s) pulling images have been detected, however a specific root cause was not identified", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ImagePullBackOff]}}, 136 ImagePullRateLimit: {Type: ImagePullRateLimit, Summary: "Failure(s) pulling images have been detected due to an image pull rate limit", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ImagePullRateLimit]}}, 137 ImagePullNotFound: {Type: ImagePullNotFound, Summary: "Failure(s) pulling images have been detected due to the image not being found", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ImagePullNotFound]}}, 138 ImagePullService: {Type: ImagePullService, Summary: "Failure(s) pulling images have been detected due to the service not being available, the service may be unreachable or may be incorrectly specified", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ImagePullService]}}, 139 InsufficientMemory: {Type: InsufficientMemory, Summary: "Failure(s) due to insufficient memory on nodes have been detected", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[InsufficientMemory]}}, 140 InsufficientCPU: {Type: InsufficientCPU, Summary: "Failure(s) due to insufficient CPU on nodes have been detected", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[InsufficientCPU]}}, 141 IngressInstallFailure: {Type: IngressInstallFailure, Summary: "Verrazzano install failed while installing the NGINX Ingress Controller, however a specific root cause was not identified", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressInstallFailure]}}, 142 IngressLBLimitExceeded: {Type: IngressLBLimitExceeded, Summary: "Verrazzano install failed while installing the NGINX Ingress Controller, the root cause 
appears to be that the load balancer service limit has been reached", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressLBLimitExceeded]}}, 143 IngressNoLoadBalancerIP: {Type: IngressNoLoadBalancerIP, Summary: "Verrazzano install failed while installing the NGINX Ingress Controller, the root cause appears to be the LoadBalancer is not there or is unable to set the ingress IP address on the NGINX Ingress service", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressNoLoadBalancerIP]}}, 144 IngressOciIPLimitExceeded: {Type: IngressOciIPLimitExceeded, Summary: "Verrazzano install failed while installing the NGINX Ingress Controller, the root cause appears to be an OCI IP non-ephemeral address limit has been reached", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressOciIPLimitExceeded]}}, 145 InstallFailure: {Type: InstallFailure, Summary: "Verrazzano install failed, however a specific root cause was not identified", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[InstallFailure]}}, 146 PendingPods: {Type: PendingPods, Summary: "Pods in a Pending state were detected. 
These may come up normally or there may be specific issues preventing them from coming up", Informational: true, Impact: 0, Confidence: 1, Actions: []Action{KnownActions[PendingPods]}}, 147 PodProblemsNotReported: {Type: PodProblemsNotReported, Summary: "Problem pods were detected, however a specific root cause was not identified", Informational: true, Impact: 0, Confidence: 10, Actions: []Action{KnownActions[PodProblemsNotReported]}}, 148 ComponentsNotReady: {Type: InstallFailure, Summary: "Verrazzano install failed, one or more components did not reach Ready state", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[InstallFailure]}}, 149 IngressNoIPFound: {Type: IngressNoIPFound, Summary: "Verrazzano install failed as no IP found for service ingress-controller-ingress-nginx-controller with type LoadBalancer", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressNoIPFound]}}, 150 IstioIngressNoIP: {Type: IstioIngressNoIP, Summary: "Verrazzano install failed as no IP found for service istio-ingressgateway with type LoadBalancer", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IstioIngressNoIP]}}, 151 IngressShapeInvalid: {Type: IngressShapeInvalid, Summary: "Verrazzano install failed as the shape provided for NGINX Ingress Controller is invalid", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IngressShapeInvalid]}}, 152 IstioIngressPrivateSubnet: {Type: IstioIngressPrivateSubnet, Summary: "Failed to create LoadBalancer for Istio Ingress Gateway", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[IstioIngressPrivateSubnet]}}, 153 NginxIngressPrivateSubnet: {Type: NginxIngressPrivateSubnet, Summary: "Failed to create LoadBalancer for Nginx Ingress Controller", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[NginxIngressPrivateSubnet]}}, 154 ExternalDNSConfigureIssue: 
{Type: ExternalDNSConfigureIssue, Summary: "Failed to setup DNS configuration", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ExternalDNSConfigureIssue]}}, 155 KeycloakDataMigrationFailure: {Type: KeycloakDataMigrationFailure, Summary: "Failure(s) migrating Keycloak data during MySQL upgrade", Informational: true, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[KeycloakDataMigrationFailure]}}, 156 RancherIssues: {Type: RancherIssues, Summary: "Rancher resources are not in the expected state", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[RancherIssues]}}, 157 VZClientHangingIssueDueToLongCertificateApproval: {Type: VZClientHangingIssueDueToLongCertificateApproval, Summary: " Verrazzano Client is hanging due to the long time that it takes to approve and provision certificates", Informational: true, Impact: 10, Confidence: 10}, 158 CertificateExpired: {Type: CertificateExpired, Summary: "A certificate in the cluster is currently expired", Informational: true, Impact: 10, Confidence: 10}, 159 CertificateExperiencingIssuesInCluster: {Type: CertificateExperiencingIssuesInCluster, Summary: "A certificate in the cluster is experiencing issues, but it is not expired", Informational: true, Impact: 10, Confidence: 10}, 160 ClusterAPIClusterIssues: {Type: ClusterAPIClusterIssues, Summary: "Cluster API cluster resources are not in the expected state", Informational: false, Impact: 10, Confidence: 10, Actions: []Action{KnownActions[ClusterAPIClusterIssues]}}, 161 CaCrtExpiredInCluster: {Type: CaCrtExpiredInCluster, Summary: "A ca.crt value in the cluster is expired", Informational: true, Impact: 10, Confidence: 10}, 162 } 163 164 // NewKnownIssueSupportingData adds a known issue 165 func NewKnownIssueSupportingData(issueType string, source string, supportingData []SupportData) (issue Issue) { 166 issue = getKnownIssueOrDie(issueType) 167 issue.Source = source 168 issue.SupportingData = 
supportingData 169 return issue 170 } 171 172 // NewKnownIssueMessagesFiles adds a known issue 173 func NewKnownIssueMessagesFiles(issueType string, source string, messages []string, fileNames []string) (issue Issue) { 174 issue = getKnownIssueOrDie(issueType) 175 issue.Source = source 176 issue.SupportingData = make([]SupportData, 1) 177 issue.SupportingData[0] = SupportData{ 178 Messages: messages, 179 RelatedFiles: fileNames, 180 } 181 return issue 182 } 183 184 // NewKnownIssueMessagesMatches adds a known issue 185 func NewKnownIssueMessagesMatches(issueType string, source string, messages []string, matches []files.TextMatch) (issue Issue) { 186 issue = getKnownIssueOrDie(issueType) 187 issue.Source = source 188 issue.SupportingData = make([]SupportData, 1) 189 issue.SupportingData[0] = SupportData{ 190 Messages: messages, 191 TextMatches: matches, 192 } 193 return issue 194 } 195 196 // IssueReporter is a helper for consolidating known issues before contributing them to the report 197 // An analyzer may is free to use the IssueReporter NewKnown* helpers for known issues, however they 198 // are not required to do so and are free to form fully custom issues and Contribute 199 // those directly to the report.Contribute* helpers. This allows analyzers flexibility, but the goal 200 // here is that the IssueReporter can evolve to support all of the cases if possible. 
201 type IssueReporter struct { 202 PendingIssues map[string]Issue 203 } 204 205 // AddKnownIssueSupportingData adds a known issue 206 func (issueReporter *IssueReporter) AddKnownIssueSupportingData(issueType string, source string, supportingData []SupportData) { 207 confirmKnownIssueOrDie(issueType) 208 209 // If this is a new issue, get a new one 210 if issue, ok := issueReporter.PendingIssues[issueType]; !ok { 211 issueReporter.PendingIssues[issueType] = NewKnownIssueSupportingData(issueType, source, supportingData) 212 } else { 213 issue.SupportingData = append(issue.SupportingData, supportingData...) 214 issueReporter.PendingIssues[issueType] = issue 215 } 216 } 217 218 // AddKnownIssueMessagesFiles adds a known issue 219 func (issueReporter *IssueReporter) AddKnownIssueMessagesFiles(issueType string, source string, messages []string, fileNames []string) { 220 confirmKnownIssueOrDie(issueType) 221 222 // If this is a new issue, get a new one 223 if issue, ok := issueReporter.PendingIssues[issueType]; !ok { 224 issueReporter.PendingIssues[issueType] = NewKnownIssueMessagesFiles(issueType, source, messages, fileNames) 225 } else { 226 supportData := SupportData{ 227 Messages: messages, 228 RelatedFiles: fileNames, 229 } 230 issue.SupportingData = append(issue.SupportingData, supportData) 231 issueReporter.PendingIssues[issueType] = issue 232 } 233 } 234 235 // AddKnownIssueMessagesMatches adds a known issue 236 func (issueReporter *IssueReporter) AddKnownIssueMessagesMatches(issueType string, source string, messages []string, matches []files.TextMatch) { 237 confirmKnownIssueOrDie(issueType) 238 239 // If this is a new issue, get a new one 240 if issue, ok := issueReporter.PendingIssues[issueType]; !ok { 241 issueReporter.PendingIssues[issueType] = NewKnownIssueMessagesMatches(issueType, source, messages, matches) 242 } else { 243 supportData := SupportData{ 244 Messages: messages, 245 TextMatches: matches, 246 } 247 issue.SupportingData = 
append(issue.SupportingData, supportData) 248 issueReporter.PendingIssues[issueType] = issue 249 } 250 } 251 252 // DeduplicateSupportingData 253 func DeduplicateSupportingData(dataIn []SupportData) (dataOut []SupportData) { 254 // First deduplicate each individual SupportData element, get a minimal set of file and messages at least in 255 // each one. 256 dataOut = make([]SupportData, len(dataIn)) 257 for index, supportData := range dataIn { 258 dataOut[index] = deduplicateSupportData(supportData) 259 } 260 // TODO: Next deduplicate the SupportData entries that match exactly 261 262 return dataIn 263 } 264 265 // deduplicateSupportData will deduplicate values within a single SupportData 266 func deduplicateSupportData(dataIn SupportData) (dataOut SupportData) { 267 dataOut.RelatedFiles = deduplicateStringSlice(dataIn.RelatedFiles) 268 dataOut.Messages = deduplicateStringSlice(dataIn.Messages) 269 // TODO: deduplicate 270 dataOut.JSONPaths = dataIn.JSONPaths 271 dataOut.TextMatches = dataIn.TextMatches 272 return dataOut 273 } 274 275 func deduplicateStringSlice(sliceIn []string) (sliceOut []string) { 276 if len(sliceIn) <= 1 { 277 copy(sliceOut, sliceIn) 278 } else { 279 tempMap := make(map[string]int) 280 for _, value := range sliceIn { 281 _, ok := tempMap[value] 282 if !ok { 283 tempMap[value] = 0 284 } 285 } 286 sliceOut = make([]string, len(tempMap)) 287 index := 0 288 for key := range tempMap { 289 sliceOut[index] = key 290 index++ 291 } 292 } 293 return sliceOut 294 } 295 296 // The helpers that work with known issue types only support working with those types 297 // If code is supplying an issueType that is not known, that is a coding error and we 298 // panic so that is clear immediately to the developer. 
299 func getKnownIssueOrDie(issueType string) (issue Issue) { 300 issue, ok := knownIssues[issueType] 301 if !ok { 302 panic("This helper is used with known issue types only") 303 } 304 return issue 305 } 306 307 func confirmKnownIssueOrDie(issueType string) { 308 _, ok := knownIssues[issueType] 309 if !ok { 310 panic("This helper is used with known issue types only") 311 } 312 } 313 314 // Contribute will contribute issues which have been added to the issue reporter 315 func (issueReporter *IssueReporter) Contribute(log *zap.SugaredLogger, source string) { 316 if len(issueReporter.PendingIssues) == 0 { 317 return 318 } 319 // Contribute the issues all at once 320 ContributeIssuesMap(log, source, issueReporter.PendingIssues) 321 issueReporter.PendingIssues = make(map[string]Issue) 322 } 323 324 // SingleMessage is a helper which is useful when adding a single message to supporting data 325 func SingleMessage(message string) (messages []string) { 326 messages = make([]string, 1) 327 messages[0] = message 328 return messages 329 } 330 331 // GetRelatedPodMessage returns the message for an issue in pod, used for setting supporting data 332 func GetRelatedPodMessage(pod, ns string) string { 333 return "Pod \"" + pod + "\" in namespace \"" + ns + "\"" 334 } 335 336 // GetRelatedServiceMessage returns the message for an issue in a service, used for setting supporting data 337 func GetRelatedServiceMessage(service, ns string) string { 338 return "Service \"" + service + "\" in namespace \"" + ns + "\"" 339 } 340 341 // GetRelatedLogFromPodMessage returns the message to indicate the issue in the pod log, in a given namespace 342 func GetRelatedLogFromPodMessage(podLog string) string { 343 splitStr := strings.Split(podLog, "/") 344 pod := splitStr[len(splitStr)-2] 345 ns := splitStr[len(splitStr)-3] 346 return "Log from pod \"" + pod + "\" in namespace \"" + ns + "\"" 347 } 348 349 // GetRelatedEventMessage returns the message for an event, used for setting supporting data 
func GetRelatedEventMessage(ns string) string {
	// The namespace is placed between literal double quotes as is, it is not escaped.
	const prefix = `Event(s) in namespace "`
	return prefix + ns + `"`
}

// GetRelatedVZResourceMessage returns the fixed message naming the Verrazzano custom resource,
// used for setting supporting data.
func GetRelatedVZResourceMessage() string {
	const message = "Verrazzano custom resource"
	return message
}