github.com/verrazzano/verrazzano@v1.7.1/tools/vz/pkg/internal/util/cluster/analyzer.go

github.com/verrazzano/verrazzano@v1.7.1/tools/vz/pkg/internal/util/cluster/analyzer.go (about)

     1  // Copyright (c) 2021, 2024, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  // Package cluster handles cluster analysis
     5  package cluster
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"regexp"
    11  
    12  	"github.com/verrazzano/verrazzano/tools/vz/pkg/helpers"
    13  	"github.com/verrazzano/verrazzano/tools/vz/pkg/internal/util/files"
    14  	"github.com/verrazzano/verrazzano/tools/vz/pkg/internal/util/report"
    15  	"go.uber.org/zap"
    16  )
    17  
// TBD: Overall the intention/design is that we could execute analysis in parallel if we want to do that in
// the future. So in general analyzers are independent of each other and thread safe, and do not expect to
// be executed in a particular order.
//
// However, there may be special cases where we want an analysis to be done and the information gleaned
// from that analysis to be available to other analyzers. For example, the analysis of the state
// of Verrazzano is something that is likely to fall into that category. It will make a high level
// determination of where in the lifecycle we are, and other analyzers may need to easily get that
// information to give better guidance on the issues/actions.
//
// The current implementation calls the analyzers serially, one at a time.
// If we do decide to handle analysis in a parallel fashion later, we will likely need to have some
// analyzers called deterministically in exact order before we fire off other analyzers in parallel.
// So we may break this into 2 lists in the future: serial analysis functions and parallel analysis functions.
// Analyzers that may fall into this category should be annotated with a comment; there is currently only
// one that may require that.
    34  
    35  // These are the high level analysis functions that are called. The "Runtime Issues" maps to only certificate functions currently.
    36  var clusterAnalysisFunctions = map[string]func(log *zap.SugaredLogger, directory string) (err error){
    37  	"Verrazzano Status":                         AnalyzeVerrazzano, // Execute first, this may share data other analyzers can use
    38  	"Pod Related Issues":                        AnalyzePodIssues,
    39  	"Rancher Status":                            AnalyzeRancher,
    40  	"Runtime Issues":                            AnalyzeCertificateRelatedIssues,
    41  	"Cluster API Issues":                        AnalyzeClusterAPI,
    42  	"Networking Issues":                         AnalyzeNetworkingIssues,
    43  	"Finalizer and Resource Termination Issues": AnalyzeNamespaceRelatedIssues,
    44  	"MySQL Issues":                              AnalyzeMySQLRelatedIssues,
    45  }
    46  
    47  // ClusterDumpDirectoriesRe is used for finding cluster-snapshot directory name matches
    48  var ClusterDumpDirectoriesRe = regexp.MustCompile(`.*/cluster-snapshot$`)
    49  
    50  // LogFilesMatchRe is used for finding pod log files in a cluster dump
    51  var LogFilesMatchRe = regexp.MustCompile(`logs.txt`)
    52  
    53  // PodFilesMatchRe is used for finding pod files in a cluster dump
    54  var PodFilesMatchRe = regexp.MustCompile(`pods.json`)
    55  
    56  // ErrorSearchRe is used for searching for case insensitive "error". This is useful when we know there is a
    57  // problem lurking but we can't identify the specific issue and are trying to capture relevant information
    58  // to include in support data from logs and events
    59  var ErrorSearchRe = regexp.MustCompile(`(?i).*error.*`)
    60  
    61  // WideErrorSearchRe is used for casting a wider net while looking for issues TBD: .*ERROR.*|.*Error.*|.*FAILED.*
    62  var WideErrorSearchRe = regexp.MustCompile(`(?i).*error.*|.*failed.*`)
    63  
    64  // EventReasonFailedRe is used for finding event reason failures
    65  var EventReasonFailedRe = regexp.MustCompile(`.*Failed.*`)
    66  
    67  // RunAnalysis is the main entry analysis function
    68  func RunAnalysis(vzHelper helpers.VZHelper, log *zap.SugaredLogger, rootDirectory string) (err error) {
    69  	log.Debugf("Cluster Analyzer runAnalysis on %s", rootDirectory)
    70  
    71  	clusterRoots, err := files.GetMatchingDirectoryNames(log, rootDirectory, ClusterDumpDirectoriesRe)
    72  	if err != nil {
    73  		log.Debugf("Cluster Analyzer runAnalysis failed examining directories for %s", rootDirectory, err)
    74  		return fmt.Errorf("Cluster Analyzer runAnalysis failed examining directories for %s", rootDirectory)
    75  	}
    76  	if len(clusterRoots) == 0 {
    77  		log.Debugf("Cluster Analyzer runAnalysis didn't find any clusters to analyze for %s", rootDirectory)
    78  		return fmt.Errorf("Cluster Analyzer runAnalysis didn't find any clusters to analyze for %s", rootDirectory)
    79  	}
    80  
    81  	for _, clusterRoot := range clusterRoots {
    82  		// Ignore directories if they don't contain snapshots. Checking if verrazzano-resources.json exists in the dir
    83  		// that implies the directory has the required snapshots.
    84  		vzResourcesPath := files.FormFilePathInClusterRoot(clusterRoot, verrazzanoResource)
    85  		fileInfo, e := os.Stat(vzResourcesPath)
    86  		if e != nil || fileInfo.Size() == 0 {
    87  			log.Debugf("Verrazzano resource file %s is either empty or not there", vzResourcesPath)
    88  			continue
    89  		}
    90  		analyzeCluster(vzHelper, log, clusterRoot)
    91  	}
    92  
    93  	return nil
    94  }
    95  
    96  func analyzeCluster(vzHelper helpers.VZHelper, log *zap.SugaredLogger, clusterRoot string) (err error) {
    97  	log.Debugf("analyzeCluster called for %s", clusterRoot)
    98  	report.AddSourceAnalyzed(clusterRoot)
    99  
   100  	for functionName, function := range clusterAnalysisFunctions {
   101  		err := function(log, clusterRoot)
   102  		if err != nil {
   103  			// Log the error and continue on
   104  			fmt.Fprintf(vzHelper.GetErrorStream(), fmt.Sprintf("Error processing analysis function %s\n", functionName), err)
   105  		}
   106  	}
   107  
   108  	return nil
   109  }