github.com/verrazzano/verrazzano@v1.7.0/tools/vz/pkg/analysis/main_test.go (about)

     1  // Copyright (c) 2021, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  package analysis
     4  
     5  import (
     6  	"fmt"
     7  	"testing"
     8  
     9  	"github.com/stretchr/testify/assert"
    10  	"github.com/verrazzano/verrazzano/tools/vz/pkg/analysis/internal/util/log"
    11  	"github.com/verrazzano/verrazzano/tools/vz/pkg/analysis/internal/util/report"
    12  )
    13  
    14  // TestHandleMain Tests the handleMain function
    15  // GIVEN a call to handleMain
    16  // WHEN with valid/invalid inputs
    17  // THEN exit codes returned are as expected
    18  func TestHandleMain(t *testing.T) {
    19  	// This is setting up the main.logger, do NOT set it as a var here (or you will get a nil reference running
    20  	// the test)
    21  	logger = log.GetDebugEnabledLogger()
    22  	analyzerType = "cluster"
    23  }
    24  
    25  // TestAnalyzeBad Tests the main Analyze function
    26  // GIVEN a call to Analyze
    27  // WHEN with invalid inputs
    28  // THEN errors are generated as expected
    29  func TestExecuteAnalysisBadArgs(t *testing.T) {
    30  	logger := log.GetDebugEnabledLogger()
    31  
    32  	// Call the analyzer with an unknown type, give it a good cluster dump directory
    33  	err := Analyze(logger, "badnamehere", "../test/cluster/image-pull-case1")
    34  	assert.NotNil(t, err)
    35  	// TODO: Check error message is what we expected here
    36  
    37  }
    38  func TestProblemPodsInCattleSystem(t *testing.T) {
    39  	logger := log.GetDebugEnabledLogger()
    40  
    41  	err := Analyze(logger, "cluster", "test/cluster/testCattleSystempods")
    42  	assert.Nil(t, err)
    43  
    44  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
    45  	assert.Nil(t, reportedIssues)
    46  	assert.False(t, len(reportedIssues) > 0)
    47  	problemPodsFound := 0
    48  	for _, issue := range reportedIssues {
    49  		if issue.Type == report.PodProblemsNotReported {
    50  			problemPodsFound++
    51  		}
    52  
    53  	}
    54  	assert.True(t, problemPodsFound == 0)
    55  }
    56  
    57  // TestImagePullCase1 Tests that analysis of a cluster dump with image pull issues is handled
    58  // GIVEN a call to analyze a cluster-snapshot
    59  // WHEN the cluster-snapshot shows image pull issues
    60  // THEN a report is generated with image pull issues identified
    61  func TestImagePull(t *testing.T) {
    62  	logger := log.GetDebugEnabledLogger()
    63  
    64  	report.ClearReports()
    65  	err := Analyze(logger, "cluster", "test/cluster/image-pull-case1")
    66  	assert.Nil(t, err)
    67  
    68  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
    69  	assert.NotNil(t, reportedIssues)
    70  	assert.True(t, len(reportedIssues) > 0)
    71  	imagePullsFound := 0
    72  	for _, issue := range reportedIssues {
    73  		if issue.Type == report.ImagePullNotFound {
    74  			imagePullsFound++
    75  		}
    76  	}
    77  	assert.True(t, imagePullsFound > 0)
    78  }
    79  
    80  // TestInsufficientMemory Tests that analysis of a cluster dump with pods that failed due to insufficient memory
    81  // GIVEN a call to analyze a cluster-snapshot
    82  // WHEN the cluster-snapshot shows pods with insufficient memory problems
    83  // THEN a report is generated with issues identified
    84  func TestInsufficientMemory(t *testing.T) {
    85  	logger := log.GetDebugEnabledLogger()
    86  
    87  	report.ClearReports()
    88  	err := Analyze(logger, "cluster", "test/cluster/insufficient-mem")
    89  	assert.Nil(t, err)
    90  
    91  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
    92  	assert.NotNil(t, reportedIssues)
    93  	assert.True(t, len(reportedIssues) > 0)
    94  	issuesFound := 0
    95  	for _, issue := range reportedIssues {
    96  		if issue.Type == report.InsufficientMemory {
    97  			issuesFound++
    98  		}
    99  	}
   100  	assert.True(t, issuesFound > 0)
   101  }
   102  
   103  // TestProblemPodsNotReportedUninstall Tests that analysis of a cluster dump with pods that have unknown issues during
   104  // uninstall, is handled
   105  // GIVEN a call to analyze a cluster-snapshot
   106  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   107  // THEN a report is generated with problem pod issues identified
   108  func TestProblemPodsNotReportedUninstall(t *testing.T) {
   109  	logger := log.GetDebugEnabledLogger()
   110  
   111  	report.ClearReports()
   112  	err := Analyze(logger, "cluster", "test/cluster/problem-pods")
   113  	assert.Nil(t, err)
   114  
   115  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   116  	assert.NotNil(t, reportedIssues)
   117  	assert.True(t, len(reportedIssues) > 0)
   118  	problemPodsFound := 0
   119  	for _, issue := range reportedIssues {
   120  		if issue.Type == report.PodProblemsNotReported {
   121  			problemPodsFound++
   122  		}
   123  	}
   124  	assert.True(t, problemPodsFound > 0)
   125  }
   126  
   127  // TestProblemPodsNotReportedInstall Tests that analysis of a cluster dump with pods that have unknown issues during
   128  // install, is handled
   129  // GIVEN a call to analyze a cluster-snapshot
   130  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   131  // THEN a report is generated with problem pod issues identified
   132  func TestProblemPodsNotReportedInstall(t *testing.T) {
   133  	logger := log.GetDebugEnabledLogger()
   134  
   135  	report.ClearReports()
   136  	err := Analyze(logger, "cluster", "test/cluster/problem-pods-install")
   137  	assert.Nil(t, err)
   138  
   139  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   140  	assert.NotNil(t, reportedIssues)
   141  	assert.True(t, len(reportedIssues) > 0)
   142  
   143  	exceededLBLimit := 0
   144  	for _, issue := range reportedIssues {
   145  		if issue.Type == report.IngressLBLimitExceeded {
   146  			exceededLBLimit++
   147  		}
   148  
   149  	}
   150  	assert.True(t, exceededLBLimit > 0)
   151  }
   152  
   153  // TestLBIpNotSet Tests that analysis of a cluster dump where LB issue occurred with no IP set is handled
   154  // GIVEN a call to analyze a cluster-snapshot
   155  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   156  // THEN a report is generated with problem pod issues identified
// Note: With the latest changes to the platform operator and the analysis tool, this issue is reported differently.
// This test is commented out for now, and a new test, TestLBIpNotFound, has been added.
   159  //func TestLBIpNotSet(t *testing.T) {
   160  //	logger := log.GetDebugEnabledLogger()
   161  
   162  //	err := Analyze(logger, "cluster", "test/cluster/lb-ipnotset")
   163  //	assert.Nil(t, err)
   164  
   165  //	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   166  //	assert.NotNil(t, reportedIssues)
   167  //	assert.True(t, len(reportedIssues) > 0)
   168  //	problemsFound := 0
   169  //	for _, issue := range reportedIssues {
   170  //		if issue.Type == report.IngressNoLoadBalancerIP {
   171  //			problemsFound++
   172  //		}
   173  //	}
   174  //	assert.True(t, problemsFound > 0)
   175  //}
   176  
   177  // TestLBIpNotFound Tests that analysis of a cluster dump where no IP was found for load balancer
   178  // GIVEN a call to analyze a cluster-snapshot
   179  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   180  // THEN a report is generated with problem pod issues identified
   181  func TestLBIpNotFound(t *testing.T) {
   182  	logger := log.GetDebugEnabledLogger()
   183  
   184  	report.ClearReports()
   185  	err := Analyze(logger, "cluster", "test/cluster/ingress-ip-not-found")
   186  	assert.Nil(t, err)
   187  
   188  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   189  	assert.NotNil(t, reportedIssues)
   190  	assert.True(t, len(reportedIssues) > 0)
   191  	problemsFound := 0
   192  	for _, issue := range reportedIssues {
   193  		if issue.Type == report.IngressNoIPFound {
   194  			problemsFound++
   195  		}
   196  	}
   197  	assert.True(t, problemsFound > 0)
   198  }
   199  
   200  // TestIstioLBIpNotFound Tests that analysis of a cluster dump where no Istio Gateway IP was found
   201  // GIVEN a call to analyze a cluster-snapshot
   202  // WHEN the cluster-snapshot shows services with external IP problems
   203  // THEN a report is generated with issues identified
   204  func TestIstioLBIpNotFound(t *testing.T) {
   205  	logger := log.GetDebugEnabledLogger()
   206  
   207  	report.ClearReports()
   208  	err := Analyze(logger, "cluster", "test/cluster/istio-ingress-ip-not-found")
   209  	assert.Nil(t, err)
   210  
   211  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   212  	assert.NotNil(t, reportedIssues)
   213  	assert.True(t, len(reportedIssues) > 0)
   214  	problemsFound := 0
   215  	for _, issue := range reportedIssues {
   216  		if issue.Type == report.IstioIngressNoIP {
   217  			problemsFound++
   218  		}
   219  	}
   220  	assert.True(t, problemsFound > 0)
   221  }
   222  
   223  // TODO: Enable this test once there is a cluster dump for this use case
   224  // TestIngressInstall Tests that analysis of a cluster dump where Ingress install failed without more info handled
   225  // GIVEN a call to analyze a cluster-snapshot
   226  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   227  // THEN a report is generated with problem pod issues identified
   228  // func TestIngressInstall(t *testing.T) {
   229  //	logger := log.GetDebugEnabledLogger()
   230  
   231  //	err := Analyze(logger, "cluster", "test/cluster/ingress-install-unknown")
   232  //	assert.Nil(t, err)
   233  
   234  //	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   235  //	assert.NotNil(t, reportedIssues)
   236  //	assert.True(t, len(reportedIssues) > 0)
   237  //	problemsFound := 0
   238  //	for _, issue := range reportedIssues {
   239  //		if issue.Type == report.IngressInstallFailure {
   240  //			problemsFound++
   241  //		}
   242  //	}
   243  //	assert.True(t, problemsFound > 0)
   244  //}
   245  
   246  // TestLBLimitExceeded Test that analysis of a cluster dump where Ingress install failed due to LoadBalancer service limit handled
   247  // GIVEN a call to analyze a cluster-snapshot
   248  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   249  // THEN a report is generated with problem pod issues identified
   250  func TestLBLimitExceeded(t *testing.T) {
   251  	logger := log.GetDebugEnabledLogger()
   252  
   253  	report.ClearReports()
   254  	err := Analyze(logger, "cluster", "test/cluster/ingress-lb-limit")
   255  	assert.Nil(t, err)
   256  
   257  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   258  	assert.NotNil(t, reportedIssues)
   259  	assert.True(t, len(reportedIssues) > 0)
   260  	problemsFound := 0
   261  	for _, issue := range reportedIssues {
   262  		if issue.Type == report.IngressLBLimitExceeded {
   263  			problemsFound++
   264  		}
   265  	}
   266  	assert.True(t, problemsFound > 0)
   267  }
   268  
   269  // TestOciIPLimitExceeded Tests that analysis of a cluster dump where Ingress install failed due to OCI limit handled
   270  // GIVEN a call to analyze a cluster-snapshot
   271  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   272  // THEN a report is generated with problem pod issues identified
   273  func TestOciIPLimitExceeded(t *testing.T) {
   274  	logger := log.GetDebugEnabledLogger()
   275  
   276  	report.ClearReports()
   277  	err := Analyze(logger, "cluster", "test/cluster/ingress-oci-limit")
   278  	assert.Nil(t, err)
   279  
   280  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   281  	assert.NotNil(t, reportedIssues)
   282  	assert.True(t, len(reportedIssues) > 0)
   283  	problemsFound := 0
   284  	for _, issue := range reportedIssues {
   285  		if issue.Type == report.IngressOciIPLimitExceeded {
   286  			problemsFound++
   287  		}
   288  	}
   289  	assert.True(t, problemsFound > 0)
   290  }
   291  
   292  // TestOciLBInvalidShape Tests that analysis of a cluster dump where an invalid shape specified for OCI load balancer
   293  // GIVEN a call to analyze a cluster-snapshot
   294  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   295  // THEN a report is generated with problem pod issues identified
   296  func TestOciLBInvalidShape(t *testing.T) {
   297  	logger := log.GetDebugEnabledLogger()
   298  
   299  	report.ClearReports()
   300  	err := Analyze(logger, "cluster", "test/cluster/ingress-invalid-shape")
   301  	assert.Nil(t, err)
   302  
   303  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   304  	assert.NotNil(t, reportedIssues)
   305  	assert.True(t, len(reportedIssues) > 0)
   306  	problemsFound := 0
   307  	for _, issue := range reportedIssues {
   308  		if issue.Type == report.IngressShapeInvalid {
   309  			problemsFound++
   310  		}
   311  	}
   312  	assert.True(t, problemsFound > 0)
   313  }
   314  
   315  // TestPendingPods that analysis of a cluster dump where pending pods only is handled
   316  // GIVEN a call to analyze a cluster-snapshot
   317  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   318  // THEN a report is generated with problem pod issues identified
   319  func TestPendingPods(t *testing.T) {
   320  	logger := log.GetDebugEnabledLogger()
   321  
   322  	report.ClearReports()
   323  	err := Analyze(logger, "cluster", "test/cluster/pending-pods")
   324  	assert.Nil(t, err)
   325  
   326  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   327  	assert.NotNil(t, reportedIssues)
   328  	assert.True(t, len(reportedIssues) > 0)
   329  	problemsFound := 0
   330  	for _, issue := range reportedIssues {
   331  		if issue.Type == report.PendingPods {
   332  			problemsFound++
   333  		}
   334  	}
   335  	assert.True(t, problemsFound > 0)
   336  }
   337  
   338  // TestUnknownInstall Tests that analysis of a cluster dump where install failed without more info handled
   339  // GIVEN a call to analyze a cluster-snapshot
   340  // WHEN the cluster-snapshot shows pods with problems that are not known issues
   341  // THEN a report is generated with problem pod issues identified
// This test is commented out because an install issue like this might no longer occur.
   343  //func TestUnknownInstall(t *testing.T) {
   344  //	logger := log.GetDebugEnabledLogger()
   345  
   346  //	err := Analyze(logger, "cluster", "test/cluster/install-unknown")
   347  //	assert.Nil(t, err)
   348  
   349  //	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   350  //	assert.NotNil(t, reportedIssues)
   351  //	assert.True(t, len(reportedIssues) > 0)
   352  //	problemsFound := 0
   353  //	for _, issue := range reportedIssues {
   354  //		if issue.Type == report.InstallFailure {
   355  //			problemsFound++
   356  //		}
   357  //	}
   358  //	assert.True(t, problemsFound > 0)
   359  //}
   360  
   361  // TestIstioIngressInstallFailure Tests that analysis of a cluster dump when IstioIngressLoadBalancer was not created
   362  // GIVEN a call to analyze a cluster-snapshot
   363  // WHEN the cluster-snapshot shows private subnet not allowed in public LB.
   364  // THEN a report is generated with issues identified
   365  func TestIstioIngressInstallFailure(t *testing.T) {
   366  	logger := log.GetDebugEnabledLogger()
   367  
   368  	report.ClearReports()
   369  	err := Analyze(logger, "cluster", "test/cluster/istio-loadbalancer-creation-issue")
   370  	assert.Nil(t, err)
   371  
   372  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   373  	assert.NotNil(t, reportedIssues)
   374  	assert.True(t, len(reportedIssues) > 0)
   375  	problemsFound := 0
   376  	for _, issue := range reportedIssues {
   377  		if issue.Type == report.IstioIngressPrivateSubnet {
   378  			problemsFound++
   379  		}
   380  	}
   381  	assert.True(t, problemsFound > 0)
   382  }
   383  
   384  // TestComponentsNotReadyNoErrorMsg Tests that analysis of a cluster dump where there are failed components with no error message in the VPO logs
   385  // GIVEN a call to analyze a cluster-snapshot
   386  // WHEN the cluster-snapshot shows that there is install failure with no known root cause
   387  // THEN a report is generated with supporting messages from the events related to those failed components' pods
   388  func TestComponentsNotReadyNoErrorMsg(t *testing.T) {
   389  	logger := log.GetDebugEnabledLogger()
   390  
   391  	report.ClearReports()
   392  	err := Analyze(logger, "cluster", "test/cluster/components-not-ready")
   393  	assert.Nil(t, err)
   394  
   395  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   396  	assert.NotNil(t, reportedIssues)
   397  	assert.True(t, len(reportedIssues) > 0)
   398  
   399  	problemsFound := 0
   400  	for _, issue := range reportedIssues {
   401  		if issue.Type == report.InstallFailure {
   402  			problemsFound++
   403  			// Two supporting messages are always included. Rest should come from events related to failed components
   404  			assert.True(t, len(issue.SupportingData[0].Messages) > 2)
   405  		}
   406  	}
   407  	assert.True(t, problemsFound > 0)
   408  }
   409  
   410  // TestExternalDNSConfigurationIssue Tests that analysis of a cluster dump when dns(oci,custom dns) was not configured
   411  // GIVEN a call to analyze a cluster-snapshot
   412  // WHEN the cluster-snapshot shows private subnet not allowed in public LB.
   413  // THEN a report is generated with issues identified
   414  func TestExternalDNSConfigurationIssue(t *testing.T) {
   415  	logger := log.GetDebugEnabledLogger()
   416  
   417  	report.ClearReports()
   418  	err := Analyze(logger, "cluster", "test/cluster/external-dns-issue")
   419  	assert.Nil(t, err)
   420  
   421  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   422  	assert.NotNil(t, reportedIssues)
   423  	assert.True(t, len(reportedIssues) > 0)
   424  	problemsFound := 0
   425  	for _, issue := range reportedIssues {
   426  		if issue.Type == report.ExternalDNSConfigureIssue {
   427  			problemsFound++
   428  		}
   429  	}
   430  	assert.True(t, problemsFound > 0)
   431  }
   432  
   433  // TestResourceJSONWithVerrazzanoFormat Tests that analysis of a cluster dump
   434  // when there is an install failure and the verrazzano-resource.json contains Verrazzano type instead of VerrazzanoList type
   435  // GIVEN a call to analyze a cluster-snapshot
   436  // WHEN the cluster-snapshot shows components not in ready state
   437  // THEN a report is generated with install failure
   438  func TestResourceJSONWithVerrazzanoFormat(t *testing.T) {
   439  	logger := log.GetDebugEnabledLogger()
   440  
   441  	report.ClearReports()
   442  	err := Analyze(logger, "cluster", "test/cluster/install-failure-verrazzano-format-json")
   443  	assert.Nil(t, err)
   444  
   445  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   446  	assert.NotNil(t, reportedIssues)
   447  	assert.True(t, len(reportedIssues) > 0)
   448  	problemsFound := 0
   449  	for _, issue := range reportedIssues {
   450  		fmt.Println(issue)
   451  		if issue.Type == report.InstallFailure {
   452  			problemsFound++
   453  		}
   454  	}
   455  	assert.True(t, problemsFound > 0)
   456  }
   457  
   458  // TestKeycloakDataMigrationFailure tests that analysis of a cluster dump when keycloak data migration during upgrade has failed
   459  // GIVEN a call to analyze a cluster-snapshot
   460  // WHEN the cluster-snapshot data load job failure
   461  // THEN a report is generated with issues identified
   462  func TestKeycloakDataMigrationFailure(t *testing.T) {
   463  	logger := log.GetDebugEnabledLogger()
   464  
   465  	report.ClearReports()
   466  	err := Analyze(logger, "cluster", "test/cluster/keycloak-data-migration-failure")
   467  	assert.Nil(t, err)
   468  
   469  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   470  	assert.NotNil(t, reportedIssues)
   471  	assert.True(t, len(reportedIssues) > 0)
   472  	problemsFound := 0
   473  	for _, issue := range reportedIssues {
   474  		if issue.Type == report.KeycloakDataMigrationFailure {
   475  			problemsFound++
   476  		}
   477  	}
   478  	assert.True(t, problemsFound > 0)
   479  }
   480  
   481  // TestCertificateVZClientHangingIssue tests analysis of a cluster dump when the VZ Client is hanging
   482  // GIVEN a call to analyze a cluster-snapshot
   483  // WHEN the VZ Client is hanging on a certificate, but the certificate is not expired
   484  // THEN a report is generated with issues identified
   485  // This test also tests for detecting a separate expired certificate in the certificates.json
   486  func TestCertificateVZClientHangingIssue(t *testing.T) {
   487  	logger := log.GetDebugEnabledLogger()
   488  
   489  	report.ClearReports()
   490  	err := Analyze(logger, "cluster", "test/cluster/testCLIHangingIssue")
   491  	assert.Nil(t, err)
   492  
   493  	reportedIssues := report.GetAllSourcesFilteredIssues(logger, true, 0, 0)
   494  	assert.NotNil(t, reportedIssues)
   495  	assert.True(t, len(reportedIssues) > 0)
   496  	problemsFound := 0
   497  	for _, issue := range reportedIssues {
   498  		if issue.Type == report.VZClientHangingIssueDueToLongCertificateApproval {
   499  			problemsFound++
   500  		}
   501  		if issue.Type == report.CertificateExpired {
   502  			problemsFound++
   503  		}
   504  	}
   505  	assert.True(t, problemsFound == 2)
   506  }