github.com/verrazzano/verrazzano@v1.7.1/tests/e2e/logging/system/system_logging_test.go (about)

     1  // Copyright (c) 2022, 2023, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package system
     5  
     6  import (
     7  	"encoding/json"
     8  	"fmt"
     9  	"regexp"
    10  	"strings"
    11  	"time"
    12  
    13  	. "github.com/onsi/ginkgo/v2"
    14  	. "github.com/onsi/gomega"
    15  	"github.com/verrazzano/verrazzano/pkg/constants"
    16  	"github.com/verrazzano/verrazzano/pkg/k8sutil"
    17  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    18  	"github.com/verrazzano/verrazzano/pkg/nginxutil"
    19  	"github.com/verrazzano/verrazzano/pkg/vzcr"
    20  	"github.com/verrazzano/verrazzano/tests/e2e/pkg"
    21  	dump "github.com/verrazzano/verrazzano/tests/e2e/pkg/test/clusterdump"
    22  	"github.com/verrazzano/verrazzano/tests/e2e/pkg/test/framework"
    23  )
    24  
    25  const (
    26  	systemNamespace           = constants.VerrazzanoSystemNamespace
    27  	installNamespace          = constants.VerrazzanoInstallNamespace
    28  	capiNamespace             = constants.VerrazzanoCAPINamespace
    29  	certMgrNamespace          = constants.CertManagerNamespace
    30  	keycloakNamespace         = constants.KeycloakNamespace
    31  	cattleSystemNamespace     = constants.RancherSystemNamespace
    32  	fleetLocalSystemNamespace = "cattle-fleet-local-system"
    33  	monitoringNamespace       = "monitoring"
    34  	shortPollingInterval      = 10 * time.Second
    35  	shortWaitTimeout          = 5 * time.Minute
    36  	searchTimeWindow          = "1h"
    37  	fleetLocalSystemIndex     = "verrazzano-namespace-cattle-fleet-local-system"
    38  )
    39  
    40  var (
    41  	noExceptions    []*regexp.Regexp
    42  	istioExceptions = []*regexp.Regexp{
    43  		regexp.MustCompile(`^-A .*$`),
    44  		regexp.MustCompile(`^-N .*$`),
    45  		regexp.MustCompile(`^:\w+? -.*$`),
    46  		regexp.MustCompile(`^:\w+? ACCEPT.*$`),
    47  		regexp.MustCompile(`^\w+?=.*$`),
    48  		regexp.MustCompile(`^COMMIT.*$`),
    49  		regexp.MustCompile(`^ {0,4}\w+:.*$`),
    50  		regexp.MustCompile(`^:.*$`),
    51  		regexp.MustCompile(`^\* ?nat.*$`),
    52  		regexp.MustCompile(`^# Generated by.*$`),
    53  		regexp.MustCompile(`^# Completed on.*$`),
    54  		regexp.MustCompile(`^Writing following contents to rules file:.*$`),
    55  		regexp.MustCompile(`^ip\w?tables.*$`),
    56  		regexp.MustCompile(`^-+$`),
    57  		regexp.MustCompile(`^$`),
    58  	}
    59  	jaegerExceptions = []*regexp.Regexp{
    60  		regexp.MustCompile(`^.*http: TLS handshake error.*$`),
    61  		regexp.MustCompile(`^.*GOMAXPROCS.*$`),
    62  	}
    63  )
    64  
    65  var ingressNGINXNamespace string
    66  
    67  var t = framework.NewTestFramework("system-logging")
    68  
    69  var beforeSuite = t.BeforeSuiteFunc(func() {
    70  	var err error
    71  	ingressNGINXNamespace, err = nginxutil.DetermineNamespaceForIngressNGINX(vzlog.DefaultLogger())
    72  	if err != nil {
    73  		Fail("Error determining ingress-nginx namespace")
    74  	}
    75  
    76  })
    77  var _ = BeforeSuite(beforeSuite)
    78  var failed = false
    79  var _ = t.AfterEach(func() {
    80  	failed = failed || CurrentSpecReport().Failed()
    81  })
    82  
    83  var afterSuite = t.AfterSuiteFunc(func() {
    84  	if failed {
    85  		dump.ExecuteBugReport()
    86  	}
    87  })
    88  
    89  var _ = AfterSuite(afterSuite)
    90  
    91  var _ = t.Describe("Opensearch system component data", Label("f:observability.logging.es"), func() {
    92  	t.It("contains verrazzano-system index with valid records", func() {
    93  		// GIVEN existing system logs
    94  		// WHEN the Opensearch index for the verrazzano-system namespace is retrieved
    95  		// THEN verify that it is found
    96  		indexName, err := pkg.GetOpenSearchSystemIndex(systemNamespace)
    97  		Expect(err).To(BeNil())
    98  		Eventually(func() bool {
    99  			return pkg.LogIndexFound(indexName)
   100  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index verrazzano-system")
   101  
   102  		valid := true
   103  		valid = validateAuthProxyLogs() && valid
   104  		valid = validateCoherenceLogs() && valid
   105  		valid = validateOAMLogs() && valid
   106  		valid = validateIstioProxyLogs() && valid
   107  		valid = validateKialiLogs() && valid
   108  		valid = validatePrometheusLogs() && valid
   109  		valid = validatePrometheusConfigReloaderLogs() && valid
   110  		valid = validateGrafanaLogs() && valid
   111  		valid = validateOpenSearchLogs() && valid
   112  		valid = validateWeblogicOperatorLogs() && valid
   113  		kubeConfigPath, err := k8sutil.GetKubeConfigLocation()
   114  		Expect(err).To(BeNil())
   115  		isJaegerSupported, err := pkg.IsVerrazzanoMinVersion("1.4.0", kubeConfigPath)
   116  		Expect(err).To(BeNil())
   117  		if isJaegerSupported {
   118  			valid = validateJaegerCollectorLogs() && valid
   119  			valid = validateJaegerQueryLogs() && valid
   120  		}
   121  		if !valid {
   122  			// Don't fail for invalid logs until this is stable.
   123  			t.Logs.Info("Found problems with log records in verrazzano-system index")
   124  		}
   125  	})
   126  
   127  	t.It("contains valid verrazzano-install index with valid records", func() {
   128  		// GIVEN existing system logs
   129  		// WHEN the Opensearch index for the verrazzano-install namespace is retrieved
   130  		// THEN verify that it is found
   131  		indexName, err := pkg.GetOpenSearchSystemIndex(installNamespace)
   132  		Expect(err).To(BeNil())
   133  		Eventually(func() bool {
   134  			return pkg.LogIndexFound(indexName)
   135  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index verrazzano-install")
   136  
   137  		// GIVEN Log message in Opensearch in the verrazzano-namespace-verrazzano-install index
   138  		// With field kubernetes.labels.app.keyword==verrazzano-platform-operator
   139  		// WHEN Log messages are retrieved from Opensearch
   140  		// THEN Verify there are valid log records
   141  		valid := true
   142  		valid = validateVPOLogs() && valid
   143  		if !valid {
   144  			// Don't fail for invalid logs until this is stable.
   145  			t.Logs.Info("Found problems with log records in verrazzano-install index")
   146  		}
   147  	})
   148  
   149  	t.It("contains valid verrazzano-system index with valid records", func() {
   150  		// GIVEN existing system logs
   151  		// WHEN the Opensearch index for the verrazzano-system namespace is retrieved
   152  		// THEN verify that it is found
   153  		indexName, err := pkg.GetOpenSearchSystemIndex(systemNamespace)
   154  		Expect(err).To(BeNil())
   155  		Eventually(func() bool {
   156  			return pkg.LogIndexFound(indexName)
   157  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index verrazzano-system")
   158  
   159  		// GIVEN Log message in Opensearch in the verrazzano-namespace-verrazzano-system index
   160  		// With field
   161  		//  kubernetes.labels.app.keyword==verrazzano-application-operator,
   162  		//  kubernetes.labels.app.keyword==verrazzano-monitoring-operator,
   163  		// WHEN Log messages are retrieved from Opensearch
   164  		// THEN Verify there are valid log records
   165  		if !validateVAOLogs() {
   166  			// Don't fail for invalid logs until this is stable.
   167  			t.Logs.Info("Found problems with Verrazzano Application Operator log records in verrazzano-system index")
   168  		}
   169  		if !validateVMOLogs() {
   170  			// Don't fail for invalid logs until this is stable.
   171  			t.Logs.Info("Found problems with Verrazzano Monitoring Operator log records in verrazzano-system index")
   172  		}
   173  	})
   174  
   175  	t.It("contains cert-manager index with valid records", func() {
   176  		// GIVEN existing system logs
   177  		// WHEN the Opensearch index for the cert-manager namespace is retrieved
   178  		// THEN verify that it is found
   179  
   180  		indexName, err := pkg.GetOpenSearchSystemIndex(certMgrNamespace)
   181  		Expect(err).To(BeNil())
   182  		Eventually(func() bool {
   183  			return pkg.LogIndexFound(indexName)
   184  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index cert-manager")
   185  
   186  		valid := true
   187  		valid = validateCertManagerLogs() && valid
   188  
   189  		dnsPodExist, err := pkg.DoesPodExist("cert-manager", "external-dns")
   190  		if err != nil {
   191  			dnsPodExist = false
   192  			t.Logs.Infof("Error calling DoesPodExist for external-dns: %s", err)
   193  		}
   194  		if dnsPodExist {
   195  			valid = validateExternalDNSLogs() && valid
   196  		}
   197  
   198  		if !valid {
   199  			// Don't fail for invalid logs until this is stable.
   200  			t.Logs.Info("Found problems with log records in cert-manager index")
   201  		}
   202  	})
   203  
   204  	t.It("contains valid Keycloak index with valid records", func() {
   205  		// GIVEN existing system logs
   206  		// WHEN the Opensearch index for the Keycloak namespace is retrieved
   207  		// THEN verify that it is found
   208  		indexName, err := pkg.GetOpenSearchSystemIndex(keycloakNamespace)
   209  		Expect(err).To(BeNil())
   210  		Eventually(func() bool {
   211  			return pkg.LogIndexFound(indexName)
   212  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index verrazzano-namepace-keycloak")
   213  
   214  		// GIVEN Log message in Opensearch in the verrazzano-namespace-keycloak index
   215  		// With field kubernetes.labels.app.kubernetes.io/name=keycloak
   216  		// WHEN Log messages are retrieved from Opensearch
   217  		// THEN Verify there are valid log records
   218  		valid := true
   219  		valid = validateKeycloakLogs() && valid
   220  		valid = validateKeycloakMySQLLogs() && valid
   221  		if !valid {
   222  			// Don't fail for invalid logs until this is stable.
   223  			t.Logs.Info("Found problems with log records in Keycloak index")
   224  		}
   225  	})
   226  
   227  	t.It("contains ingress-nginx index with valid records", func() {
   228  		// GIVEN existing system logs
   229  		// WHEN the index for the ingress-nginx namespace is retrieved
   230  		// THEN verify that it is found
   231  		indexName, err := pkg.GetOpenSearchSystemIndex(ingressNGINXNamespace)
   232  		Expect(err).To(BeNil())
   233  		Eventually(func() bool {
   234  			return pkg.LogIndexFound(indexName)
   235  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find NGINX index ingress-nginx")
   236  
   237  		valid := true
   238  		valid = validateIngressNginxLogs() && valid
   239  		if !valid {
   240  			// Don't fail for invalid logs until this is stable.
   241  			t.Logs.Info("Found problems with log records in ingress-nginx index")
   242  		}
   243  	})
   244  
   245  	t.It("contains cattle-system index with valid records", func() {
   246  		// GIVEN existing system logs
   247  		// WHEN the Opensearch index for the cattle-system namespace is retrieved
   248  		// THEN verify that it is found
   249  		indexName, err := pkg.GetOpenSearchSystemIndex(cattleSystemNamespace)
   250  		Expect(err).To(BeNil())
   251  		Eventually(func() bool {
   252  			return pkg.LogIndexFound(indexName)
   253  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index cattle-system")
   254  
   255  		valid := true
   256  		valid = validateRancherLogs() && valid
   257  		valid = validateRancherWebhookLogs() && valid
   258  		if !valid {
   259  			// Don't fail for invalid logs until this is stable.
   260  			t.Logs.Info("Found problems with log records in cattle-system index")
   261  		}
   262  	})
   263  
   264  	t.It("contains cattle-fleet-local-system index with valid records", func() {
   265  		// GIVEN existing system logs
   266  		// WHEN the Opensearch index for the cattle-fleet-system namespace is retrieved
   267  		// THEN verify that it is found
   268  		indexName, err := pkg.GetOpenSearchSystemIndex(fleetLocalSystemIndex)
   269  		Expect(err).To(BeNil())
   270  		Eventually(func() bool {
   271  			return pkg.LogIndexFound(indexName)
   272  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index cattle-fleet-local-system")
   273  
   274  		if !validateFleetSystemLogs() {
   275  			// Don't fail for invalid logs until this is stable.
   276  			t.Logs.Info("Found problems with log records in cattle-fleet-local-system index")
   277  		}
   278  	})
   279  
   280  	t.It("contains cattle-fleet-local-system index with valid records", func() {
   281  		// GIVEN existing system logs
   282  		// WHEN the Opensearch index for the cattle-fleet-local-system namespace is retrieved
   283  		// THEN verify that it is found
   284  		indexName, err := pkg.GetOpenSearchSystemIndex(fleetLocalSystemNamespace)
   285  		Expect(err).To(BeNil())
   286  		Eventually(func() bool {
   287  			return pkg.LogIndexFound(indexName)
   288  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index cattle-fleet-local-system")
   289  
   290  		if !validateFleetSystemLogs() {
   291  			// Don't fail for invalid logs until this is stable.
   292  			t.Logs.Info("Found problems with log records in cattle-fleet-local-system index")
   293  		}
   294  	})
   295  
   296  	t.It("contains capi index with valid records", func() {
   297  		// Only run test if clusterAPI is enabled
   298  		vz, err := pkg.GetVerrazzanoV1beta1()
   299  		Expect(err).To(Not(HaveOccurred()))
   300  		if vzcr.IsClusterAPIEnabled(vz) {
   301  			// GIVEN existing system logs
   302  			// WHEN the Opensearch index for the verrazzano-capi namespace is retrieved
   303  			// THEN verify that it is found
   304  			indexName, err := pkg.GetOpenSearchSystemIndex(capiNamespace)
   305  			Expect(err).To(BeNil())
   306  			Eventually(func() bool {
   307  				return pkg.LogIndexFound(indexName)
   308  			}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), fmt.Sprintf("Expected to find Opensearch index %s", capiNamespace))
   309  
   310  			if !validateCapiSystemLogs() {
   311  				// Don't fail for invalid logs until this is stable.
   312  				t.Logs.Info(fmt.Sprintf("Found problems with log records in %s index", capiNamespace))
   313  			}
   314  		}
   315  	})
   316  
   317  	t.It("contains monitoring index with valid records", func() {
   318  		// GIVEN existing system logs
   319  		// WHEN the Opensearch index for the monitoring namespace is retrieved
   320  		// THEN verify that it is found
   321  		indexName, err := pkg.GetOpenSearchSystemIndex(monitoringNamespace)
   322  		Expect(err).To(BeNil())
   323  		Eventually(func() bool {
   324  			return pkg.LogIndexFound(indexName)
   325  		}, shortWaitTimeout, shortPollingInterval).Should(BeTrue(), "Expected to find Opensearch index monitoring")
   326  
   327  		if !validateNodeExporterLogs() {
   328  			// Don't fail for invalid logs until this is stable.
   329  			t.Logs.Info("Found problems with log records in monitoring index")
   330  		}
   331  	})
   332  })
   333  
   334  func validateAuthProxyLogs() bool {
   335  	exceptions := []*regexp.Regexp{
   336  		regexp.MustCompile(`^Adding local CA cert to .*$`),
   337  		regexp.MustCompile(`^Detected Nginx Configuration Change$`),
   338  	}
   339  	exceptions = append(exceptions, istioExceptions...)
   340  	return validateOpensearchRecords(
   341  		noLevelOpensearchRecordValidator,
   342  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   343  		"kubernetes.labels.app.keyword",
   344  		"verrazzano-authproxy",
   345  		searchTimeWindow,
   346  		exceptions)
   347  }
   348  
   349  func validateCoherenceLogs() bool {
   350  	return validateOpensearchRecords(
   351  		allOpensearchRecordValidator,
   352  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   353  		"kubernetes.labels.app_kubernetes_io/name.keyword",
   354  		"coherence-operator",
   355  		searchTimeWindow,
   356  		noExceptions)
   357  }
   358  
   359  func validateOAMLogs() bool {
   360  	return validateOpensearchRecords(
   361  		allOpensearchRecordValidator,
   362  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   363  		"kubernetes.labels.app_kubernetes_io/name.keyword",
   364  		"oam-kubernetes-runtime",
   365  		searchTimeWindow,
   366  		noExceptions)
   367  }
   368  
   369  // message:configPath: ./etc/istio/proxy
   370  func validateIstioProxyLogs() bool {
   371  	return validateOpensearchRecords(
   372  		allOpensearchRecordValidator,
   373  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   374  		"kubernetes.container_name",
   375  		"istio-proxy",
   376  		searchTimeWindow,
   377  		istioExceptions)
   378  }
   379  
   380  func validateKialiLogs() bool {
   381  	return validateOpensearchRecords(
   382  		allOpensearchRecordValidator,
   383  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   384  		"kubernetes.labels.app_kubernetes_io/part-of",
   385  		"kiali",
   386  		searchTimeWindow,
   387  		istioExceptions)
   388  }
   389  
   390  func validateVPOLogs() bool {
   391  	return validateOpensearchRecords(
   392  		allOpensearchRecordValidator,
   393  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(installNamespace) },
   394  		"kubernetes.labels.app.keyword",
   395  		"verrazzano-platform-operator",
   396  		searchTimeWindow,
   397  		noExceptions)
   398  }
   399  
   400  func validateVAOLogs() bool {
   401  	return validateOpensearchRecords(
   402  		allOpensearchRecordValidator,
   403  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   404  		"kubernetes.labels.app.keyword",
   405  		"verrazzano-application-operator",
   406  		searchTimeWindow,
   407  		noExceptions)
   408  }
   409  
   410  func validateVMOLogs() bool {
   411  	return validateOpensearchRecords(
   412  		allOpensearchRecordValidator,
   413  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   414  		"kubernetes.labels.app.keyword",
   415  		"verrazzano-monitoring-operator",
   416  		searchTimeWindow,
   417  		noExceptions)
   418  }
   419  
   420  func validatePrometheusLogs() bool {
   421  	return validateOpensearchRecords(
   422  		allOpensearchRecordValidator,
   423  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   424  		"kubernetes.container_name",
   425  		"prometheus",
   426  		searchTimeWindow,
   427  		noExceptions)
   428  }
   429  
   430  func validatePrometheusConfigReloaderLogs() bool {
   431  	return validateOpensearchRecords(
   432  		noLevelOpensearchRecordValidator,
   433  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   434  		"kubernetes.container_name",
   435  		"config-reloader",
   436  		searchTimeWindow,
   437  		noExceptions)
   438  }
   439  
   440  func validateCertManagerLogs() bool {
   441  	return validateOpensearchRecords(
   442  		allOpensearchRecordValidator,
   443  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(certMgrNamespace) },
   444  		"kubernetes.labels.app_kubernetes_io/instance",
   445  		"cert-manager",
   446  		searchTimeWindow,
   447  		noExceptions)
   448  }
   449  
   450  func validateExternalDNSLogs() bool {
   451  	return validateOpensearchRecords(
   452  		allOpensearchRecordValidator,
   453  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(certMgrNamespace) },
   454  		"kubernetes.labels.app_kubernetes_io/instance",
   455  		"external-dns",
   456  		searchTimeWindow,
   457  		noExceptions)
   458  }
   459  
   460  func validateGrafanaLogs() bool {
   461  	return validateOpensearchRecords(
   462  		allOpensearchRecordValidator,
   463  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   464  		"kubernetes.labels.app.keyword",
   465  		"system-grafana",
   466  		searchTimeWindow,
   467  		noExceptions)
   468  }
   469  
   470  func validateOpenSearchLogs() bool {
   471  	valid := true
   472  	openSearchAppComponents := []string{"system-osd", "system-es-data", "system-es-master", "system-os-ingest"}
   473  	for _, appLabel := range openSearchAppComponents {
   474  		valid = validateOpensearchRecords(
   475  			noLevelOpensearchRecordValidator,
   476  			func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   477  			"kubernetes.labels.app.keyword",
   478  			appLabel,
   479  			searchTimeWindow,
   480  			noExceptions) && valid
   481  	}
   482  	return valid
   483  }
   484  
   485  func validateWeblogicOperatorLogs() bool {
   486  	return validateOpensearchRecords(
   487  		allOpensearchRecordValidator,
   488  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(systemNamespace) },
   489  		"kubernetes.labels.app.keyword",
   490  		"weblogic-operator",
   491  		searchTimeWindow,
   492  		noExceptions)
   493  }
   494  
   495  func validateKeycloakLogs() bool {
   496  	return validateOpensearchRecords(
   497  		allOpensearchRecordValidator,
   498  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(keycloakNamespace) },
   499  		"kubernetes.labels.app.kubernetes.io/name",
   500  		"keycloak",
   501  		searchTimeWindow,
   502  		noExceptions)
   503  }
   504  
   505  func validateIngressNginxLogs() bool {
   506  	return validateOpensearchRecords(
   507  		noLevelOpensearchRecordValidator,
   508  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(ingressNGINXNamespace) },
   509  		"kubernetes.labels.app_kubernetes_io/name",
   510  		"ingress-nginx",
   511  		searchTimeWindow,
   512  		noExceptions)
   513  }
   514  
   515  func validateKeycloakMySQLLogs() bool {
   516  	return validateOpensearchRecords(
   517  		allOpensearchRecordValidator,
   518  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(keycloakNamespace) },
   519  		"kubernetes.labels.app.keyword",
   520  		"mysql",
   521  		searchTimeWindow,
   522  		noExceptions)
   523  }
   524  
   525  func validateRancherLogs() bool {
   526  	return validateOpensearchRecords(
   527  		allOpensearchRecordValidator,
   528  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(cattleSystemNamespace) },
   529  		"kubernetes.labels.app.keyword",
   530  		"rancher",
   531  		searchTimeWindow,
   532  		noExceptions)
   533  }
   534  
   535  func validateRancherWebhookLogs() bool {
   536  	return validateOpensearchRecords(
   537  		allOpensearchRecordValidator,
   538  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(cattleSystemNamespace) },
   539  		"kubernetes.labels.app.keyword",
   540  		"rancher-webhook",
   541  		searchTimeWindow,
   542  		noExceptions)
   543  }
   544  func validateFleetSystemLogs() bool {
   545  	return validateOpensearchRecords(
   546  		allOpensearchRecordValidator,
   547  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(fleetLocalSystemNamespace) },
   548  		"kubernetes.namespace_name",
   549  		"fleet-system",
   550  		searchTimeWindow,
   551  		noExceptions)
   552  }
   553  
   554  func validateCapiSystemLogs() bool {
   555  	return validateOpensearchRecords(
   556  		allOpensearchRecordValidator,
   557  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(capiNamespace) },
   558  		"kubernetes.namespace_name",
   559  		capiNamespace,
   560  		searchTimeWindow,
   561  		noExceptions)
   562  }
   563  
   564  func validateNodeExporterLogs() bool {
   565  	return validateOpensearchRecords(
   566  		allOpensearchRecordValidator,
   567  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(monitoringNamespace) },
   568  		"kubernetes.labels.app.keyword",
   569  		"node-exporter",
   570  		searchTimeWindow,
   571  		noExceptions)
   572  }
   573  
   574  func validateJaegerCollectorLogs() bool {
   575  	return validateOpensearchRecords(
   576  		logLevelOpensearchRecordValidator,
   577  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(monitoringNamespace) },
   578  		"kubernetes.container_name",
   579  		"jaeger-collector",
   580  		searchTimeWindow,
   581  		jaegerExceptions)
   582  }
   583  
   584  func validateJaegerQueryLogs() bool {
   585  	return validateOpensearchRecords(
   586  		logLevelOpensearchRecordValidator,
   587  		func() (string, error) { return pkg.GetOpenSearchSystemIndex(monitoringNamespace) },
   588  		"kubernetes.container_name",
   589  		"jaeger-query",
   590  		searchTimeWindow,
   591  		jaegerExceptions)
   592  }
   593  
   594  func validateOpensearchRecords(hitValidator pkg.OpensearchHitValidator, indexFunc func() (string, error), appLabel string, appName string, timeRange string, exceptions []*regexp.Regexp) bool {
   595  	pkg.Log(pkg.Info, fmt.Sprintf("Validating log records for %s", appName))
   596  	index, err := indexFunc()
   597  	if err != nil {
   598  		pkg.Log(pkg.Error, fmt.Sprintf("Failed to get OpenSearch index: %v", err))
   599  		return false
   600  	}
   601  
   602  	template :=
   603  		`{
   604  			"size": 1000,
   605  			"sort": [{"@timestamp": {"order": "desc"}}],
   606  			"query": {
   607  				"bool": {
   608  					"filter" : [
   609  						{"match_phrase": {"%s": "%s"}},
   610  						{"range": {"@timestamp": {"gte": "now-%s"}}}
   611  					]
   612  				}
   613  			}
   614  		}`
   615  	query := fmt.Sprintf(template, appLabel, appName, timeRange)
   616  	resp, err := pkg.PostOpensearch(fmt.Sprintf("%s/_search", index), query)
   617  	if err != nil {
   618  		pkg.Log(pkg.Error, fmt.Sprintf("Failed to query Opensearch: %v", err))
   619  		return false
   620  	}
   621  	if resp.StatusCode != 200 {
   622  		pkg.Log(pkg.Error, fmt.Sprintf("Failed to query Opensearch: status=%d: body=%s", resp.StatusCode, string(resp.Body)))
   623  		return false
   624  	}
   625  	var result map[string]interface{}
   626  	json.Unmarshal(resp.Body, &result)
   627  
   628  	if !pkg.ValidateOpensearchHits(result, hitValidator, exceptions) {
   629  		pkg.Log(pkg.Info, fmt.Sprintf("Found invalid (or zero) log records in %s logs", appName))
   630  		return false
   631  	}
   632  	return true
   633  }
   634  
   635  // allOpensearchRecordValidator does all validation for log records
   636  func allOpensearchRecordValidator(hit pkg.OpensearchHit) bool {
   637  	valid := true
   638  	if !commonOpensearchRecordValidator(hit) {
   639  		valid = false
   640  	}
   641  	if !logLevelOpensearchRecordValidator(hit) {
   642  		valid = false
   643  	}
   644  
   645  	return valid
   646  }
   647  
   648  // noLevelOpensearchRecordValidator does validation for log records except level validation
   649  func noLevelOpensearchRecordValidator(hit pkg.OpensearchHit) bool {
   650  	return commonOpensearchRecordValidator(hit)
   651  }
   652  
   653  // commonOpensearchRecordValidator does all validation for log records except level validation
   654  func commonOpensearchRecordValidator(hit pkg.OpensearchHit) bool {
   655  	ts := ""
   656  	valid := true
   657  	// Verify the record has a @timestamp field.
   658  	// If so extract it.
   659  	if val, ok := hit["@timestamp"]; !ok || len(val.(string)) == 0 {
   660  		pkg.Log(pkg.Info, "Log record has missing or empty @timestamp field")
   661  		valid = false
   662  	} else {
   663  		ts = hit["@timestamp"].(string)
   664  	}
   665  	// Verify the record has a log field.
   666  	// If so verify the time in the log field matches the @timestamp field.
   667  	if val, ok := hit["log"]; !ok || len(val.(string)) == 0 {
   668  		pkg.Log(pkg.Info, "Log record has missing or empty log field")
   669  		valid = false
   670  	} else {
   671  		re := regexp.MustCompile(`(\d{2}:\d{2}:\d{2})`)
   672  		m := re.FindStringSubmatch(val.(string))
   673  		if len(m) < 2 {
   674  			pkg.Log(pkg.Info, "Log record log field does not contain a time")
   675  			valid = false
   676  		} else {
   677  			if !strings.Contains(ts, m[1]) {
   678  				pkg.Log(pkg.Info, fmt.Sprintf("Log record @timestamp field %s does not match log field %s content", ts, m[1]))
   679  				valid = false
   680  			}
   681  		}
   682  	}
   683  	// Verify the record has a message field.
   684  	if val, ok := hit["message"]; !ok || len(val.(string)) == 0 {
   685  		pkg.Log(pkg.Info, "Log record has missing or empty message field")
   686  		valid = false
   687  	}
   688  	// Verify the log field isn't exactly the same as the message field.
   689  	if hit["log"] == hit["message"] {
   690  		pkg.Log(pkg.Info, "Log record has duplicate log and message field values")
   691  		valid = false
   692  	}
   693  	// Verify the record does not have a timestamp field.
   694  	if _, ok := hit["timestamp"]; ok {
   695  		pkg.Log(pkg.Info, "Log record has unwanted timestamp field")
   696  		valid = false
   697  	}
   698  	if !valid {
   699  		pkg.Log(pkg.Info, fmt.Sprintf("Log record is invalid: %v", hit))
   700  	}
   701  	return valid
   702  }
   703  
   704  // logLevelOpensearchRecordValidator does validation of level for log records
   705  func logLevelOpensearchRecordValidator(hit pkg.OpensearchHit) bool {
   706  	// Verify the record has a level field.
   707  	// If so verify that the level isn't debug.
   708  	if val, ok := hit["level"]; !ok || len(val.(string)) == 0 {
   709  		pkg.Log(pkg.Info, "Log record has missing or empty level field")
   710  		return false
   711  	}
   712  	// level := val.(string)
   713  	// Put this validation back in when the OAM logging is fixed.
   714  	// if strings.EqualFold(level, "debug") || strings.EqualFold(level, "dbg") || strings.EqualFold(level, "d") {
   715  	// 	pkg.Log(pkg.Info, fmt.Sprintf("Log record has invalid debug level: %s", level))
   716  	// 	valid = false
   717  	// }
   718  	// There is an Istio proxy error that causes this to fail.
   719  	// Put this validation back in when that is addressed.
   720  	// if strings.EqualFold(level, "error") || strings.EqualFold(level, "err") || strings.EqualFold(level, "e") {
   721  	//	pkg.Log(pkg.Info, fmt.Sprintf("Log record has invalid error level: %s", level))
   722  	//	valid = false
   723  	// }
   724  
   725  	return true
   726  }