github.com/operator-framework/operator-lifecycle-manager@v0.30.0/test/e2e/metrics_e2e_test.go (about) 1 package e2e 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "regexp" 8 "strconv" 9 "strings" 10 "sync" 11 12 "github.com/blang/semver/v4" 13 . "github.com/onsi/ginkgo/v2" 14 . "github.com/onsi/gomega" 15 io_prometheus_client "github.com/prometheus/client_model/go" 16 "github.com/prometheus/common/expfmt" 17 appsv1 "k8s.io/api/apps/v1" 18 corev1 "k8s.io/api/core/v1" 19 apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 20 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 "k8s.io/apimachinery/pkg/util/net" 22 23 "github.com/operator-framework/api/pkg/operators/v1alpha1" 24 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned" 25 "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/registry" 26 "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" 27 "github.com/operator-framework/operator-lifecycle-manager/test/e2e/ctx" 28 ) 29 30 var _ = Describe("Metrics are generated for OLM managed resources", func() { 31 var ( 32 c operatorclient.ClientInterface 33 crc versioned.Interface 34 generatedNamespace corev1.Namespace 35 ) 36 37 BeforeEach(func() { 38 namespaceName := genName("metrics-e2e-") 39 generatedNamespace = SetupGeneratedTestNamespace(namespaceName, namespaceName) 40 c = ctx.Ctx().KubeClient() 41 crc = ctx.Ctx().OperatorClient() 42 }) 43 44 AfterEach(func() { 45 TeardownNamespace(generatedNamespace.GetName()) 46 }) 47 48 Context("Given an OperatorGroup that supports all namespaces", func() { 49 BeforeEach(func() { 50 By("using the default OperatorGroup created in BeforeSuite") 51 }) 52 53 When("a CSV spec does not include Install Mode", func() { 54 var ( 55 cleanupCSV cleanupFunc 56 failingCSV v1alpha1.ClusterServiceVersion 57 ) 58 59 BeforeEach(func() { 60 failingCSV = v1alpha1.ClusterServiceVersion{ 61 TypeMeta: metav1.TypeMeta{ 62 Kind: v1alpha1.ClusterServiceVersionKind, 63 APIVersion: v1alpha1.ClusterServiceVersionAPIVersion, 64 }, 65 ObjectMeta: metav1.ObjectMeta{ 66 Name: genName("failing-csv-test-"), 67 }, 68 Spec: v1alpha1.ClusterServiceVersionSpec{ 69 InstallStrategy: v1alpha1.NamedInstallStrategy{ 70 StrategyName: v1alpha1.InstallStrategyNameDeployment, 71 StrategySpec: strategy, 72 }, 73 }, 74 } 75 76 var err error 77 cleanupCSV, err = createCSV(c, crc, failingCSV, generatedNamespace.GetName(), false, false) 78 Expect(err).ToNot(HaveOccurred()) 79 80 _, err = fetchCSV(crc, generatedNamespace.GetName(), failingCSV.Name, csvFailedChecker) 81 Expect(err).ToNot(HaveOccurred()) 82 }) 83 84 It("generates csv_abnormal metric for OLM pod", func() { 85 86 Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(And( 87 ContainElement(LikeMetric( 88 WithFamily("csv_abnormal"), 89 WithName(failingCSV.Name), 90 WithPhase("Failed"), 91 WithReason("UnsupportedOperatorGroup"), 92 WithVersion("0.0.0"), 93 )), 94 ContainElement(LikeMetric( 95 WithFamily("csv_succeeded"), 96 WithValue(0), 97 WithName(failingCSV.Name), 98 )), 99 )) 100 101 cleanupCSV() 102 }) 103 104 When("the failed CSV is deleted", func() { 105 106 BeforeEach(func() { 107 if cleanupCSV != nil { 108 cleanupCSV() 109 } 110 }) 111 112 It("deletes its associated CSV metrics", func() { 113 By(`Verify that when the csv has been deleted, it deletes the corresponding CSV metrics`) 114 Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).ToNot(And( 115 ContainElement(LikeMetric(WithFamily("csv_abnormal"), WithName(failingCSV.Name))), 116 ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(failingCSV.Name))), 117 )) 118 }) 119 }) 120 }) 121 122 When("a CSV is created", func() { 123 var ( 124 cleanupCSV cleanupFunc 125 csv v1alpha1.ClusterServiceVersion 126 ) 127 128 BeforeEach(func() { 129 packageName := genName("csv-test-") 130 packageStable := fmt.Sprintf("%s-stable", packageName) 131 csv = newCSV(packageStable, generatedNamespace.GetName(), "", semver.MustParse("0.1.0"), nil, nil, nil) 132 133 var err error 134 _, err = createCSV(c, crc, csv, generatedNamespace.GetName(), false, false) 135 Expect(err).ToNot(HaveOccurred()) 136 _, err = fetchCSV(crc, generatedNamespace.GetName(), csv.Name, csvSucceededChecker) 137 Expect(err).ToNot(HaveOccurred()) 138 }) 139 140 AfterEach(func() { 141 if cleanupCSV != nil { 142 cleanupCSV() 143 } 144 }) 145 146 It("emits a CSV metrics", func() { 147 Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To( 148 ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))), 149 ) 150 }) 151 152 When("the OLM pod restarts", func() { 153 154 BeforeEach(func() { 155 restartDeploymentWithLabel(c, "app=olm-operator") 156 }) 157 158 It("CSV metric is preserved", func() { 159 Eventually(func() []Metric { 160 return getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator")) 161 }).Should(ContainElement(LikeMetric( 162 WithFamily("csv_succeeded"), 163 WithName(csv.Name), 164 WithValue(1), 165 ))) 166 }) 167 }) 168 }) 169 }) 170 171 Context("Metrics emitted by objects during operator installation", func() { 172 var ( 173 subscriptionCleanup cleanupFunc 174 subscription *v1alpha1.Subscription 175 ) 176 177 When("A subscription object is created", func() { 178 179 BeforeEach(func() { 180 subscriptionCleanup, _ = createSubscription(GinkgoT(), crc, generatedNamespace.GetName(), "metric-subscription-for-create", testPackageName, stableChannel, v1alpha1.ApprovalManual) 181 }) 182 183 AfterEach(func() { 184 if subscriptionCleanup != nil { 185 subscriptionCleanup() 186 } 187 }) 188 189 It("generates subscription_sync_total metric", func() { 190 191 By(`Verify metrics have been emitted for subscription`) 192 Eventually(func() []Metric { 193 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 194 }).Should(ContainElement(LikeMetric( 195 WithFamily("subscription_sync_total"), 196 WithName("metric-subscription-for-create"), 197 WithChannel(stableChannel), 198 WithPackage(testPackageName), 199 WithApproval(string(v1alpha1.ApprovalManual)), 200 ))) 201 }) 202 203 It("generates dependency_resolution metric", func() { 204 205 By(`Verify metrics have been emitted for dependency resolution`) 206 Eventually(func() bool { 207 return Eventually(func() []Metric { 208 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 209 }).Should(ContainElement(LikeMetric( 210 WithFamily("olm_resolution_duration_seconds"), 211 WithLabel("outcome", "failed"), 212 WithValueGreaterThan(0), 213 ))) 214 }) 215 }) 216 }) 217 218 When("A subscription object is updated after emitting metrics", func() { 219 220 BeforeEach(func() { 221 subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, generatedNamespace.GetName(), "metric-subscription-for-update", testPackageName, stableChannel, v1alpha1.ApprovalManual) 222 Eventually(func() []Metric { 223 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 224 }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-update")))) 225 Eventually(func() error { 226 s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.Background(), subscription.GetName(), metav1.GetOptions{}) 227 if err != nil { 228 return err 229 } 230 s.Spec.Channel = betaChannel 231 _, err = crc.OperatorsV1alpha1().Subscriptions(s.GetNamespace()).Update(context.Background(), s, metav1.UpdateOptions{}) 232 return err 233 }).Should(Succeed()) 234 }) 235 236 AfterEach(func() { 237 if subscriptionCleanup != nil { 238 subscriptionCleanup() 239 } 240 }) 241 242 It("deletes the old Subscription metric and emits the new metric", func() { 243 Eventually(func() []Metric { 244 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 245 }).Should(And( 246 Not(ContainElement(LikeMetric( 247 WithFamily("subscription_sync_total"), 248 WithName("metric-subscription-for-update"), 249 WithChannel(stableChannel), 250 WithPackage(testPackageName), 251 WithApproval(string(v1alpha1.ApprovalManual)), 252 ))), 253 ContainElement(LikeMetric( 254 WithFamily("subscription_sync_total"), 255 WithName("metric-subscription-for-update"), 256 WithChannel(betaChannel), 257 WithPackage(testPackageName), 258 WithApproval(string(v1alpha1.ApprovalManual)), 259 )), 260 )) 261 }) 262 When("The subscription object is updated again", func() { 263 264 BeforeEach(func() { 265 Eventually(func() error { 266 s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.Background(), subscription.GetName(), metav1.GetOptions{}) 267 if err != nil { 268 return err 269 } 270 s.Spec.Channel = alphaChannel 271 _, err = crc.OperatorsV1alpha1().Subscriptions(s.GetNamespace()).Update(context.Background(), s, metav1.UpdateOptions{}) 272 return err 273 }).Should(Succeed()) 274 }) 275 276 It("deletes the old subscription metric and emits the new metric(there is only one metric for the subscription)", func() { 277 Eventually(func() []Metric { 278 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 279 }).Should(And( 280 Not(ContainElement(LikeMetric( 281 WithFamily("subscription_sync_total"), 282 WithName("metric-subscription-for-update"), 283 WithChannel(stableChannel), 284 ))), 285 Not(ContainElement(LikeMetric( 286 WithFamily("subscription_sync_total"), 287 WithName("metric-subscription-for-update"), 288 WithChannel(betaChannel), 289 WithPackage(testPackageName), 290 WithApproval(string(v1alpha1.ApprovalManual)), 291 ))), 292 ContainElement(LikeMetric( 293 WithFamily("subscription_sync_total"), 294 WithName("metric-subscription-for-update"), 295 WithChannel(alphaChannel), 296 WithPackage(testPackageName), 297 WithApproval(string(v1alpha1.ApprovalManual)), 298 )))) 299 }) 300 }) 301 }) 302 303 When("A subscription object is deleted after emitting metrics", func() { 304 305 BeforeEach(func() { 306 subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, generatedNamespace.GetName(), "metric-subscription-for-delete", testPackageName, stableChannel, v1alpha1.ApprovalManual) 307 Eventually(func() []Metric { 308 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 309 }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-delete")))) 310 if subscriptionCleanup != nil { 311 subscriptionCleanup() 312 subscriptionCleanup = nil 313 } 314 }) 315 316 AfterEach(func() { 317 if subscriptionCleanup != nil { 318 subscriptionCleanup() 319 } 320 }) 321 322 It("deletes the Subscription metric", func() { 323 Eventually(func() []Metric { 324 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 325 }).ShouldNot(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithName("metric-subscription-for-delete")))) 326 }) 327 }) 328 }) 329 330 Context("Metrics emitted by CatalogSources", func() { 331 When("A valid CatalogSource object is created", func() { 332 var ( 333 name = "metrics-catsrc-valid" 334 cleanup func() 335 ) 336 337 BeforeEach(func() { 338 mainPackageName := genName("nginx-") 339 340 mainPackageStable := fmt.Sprintf("%s-stable", mainPackageName) 341 342 stableChannel := "stable" 343 344 mainCRD := newCRD(genName("ins-")) 345 mainCSV := newCSV(mainPackageStable, generatedNamespace.GetName(), "", semver.MustParse("0.1.0"), []apiextensionsv1.CustomResourceDefinition{mainCRD}, nil, nil) 346 347 mainManifests := []registry.PackageManifest{ 348 { 349 PackageName: mainPackageName, 350 Channels: []registry.PackageChannel{ 351 {Name: stableChannel, CurrentCSVName: mainPackageStable}, 352 }, 353 DefaultChannelName: stableChannel, 354 }, 355 } 356 cs, cleanupAll := createInternalCatalogSource(c, crc, name, generatedNamespace.GetName(), mainManifests, []apiextensionsv1.CustomResourceDefinition{mainCRD}, []v1alpha1.ClusterServiceVersion{mainCSV}) 357 By(`Note(tflannag): Dependending on how ginkgo orders these test specs, and how bloated the cluster we're running`) 358 By(`this test case against, we risk creating and then immediately deleting the catalogsource before the catalog`) 359 By(`operator can generate all the requisite resources (e.g. the ServiceAccount), which can leave the underlying`) 360 By(`registry Pod in a terminating state until kubelet times out waiting for the generated ServiceAccount`) 361 By(`resource to be present so it can mount it in the registry container.`) 362 _, err := fetchCatalogSourceOnStatus(crc, cs.GetName(), cs.GetNamespace(), catalogSourceRegistryPodSynced()) 363 Expect(err).ShouldNot(HaveOccurred()) 364 365 var once sync.Once 366 cleanup = func() { 367 once.Do(cleanupAll) 368 } 369 }) 370 371 AfterEach(func() { 372 cleanup() 373 }) 374 375 It("emits catalogsource_ready metric for the catalogSource with Value equal to 1", func() { 376 Eventually(func() []Metric { 377 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 378 }).Should(And( 379 ContainElement(LikeMetric( 380 WithFamily("catalog_source_count"), 381 WithValueGreaterThan(0), 382 )), 383 ContainElement(LikeMetric( 384 WithFamily("catalogsource_ready"), 385 WithName(name), 386 WithNamespace(generatedNamespace.GetName()), 387 WithValue(1), 388 )), 389 )) 390 }) 391 When("The CatalogSource object is deleted", func() { 392 393 BeforeEach(func() { 394 cleanup() 395 }) 396 397 It("deletes the metrics for the CatalogSource", func() { 398 Eventually(func() []Metric { 399 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 400 }).Should(And( 401 Not(ContainElement(LikeMetric( 402 WithFamily("catalogsource_ready"), 403 WithName(name), 404 WithNamespace(generatedNamespace.GetName()), 405 ))))) 406 }) 407 }) 408 }) 409 410 When("A CatalogSource object is in an invalid state", func() { 411 var ( 412 name = "metrics-catsrc-invalid" 413 cleanup func() 414 ) 415 416 BeforeEach(func() { 417 _, cleanup = createInvalidGRPCCatalogSource(c, crc, name, generatedNamespace.GetName()) 418 }) 419 420 AfterEach(func() { 421 cleanup() 422 }) 423 424 It("emits metrics for the CatlogSource with a Value equal to 0", func() { 425 Eventually(func() []Metric { 426 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 427 }).Should(And( 428 ContainElement(LikeMetric( 429 WithFamily("catalogsource_ready"), 430 WithName(name), 431 WithNamespace(generatedNamespace.GetName()), 432 WithValue(0), 433 )), 434 )) 435 Consistently(func() []Metric { 436 return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) 437 }, "1m", "30s").Should(And( 438 ContainElement(LikeMetric( 439 WithFamily("catalogsource_ready"), 440 WithName(name), 441 WithNamespace(generatedNamespace.GetName()), 442 WithValue(0), 443 )), 444 )) 445 }) 446 }) 447 }) 448 }) 449 450 func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev1.Pod { 451 listOptions := metav1.ListOptions{LabelSelector: label} 452 var podList *corev1.PodList 453 EventuallyWithOffset(1, func() (numPods int, err error) { 454 podList, err = client.KubernetesInterface().CoreV1().Pods(operatorNamespace).List(context.Background(), listOptions) 455 if podList != nil { 456 numPods = len(podList.Items) 457 } 458 459 return 460 }).Should(Equal(1), "number of pods never scaled to one") 461 462 return &podList.Items[0] 463 } 464 465 func getDeploymentWithLabel(client operatorclient.ClientInterface, label string) *appsv1.Deployment { 466 listOptions := metav1.ListOptions{LabelSelector: label} 467 var deploymentList *appsv1.DeploymentList 468 EventuallyWithOffset(1, func() (numDeps int, err error) { 469 deploymentList, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).List(context.Background(), listOptions) 470 if deploymentList != nil { 471 numDeps = len(deploymentList.Items) 472 } 473 474 return 475 }).Should(Equal(1), "expected exactly one Deployment") 476 477 return &deploymentList.Items[0] 478 } 479 480 func restartDeploymentWithLabel(client operatorclient.ClientInterface, l string) { 481 d := getDeploymentWithLabel(client, l) 482 z := int32(0) 483 oldZ := *d.Spec.Replicas 484 d.Spec.Replicas = &z 485 _, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.Background(), d, metav1.UpdateOptions{}) 486 Expect(err).ToNot(HaveOccurred()) 487 488 EventuallyWithOffset(1, func() (replicas int32, err error) { 489 deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.Background(), d.Name, metav1.GetOptions{}) 490 if deployment != nil { 491 replicas = deployment.Status.Replicas 492 } 493 return 494 }).Should(Equal(int32(0)), "expected exactly 0 Deployments") 495 496 updated := getDeploymentWithLabel(client, l) 497 updated.Spec.Replicas = &oldZ 498 _, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.Background(), updated, metav1.UpdateOptions{}) 499 Expect(err).ToNot(HaveOccurred()) 500 501 EventuallyWithOffset(1, func() (replicas int32, err error) { 502 deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.Background(), d.Name, metav1.GetOptions{}) 503 if deployment != nil { 504 replicas = deployment.Status.Replicas 505 } 506 return 507 }).Should(Equal(oldZ), "expected exactly 1 Deployment") 508 } 509 510 func extractMetricPortFromPod(pod *corev1.Pod) string { 511 for _, container := range pod.Spec.Containers { 512 for _, port := range container.Ports { 513 if port.Name == "metrics" { 514 return strconv.Itoa(int(port.ContainerPort)) 515 } 516 } 517 } 518 return "-1" 519 } 520 521 func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod) []Metric { 522 ctx.Ctx().Logf("querying pod %s/%s\n", pod.GetNamespace(), pod.GetName()) 523 524 By(`assuming -tls-cert and -tls-key aren't used anywhere else as a parameter value`) 525 var foundCert, foundKey bool 526 for _, arg := range pod.Spec.Containers[0].Args { 527 matched, err := regexp.MatchString(`^-?-tls-cert`, arg) 528 Expect(err).ToNot(HaveOccurred()) 529 foundCert = foundCert || matched 530 531 matched, err = regexp.MatchString(`^-?-tls-key`, arg) 532 Expect(err).ToNot(HaveOccurred()) 533 foundKey = foundKey || matched 534 } 535 536 var scheme string 537 if foundCert && foundKey { 538 scheme = "https" 539 } else { 540 scheme = "http" 541 } 542 ctx.Ctx().Logf("Retrieving metrics using scheme %v\n", scheme) 543 mfs := make(map[string]*io_prometheus_client.MetricFamily) 544 EventuallyWithOffset(1, func() error { 545 raw, err := client.KubernetesInterface().CoreV1().RESTClient().Get(). 546 Namespace(pod.GetNamespace()). 547 Resource("pods"). 548 SubResource("proxy"). 549 Name(net.JoinSchemeNamePort(scheme, pod.GetName(), extractMetricPortFromPod(pod))). 550 Suffix("metrics"). 551 Do(context.Background()).Raw() 552 if err != nil { 553 return err 554 } 555 var p expfmt.TextParser 556 mfs, err = p.TextToMetricFamilies(bytes.NewReader(raw)) 557 if err != nil { 558 return err 559 } 560 return nil 561 }).Should(Succeed()) 562 563 var metrics []Metric 564 for family, mf := range mfs { 565 var ignore bool 566 for _, ignoredPrefix := range []string{"go_", "process_", "promhttp_"} { 567 ignore = ignore || strings.HasPrefix(family, ignoredPrefix) 568 } 569 if ignore { 570 // Metrics with these prefixes shouldn't be 571 // relevant to these tests, so they can be 572 // stripped out to make test failures easier 573 // to understand. 574 continue 575 } 576 577 for _, metric := range mf.Metric { 578 m := Metric{ 579 Family: family, 580 } 581 if len(metric.GetLabel()) > 0 { 582 m.Labels = make(map[string][]string) 583 } 584 for _, pair := range metric.GetLabel() { 585 m.Labels[pair.GetName()] = append(m.Labels[pair.GetName()], pair.GetValue()) 586 } 587 if u := metric.GetUntyped(); u != nil { 588 m.Value = u.GetValue() 589 } 590 if g := metric.GetGauge(); g != nil { 591 m.Value = g.GetValue() 592 } 593 if c := metric.GetCounter(); c != nil { 594 m.Value = c.GetValue() 595 } 596 metrics = append(metrics, m) 597 } 598 } 599 return metrics 600 }