github.com/argoproj-labs/argocd-operator@v0.10.0/controllers/argocd/prometheus.go (about) 1 // Copyright 2019 ArgoCD Operator Developers 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package argocd 16 17 import ( 18 "context" 19 "fmt" 20 21 monitoringv1 "github.com/coreos/prometheus-operator/pkg/apis/monitoring/v1" 22 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 "k8s.io/apimachinery/pkg/util/intstr" 24 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 25 26 argoproj "github.com/argoproj-labs/argocd-operator/api/v1beta1" 27 "github.com/argoproj-labs/argocd-operator/common" 28 "github.com/argoproj-labs/argocd-operator/controllers/argoutil" 29 ) 30 31 var prometheusAPIFound = false 32 33 // getPrometheusHost will return the hostname value for Prometheus. 34 func getPrometheusHost(cr *argoproj.ArgoCD) string { 35 host := nameWithSuffix("prometheus", cr) 36 if len(cr.Spec.Prometheus.Host) > 0 { 37 host = cr.Spec.Prometheus.Host 38 } 39 return host 40 } 41 42 // getPrometheusSize will return the size value for the Prometheus replica count. 43 func getPrometheusReplicas(cr *argoproj.ArgoCD) *int32 { 44 replicas := common.ArgoCDDefaultPrometheusReplicas 45 if cr.Spec.Prometheus.Size != nil { 46 if *cr.Spec.Prometheus.Size >= 0 && *cr.Spec.Prometheus.Size != replicas { 47 replicas = *cr.Spec.Prometheus.Size 48 } 49 } 50 return &replicas 51 } 52 53 // IsPrometheusAPIAvailable returns true if the Prometheus API is present. 54 func IsPrometheusAPIAvailable() bool { 55 return prometheusAPIFound 56 } 57 58 // hasPrometheusSpecChanged will return true if the supported properties differs in the actual versus the desired state. 59 func hasPrometheusSpecChanged(actual *monitoringv1.Prometheus, desired *argoproj.ArgoCD) bool { 60 // Replica count 61 if desired.Spec.Prometheus.Size != nil && *desired.Spec.Prometheus.Size >= 0 { // Valid replica count specified in desired state 62 if actual.Spec.Replicas != nil { // Actual replicas value is set 63 if *actual.Spec.Replicas != *desired.Spec.Prometheus.Size { 64 return true 65 } 66 } else if *desired.Spec.Prometheus.Size != common.ArgoCDDefaultPrometheusReplicas { // Actual replicas value is NOT set, but desired replicas differs from the default 67 return true 68 } 69 } else { // Replica count NOT specified in desired state 70 if actual.Spec.Replicas != nil && *actual.Spec.Replicas != common.ArgoCDDefaultPrometheusReplicas { 71 return true 72 } 73 } 74 return false 75 } 76 77 // verifyPrometheusAPI will verify that the Prometheus API is present. 78 func verifyPrometheusAPI() error { 79 found, err := argoutil.VerifyAPI(monitoringv1.SchemeGroupVersion.Group, monitoringv1.SchemeGroupVersion.Version) 80 if err != nil { 81 return err 82 } 83 prometheusAPIFound = found 84 return nil 85 } 86 87 // newPrometheus returns a new Prometheus instance for the given ArgoCD. 88 func newPrometheus(cr *argoproj.ArgoCD) *monitoringv1.Prometheus { 89 return &monitoringv1.Prometheus{ 90 ObjectMeta: metav1.ObjectMeta{ 91 Name: cr.Name, 92 Namespace: cr.Namespace, 93 Labels: argoutil.LabelsForCluster(cr), 94 }, 95 } 96 } 97 98 // newServiceMonitor returns a new ServiceMonitor instance. 99 func newServiceMonitor(cr *argoproj.ArgoCD) *monitoringv1.ServiceMonitor { 100 return &monitoringv1.ServiceMonitor{ 101 ObjectMeta: metav1.ObjectMeta{ 102 Name: cr.Name, 103 Namespace: cr.Namespace, 104 Labels: argoutil.LabelsForCluster(cr), 105 }, 106 } 107 } 108 109 // newServiceMonitorWithName returns a new ServiceMonitor instance for the given ArgoCD using the given name. 110 func newServiceMonitorWithName(name string, cr *argoproj.ArgoCD) *monitoringv1.ServiceMonitor { 111 svcmon := newServiceMonitor(cr) 112 svcmon.ObjectMeta.Name = name 113 114 lbls := svcmon.ObjectMeta.Labels 115 lbls[common.ArgoCDKeyName] = name 116 lbls[common.ArgoCDKeyRelease] = "prometheus-operator" 117 svcmon.ObjectMeta.Labels = lbls 118 119 return svcmon 120 } 121 122 // newServiceMonitorWithSuffix returns a new ServiceMonitor instance for the given ArgoCD using the given suffix. 123 func newServiceMonitorWithSuffix(suffix string, cr *argoproj.ArgoCD) *monitoringv1.ServiceMonitor { 124 return newServiceMonitorWithName(fmt.Sprintf("%s-%s", cr.Name, suffix), cr) 125 } 126 127 // reconcileMetricsServiceMonitor will ensure that the ServiceMonitor is present for the ArgoCD metrics Service. 128 func (r *ReconcileArgoCD) reconcileMetricsServiceMonitor(cr *argoproj.ArgoCD) error { 129 sm := newServiceMonitorWithSuffix(common.ArgoCDKeyMetrics, cr) 130 if argoutil.IsObjectFound(r.Client, cr.Namespace, sm.Name, sm) { 131 if !cr.Spec.Prometheus.Enabled { 132 // ServiceMonitor exists but enabled flag has been set to false, delete the ServiceMonitor 133 return r.Client.Delete(context.TODO(), sm) 134 } 135 return nil // ServiceMonitor found, do nothing 136 } 137 138 if !cr.Spec.Prometheus.Enabled { 139 return nil // Prometheus not enabled, do nothing. 140 } 141 142 sm.Spec.Selector = metav1.LabelSelector{ 143 MatchLabels: map[string]string{ 144 common.ArgoCDKeyName: nameWithSuffix(common.ArgoCDKeyMetrics, cr), 145 }, 146 } 147 sm.Spec.Endpoints = []monitoringv1.Endpoint{ 148 { 149 Port: common.ArgoCDKeyMetrics, 150 }, 151 } 152 153 if err := controllerutil.SetControllerReference(cr, sm, r.Scheme); err != nil { 154 return err 155 } 156 return r.Client.Create(context.TODO(), sm) 157 } 158 159 // reconcilePrometheus will ensure that Prometheus is present for ArgoCD metrics. 160 func (r *ReconcileArgoCD) reconcilePrometheus(cr *argoproj.ArgoCD) error { 161 prometheus := newPrometheus(cr) 162 if argoutil.IsObjectFound(r.Client, cr.Namespace, prometheus.Name, prometheus) { 163 if !cr.Spec.Prometheus.Enabled { 164 // Prometheus exists but enabled flag has been set to false, delete the Prometheus 165 return r.Client.Delete(context.TODO(), prometheus) 166 } 167 if hasPrometheusSpecChanged(prometheus, cr) { 168 prometheus.Spec.Replicas = cr.Spec.Prometheus.Size 169 return r.Client.Update(context.TODO(), prometheus) 170 } 171 return nil // Prometheus found, do nothing 172 } 173 174 if !cr.Spec.Prometheus.Enabled { 175 return nil // Prometheus not enabled, do nothing. 176 } 177 178 prometheus.Spec.Replicas = getPrometheusReplicas(cr) 179 prometheus.Spec.ServiceAccountName = "prometheus-k8s" 180 prometheus.Spec.ServiceMonitorSelector = &metav1.LabelSelector{} 181 182 if err := controllerutil.SetControllerReference(cr, prometheus, r.Scheme); err != nil { 183 return err 184 } 185 return r.Client.Create(context.TODO(), prometheus) 186 } 187 188 // reconcileRepoServerServiceMonitor will ensure that the ServiceMonitor is present for the Repo Server metrics Service. 189 func (r *ReconcileArgoCD) reconcileRepoServerServiceMonitor(cr *argoproj.ArgoCD) error { 190 sm := newServiceMonitorWithSuffix("repo-server-metrics", cr) 191 if argoutil.IsObjectFound(r.Client, cr.Namespace, sm.Name, sm) { 192 if !cr.Spec.Prometheus.Enabled { 193 // ServiceMonitor exists but enabled flag has been set to false, delete the ServiceMonitor 194 return r.Client.Delete(context.TODO(), sm) 195 } 196 return nil // ServiceMonitor found, do nothing 197 } 198 199 if !cr.Spec.Prometheus.Enabled { 200 return nil // Prometheus not enabled, do nothing. 201 } 202 203 sm.Spec.Selector = metav1.LabelSelector{ 204 MatchLabels: map[string]string{ 205 common.ArgoCDKeyName: nameWithSuffix("repo-server", cr), 206 }, 207 } 208 sm.Spec.Endpoints = []monitoringv1.Endpoint{ 209 { 210 Port: common.ArgoCDKeyMetrics, 211 }, 212 } 213 214 if err := controllerutil.SetControllerReference(cr, sm, r.Scheme); err != nil { 215 return err 216 } 217 return r.Client.Create(context.TODO(), sm) 218 } 219 220 // reconcileServerMetricsServiceMonitor will ensure that the ServiceMonitor is present for the ArgoCD Server metrics Service. 221 func (r *ReconcileArgoCD) reconcileServerMetricsServiceMonitor(cr *argoproj.ArgoCD) error { 222 sm := newServiceMonitorWithSuffix("server-metrics", cr) 223 if argoutil.IsObjectFound(r.Client, cr.Namespace, sm.Name, sm) { 224 if !cr.Spec.Prometheus.Enabled { 225 // ServiceMonitor exists but enabled flag has been set to false, delete the ServiceMonitor 226 return r.Client.Delete(context.TODO(), sm) 227 } 228 return nil // ServiceMonitor found, do nothing 229 } 230 231 if !cr.Spec.Prometheus.Enabled { 232 return nil // Prometheus not enabled, do nothing. 233 } 234 235 sm.Spec.Selector = metav1.LabelSelector{ 236 MatchLabels: map[string]string{ 237 common.ArgoCDKeyName: nameWithSuffix("server-metrics", cr), 238 }, 239 } 240 sm.Spec.Endpoints = []monitoringv1.Endpoint{ 241 { 242 Port: common.ArgoCDKeyMetrics, 243 }, 244 } 245 246 if err := controllerutil.SetControllerReference(cr, sm, r.Scheme); err != nil { 247 return err 248 } 249 return r.Client.Create(context.TODO(), sm) 250 } 251 252 // reconcilePrometheusRule reconciles the PrometheusRule that triggers alerts based on workload statuses 253 func (r *ReconcileArgoCD) reconcilePrometheusRule(cr *argoproj.ArgoCD) error { 254 255 promRule := newPrometheusRule(cr.Namespace, "argocd-component-status-alert") 256 257 if argoutil.IsObjectFound(r.Client, cr.Namespace, promRule.Name, promRule) { 258 259 if !cr.Spec.Monitoring.Enabled { 260 // PrometheusRule exists but enabled flag has been set to false, delete the PrometheusRule 261 log.Info("instance monitoring disabled, deleting component status tracking prometheusRule") 262 return r.Client.Delete(context.TODO(), promRule) 263 } 264 return nil // PrometheusRule found, do nothing 265 } 266 267 if !cr.Spec.Monitoring.Enabled { 268 return nil // Monitoring not enabled, do nothing. 269 } 270 271 ruleGroups := []monitoringv1.RuleGroup{ 272 { 273 Name: "ArgoCDComponentStatus", 274 Rules: []monitoringv1.Rule{ 275 { 276 Alert: "ApplicationControllerNotReady", 277 Annotations: map[string]string{ 278 "message": fmt.Sprintf("application controller deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 279 }, 280 Expr: intstr.IntOrString{ 281 Type: intstr.String, 282 StrVal: fmt.Sprintf("kube_statefulset_status_replicas{statefulset=\"%s\", namespace=\"%s\"} != kube_statefulset_status_replicas_ready{statefulset=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-application-controller"), cr.Namespace, fmt.Sprintf(cr.Name+"-application-controller"), cr.Namespace), 283 }, 284 For: "1m", 285 Labels: map[string]string{ 286 "severity": "critical", 287 }, 288 }, 289 { 290 Alert: "ServerNotReady", 291 Annotations: map[string]string{ 292 "message": fmt.Sprintf("server deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 293 }, 294 Expr: intstr.IntOrString{ 295 Type: intstr.String, 296 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-server"), cr.Namespace, fmt.Sprintf(cr.Name+"-server"), cr.Namespace), 297 }, 298 For: "1m", 299 Labels: map[string]string{ 300 "severity": "critical", 301 }, 302 }, 303 { 304 Alert: "RepoServerNotReady", 305 Annotations: map[string]string{ 306 "message": fmt.Sprintf("repo server deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 307 }, 308 Expr: intstr.IntOrString{ 309 Type: intstr.String, 310 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-repo-server"), cr.Namespace, fmt.Sprintf(cr.Name+"-repo-server"), cr.Namespace), 311 }, 312 For: "1m", 313 Labels: map[string]string{ 314 "severity": "critical", 315 }, 316 }, 317 { 318 Alert: "ApplicationSetControllerNotReady", 319 Annotations: map[string]string{ 320 "message": fmt.Sprintf("applicationSet controller deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 321 }, 322 Expr: intstr.IntOrString{ 323 Type: intstr.String, 324 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-applicationset-controller"), cr.Namespace, fmt.Sprintf(cr.Name+"-applicationset-controller"), cr.Namespace), 325 }, 326 For: "5m", 327 Labels: map[string]string{ 328 "severity": "warning", 329 }, 330 }, 331 { 332 Alert: "DexNotReady", 333 Annotations: map[string]string{ 334 "message": fmt.Sprintf("dex deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 335 }, 336 Expr: intstr.IntOrString{ 337 Type: intstr.String, 338 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-dex-server"), cr.Namespace, fmt.Sprintf(cr.Name+"-dex-server"), cr.Namespace), 339 }, 340 For: "5m", 341 Labels: map[string]string{ 342 "severity": "warning", 343 }, 344 }, 345 { 346 Alert: "NotificationsControllerNotReady", 347 Annotations: map[string]string{ 348 "message": fmt.Sprintf("notifications controller deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 349 }, 350 Expr: intstr.IntOrString{ 351 Type: intstr.String, 352 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-notifications-controller"), cr.Namespace, fmt.Sprintf(cr.Name+"-notifications-controller"), cr.Namespace), 353 }, 354 For: "5m", 355 Labels: map[string]string{ 356 "severity": "warning", 357 }, 358 }, 359 { 360 Alert: "RedisNotReady", 361 Annotations: map[string]string{ 362 "message": fmt.Sprintf("redis deployment for Argo CD instance in namespace %s is not running", cr.Namespace), 363 }, 364 Expr: intstr.IntOrString{ 365 Type: intstr.String, 366 StrVal: fmt.Sprintf("kube_deployment_status_replicas{deployment=\"%s\", namespace=\"%s\"} != kube_deployment_status_replicas_ready{deployment=\"%s\", namespace=\"%s\"} ", fmt.Sprintf(cr.Name+"-redis"), cr.Namespace, fmt.Sprintf(cr.Name+"-redis"), cr.Namespace), 367 }, 368 For: "5m", 369 Labels: map[string]string{ 370 "severity": "warning", 371 }, 372 }, 373 }, 374 }, 375 } 376 promRule.Spec.Groups = ruleGroups 377 378 if err := controllerutil.SetControllerReference(cr, promRule, r.Scheme); err != nil { 379 return err 380 } 381 382 log.Info("instance monitoring enabled, creating component status tracking prometheusRule") 383 return r.Client.Create(context.TODO(), promRule) // Create PrometheusRule 384 } 385 386 // newPrometheusRule returns an empty PrometheusRule 387 func newPrometheusRule(namespace, alertRuleName string) *monitoringv1.PrometheusRule { 388 389 promRule := &monitoringv1.PrometheusRule{ 390 ObjectMeta: metav1.ObjectMeta{ 391 Name: alertRuleName, 392 Namespace: namespace, 393 }, 394 Spec: monitoringv1.PrometheusRuleSpec{}, 395 } 396 return promRule 397 }