github.com/argoproj/argo-cd@v1.8.7/controller/metrics/metrics.go (about) 1 package metrics 2 3 import ( 4 "context" 5 "net/http" 6 "os" 7 "strconv" 8 "time" 9 10 "github.com/argoproj/gitops-engine/pkg/health" 11 "github.com/prometheus/client_golang/prometheus" 12 "github.com/prometheus/client_golang/prometheus/promhttp" 13 log "github.com/sirupsen/logrus" 14 "k8s.io/apimachinery/pkg/labels" 15 16 argoappv1 "github.com/argoproj/argo-cd/pkg/apis/application/v1alpha1" 17 applister "github.com/argoproj/argo-cd/pkg/client/listers/application/v1alpha1" 18 "github.com/argoproj/argo-cd/util/git" 19 "github.com/argoproj/argo-cd/util/healthz" 20 ) 21 22 type MetricsServer struct { 23 *http.Server 24 syncCounter *prometheus.CounterVec 25 kubectlExecCounter *prometheus.CounterVec 26 kubectlExecPendingGauge *prometheus.GaugeVec 27 k8sRequestCounter *prometheus.CounterVec 28 clusterEventsCounter *prometheus.CounterVec 29 redisRequestCounter *prometheus.CounterVec 30 reconcileHistogram *prometheus.HistogramVec 31 redisRequestHistogram *prometheus.HistogramVec 32 registry *prometheus.Registry 33 hostname string 34 } 35 36 const ( 37 // MetricsPath is the endpoint to collect application metrics 38 MetricsPath = "/metrics" 39 // EnvVarLegacyControllerMetrics is a env var to re-enable deprecated prometheus metrics 40 EnvVarLegacyControllerMetrics = "ARGOCD_LEGACY_CONTROLLER_METRICS" 41 ) 42 43 // Follow Prometheus naming practices 44 // https://prometheus.io/docs/practices/naming/ 45 var ( 46 descAppDefaultLabels = []string{"namespace", "name", "project"} 47 48 descAppInfo = prometheus.NewDesc( 49 "argocd_app_info", 50 "Information about application.", 51 append(descAppDefaultLabels, "repo", "dest_server", "dest_namespace", "sync_status", "health_status", "operation"), 52 nil, 53 ) 54 // DEPRECATED 55 descAppCreated = prometheus.NewDesc( 56 "argocd_app_created_time", 57 "Creation time in unix timestamp for an application.", 58 descAppDefaultLabels, 59 nil, 60 ) 61 // DEPRECATED: superceded by sync_status label in argocd_app_info 62 descAppSyncStatusCode = prometheus.NewDesc( 63 "argocd_app_sync_status", 64 "The application current sync status.", 65 append(descAppDefaultLabels, "sync_status"), 66 nil, 67 ) 68 // DEPRECATED: superceded by health_status label in argocd_app_info 69 descAppHealthStatus = prometheus.NewDesc( 70 "argocd_app_health_status", 71 "The application current health status.", 72 append(descAppDefaultLabels, "health_status"), 73 nil, 74 ) 75 76 syncCounter = prometheus.NewCounterVec( 77 prometheus.CounterOpts{ 78 Name: "argocd_app_sync_total", 79 Help: "Number of application syncs.", 80 }, 81 append(descAppDefaultLabels, "dest_server", "phase"), 82 ) 83 84 k8sRequestCounter = prometheus.NewCounterVec( 85 prometheus.CounterOpts{ 86 Name: "argocd_app_k8s_request_total", 87 Help: "Number of kubernetes requests executed during application reconciliation.", 88 }, 89 append(descAppDefaultLabels, "server", "response_code", "verb", "resource_kind", "resource_namespace"), 90 ) 91 92 kubectlExecCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ 93 Name: "argocd_kubectl_exec_total", 94 Help: "Number of kubectl executions", 95 }, []string{"hostname", "command"}) 96 97 kubectlExecPendingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 98 Name: "argocd_kubectl_exec_pending", 99 Help: "Number of pending kubectl executions", 100 }, []string{"hostname", "command"}) 101 102 reconcileHistogram = prometheus.NewHistogramVec( 103 prometheus.HistogramOpts{ 104 Name: "argocd_app_reconcile", 105 Help: "Application reconciliation performance.", 106 // Buckets chosen after observing a ~2100ms mean reconcile time 107 Buckets: []float64{0.25, .5, 1, 2, 4, 8, 16}, 108 }, 109 []string{"namespace", "dest_server"}, 110 ) 111 112 clusterEventsCounter = prometheus.NewCounterVec(prometheus.CounterOpts{ 113 Name: "argocd_cluster_events_total", 114 Help: "Number of processes k8s resource events.", 115 }, append(descClusterDefaultLabels, "group", "kind")) 116 117 redisRequestCounter = prometheus.NewCounterVec( 118 prometheus.CounterOpts{ 119 Name: "argocd_redis_request_total", 120 Help: "Number of kubernetes requests executed during application reconciliation.", 121 }, 122 []string{"hostname", "initiator", "failed"}, 123 ) 124 125 redisRequestHistogram = prometheus.NewHistogramVec( 126 prometheus.HistogramOpts{ 127 Name: "argocd_redis_request_duration", 128 Help: "Redis requests duration.", 129 Buckets: []float64{0.01, 0.05, 0.10, 0.25, .5, 1}, 130 }, 131 []string{"hostname", "initiator"}, 132 ) 133 ) 134 135 // NewMetricsServer returns a new prometheus server which collects application metrics 136 func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, healthCheck func(r *http.Request) error) (*MetricsServer, error) { 137 hostname, err := os.Hostname() 138 if err != nil { 139 return nil, err 140 } 141 mux := http.NewServeMux() 142 registry := NewAppRegistry(appLister, appFilter) 143 mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{ 144 // contains app controller specific metrics 145 registry, 146 // contains process, golang and controller workqueues metrics 147 prometheus.DefaultGatherer, 148 }, promhttp.HandlerOpts{})) 149 healthz.ServeHealthCheck(mux, healthCheck) 150 151 registry.MustRegister(syncCounter) 152 registry.MustRegister(k8sRequestCounter) 153 registry.MustRegister(kubectlExecCounter) 154 registry.MustRegister(kubectlExecPendingGauge) 155 registry.MustRegister(reconcileHistogram) 156 registry.MustRegister(clusterEventsCounter) 157 registry.MustRegister(redisRequestCounter) 158 registry.MustRegister(redisRequestHistogram) 159 160 return &MetricsServer{ 161 registry: registry, 162 Server: &http.Server{ 163 Addr: addr, 164 Handler: mux, 165 }, 166 syncCounter: syncCounter, 167 k8sRequestCounter: k8sRequestCounter, 168 kubectlExecCounter: kubectlExecCounter, 169 kubectlExecPendingGauge: kubectlExecPendingGauge, 170 reconcileHistogram: reconcileHistogram, 171 clusterEventsCounter: clusterEventsCounter, 172 redisRequestCounter: redisRequestCounter, 173 redisRequestHistogram: redisRequestHistogram, 174 hostname: hostname, 175 }, nil 176 } 177 178 func (m *MetricsServer) RegisterClustersInfoSource(ctx context.Context, source HasClustersInfo) { 179 collector := &clusterCollector{infoSource: source} 180 go collector.Run(ctx) 181 m.registry.MustRegister(collector) 182 } 183 184 // IncSync increments the sync counter for an application 185 func (m *MetricsServer) IncSync(app *argoappv1.Application, state *argoappv1.OperationState) { 186 if !state.Phase.Completed() { 187 return 188 } 189 m.syncCounter.WithLabelValues(app.Namespace, app.Name, app.Spec.GetProject(), app.Spec.Destination.Server, string(state.Phase)).Inc() 190 } 191 192 func (m *MetricsServer) IncKubectlExec(command string) { 193 m.kubectlExecCounter.WithLabelValues(m.hostname, command).Inc() 194 } 195 196 func (m *MetricsServer) IncKubectlExecPending(command string) { 197 m.kubectlExecPendingGauge.WithLabelValues(m.hostname, command).Inc() 198 } 199 200 func (m *MetricsServer) DecKubectlExecPending(command string) { 201 m.kubectlExecPendingGauge.WithLabelValues(m.hostname, command).Dec() 202 } 203 204 // IncClusterEventsCount increments the number of cluster events 205 func (m *MetricsServer) IncClusterEventsCount(server, group, kind string) { 206 m.clusterEventsCounter.WithLabelValues(server, group, kind).Inc() 207 } 208 209 // IncKubernetesRequest increments the kubernetes requests counter for an application 210 func (m *MetricsServer) IncKubernetesRequest(app *argoappv1.Application, server, statusCode, verb, resourceKind, resourceNamespace string) { 211 var namespace, name, project string 212 if app != nil { 213 namespace = app.Namespace 214 name = app.Name 215 project = app.Spec.GetProject() 216 } 217 m.k8sRequestCounter.WithLabelValues( 218 namespace, name, project, server, statusCode, 219 verb, resourceKind, resourceNamespace, 220 ).Inc() 221 } 222 223 func (m *MetricsServer) IncRedisRequest(failed bool) { 224 m.redisRequestCounter.WithLabelValues(m.hostname, "argocd-application-controller", strconv.FormatBool(failed)).Inc() 225 } 226 227 // ObserveRedisRequestDuration observes redis request duration 228 func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) { 229 m.redisRequestHistogram.WithLabelValues(m.hostname, "argocd-application-controller").Observe(duration.Seconds()) 230 } 231 232 // IncReconcile increments the reconcile counter for an application 233 func (m *MetricsServer) IncReconcile(app *argoappv1.Application, duration time.Duration) { 234 m.reconcileHistogram.WithLabelValues(app.Namespace, app.Spec.Destination.Server).Observe(duration.Seconds()) 235 } 236 237 type appCollector struct { 238 store applister.ApplicationLister 239 appFilter func(obj interface{}) bool 240 } 241 242 // NewAppCollector returns a prometheus collector for application metrics 243 func NewAppCollector(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) prometheus.Collector { 244 return &appCollector{ 245 store: appLister, 246 appFilter: appFilter, 247 } 248 } 249 250 // NewAppRegistry creates a new prometheus registry that collects applications 251 func NewAppRegistry(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) *prometheus.Registry { 252 registry := prometheus.NewRegistry() 253 registry.MustRegister(NewAppCollector(appLister, appFilter)) 254 return registry 255 } 256 257 // Describe implements the prometheus.Collector interface 258 func (c *appCollector) Describe(ch chan<- *prometheus.Desc) { 259 ch <- descAppInfo 260 ch <- descAppSyncStatusCode 261 ch <- descAppHealthStatus 262 } 263 264 // Collect implements the prometheus.Collector interface 265 func (c *appCollector) Collect(ch chan<- prometheus.Metric) { 266 apps, err := c.store.List(labels.NewSelector()) 267 if err != nil { 268 log.Warnf("Failed to collect applications: %v", err) 269 return 270 } 271 for _, app := range apps { 272 if c.appFilter(app) { 273 collectApps(ch, app) 274 } 275 } 276 } 277 278 func boolFloat64(b bool) float64 { 279 if b { 280 return 1 281 } 282 return 0 283 } 284 285 func collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) { 286 addConstMetric := func(desc *prometheus.Desc, t prometheus.ValueType, v float64, lv ...string) { 287 project := app.Spec.GetProject() 288 lv = append([]string{app.Namespace, app.Name, project}, lv...) 289 ch <- prometheus.MustNewConstMetric(desc, t, v, lv...) 290 } 291 addGauge := func(desc *prometheus.Desc, v float64, lv ...string) { 292 addConstMetric(desc, prometheus.GaugeValue, v, lv...) 293 } 294 295 var operation string 296 if app.DeletionTimestamp != nil { 297 operation = "delete" 298 } else if app.Operation != nil && app.Operation.Sync != nil { 299 operation = "sync" 300 } 301 syncStatus := app.Status.Sync.Status 302 if syncStatus == "" { 303 syncStatus = argoappv1.SyncStatusCodeUnknown 304 } 305 healthStatus := app.Status.Health.Status 306 if healthStatus == "" { 307 healthStatus = health.HealthStatusUnknown 308 } 309 310 addGauge(descAppInfo, 1, git.NormalizeGitURL(app.Spec.Source.RepoURL), app.Spec.Destination.Server, app.Spec.Destination.Namespace, string(syncStatus), string(healthStatus), operation) 311 312 // Deprecated controller metrics 313 if os.Getenv(EnvVarLegacyControllerMetrics) == "true" { 314 addGauge(descAppCreated, float64(app.CreationTimestamp.Unix())) 315 316 addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeSynced), string(argoappv1.SyncStatusCodeSynced)) 317 addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeOutOfSync), string(argoappv1.SyncStatusCodeOutOfSync)) 318 addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeUnknown || syncStatus == ""), string(argoappv1.SyncStatusCodeUnknown)) 319 320 healthStatus := app.Status.Health.Status 321 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusUnknown || healthStatus == ""), string(health.HealthStatusUnknown)) 322 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusProgressing), string(health.HealthStatusProgressing)) 323 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusSuspended), string(health.HealthStatusSuspended)) 324 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusHealthy), string(health.HealthStatusHealthy)) 325 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusDegraded), string(health.HealthStatusDegraded)) 326 addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusMissing), string(health.HealthStatusMissing)) 327 } 328 }