github.com/argoproj/argo-cd@v1.8.7/controller/metrics/metrics.go (about)

     1  package metrics
     2  
     3  import (
     4  	"context"
     5  	"net/http"
     6  	"os"
     7  	"strconv"
     8  	"time"
     9  
    10  	"github.com/argoproj/gitops-engine/pkg/health"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/prometheus/client_golang/prometheus/promhttp"
    13  	log "github.com/sirupsen/logrus"
    14  	"k8s.io/apimachinery/pkg/labels"
    15  
    16  	argoappv1 "github.com/argoproj/argo-cd/pkg/apis/application/v1alpha1"
    17  	applister "github.com/argoproj/argo-cd/pkg/client/listers/application/v1alpha1"
    18  	"github.com/argoproj/argo-cd/util/git"
    19  	"github.com/argoproj/argo-cd/util/healthz"
    20  )
    21  
    22  type MetricsServer struct {
    23  	*http.Server
    24  	syncCounter             *prometheus.CounterVec
    25  	kubectlExecCounter      *prometheus.CounterVec
    26  	kubectlExecPendingGauge *prometheus.GaugeVec
    27  	k8sRequestCounter       *prometheus.CounterVec
    28  	clusterEventsCounter    *prometheus.CounterVec
    29  	redisRequestCounter     *prometheus.CounterVec
    30  	reconcileHistogram      *prometheus.HistogramVec
    31  	redisRequestHistogram   *prometheus.HistogramVec
    32  	registry                *prometheus.Registry
    33  	hostname                string
    34  }
    35  
const (
	// MetricsPath is the HTTP path on which the metrics handler is mounted.
	MetricsPath = "/metrics"
	// EnvVarLegacyControllerMetrics names the environment variable that, when
	// set to "true", re-enables the deprecated per-application Prometheus metrics.
	EnvVarLegacyControllerMetrics = "ARGOCD_LEGACY_CONTROLLER_METRICS"
)
    42  
    43  // Follow Prometheus naming practices
    44  // https://prometheus.io/docs/practices/naming/
    45  var (
    46  	descAppDefaultLabels = []string{"namespace", "name", "project"}
    47  
    48  	descAppInfo = prometheus.NewDesc(
    49  		"argocd_app_info",
    50  		"Information about application.",
    51  		append(descAppDefaultLabels, "repo", "dest_server", "dest_namespace", "sync_status", "health_status", "operation"),
    52  		nil,
    53  	)
    54  	// DEPRECATED
    55  	descAppCreated = prometheus.NewDesc(
    56  		"argocd_app_created_time",
    57  		"Creation time in unix timestamp for an application.",
    58  		descAppDefaultLabels,
    59  		nil,
    60  	)
    61  	// DEPRECATED: superceded by sync_status label in argocd_app_info
    62  	descAppSyncStatusCode = prometheus.NewDesc(
    63  		"argocd_app_sync_status",
    64  		"The application current sync status.",
    65  		append(descAppDefaultLabels, "sync_status"),
    66  		nil,
    67  	)
    68  	// DEPRECATED: superceded by health_status label in argocd_app_info
    69  	descAppHealthStatus = prometheus.NewDesc(
    70  		"argocd_app_health_status",
    71  		"The application current health status.",
    72  		append(descAppDefaultLabels, "health_status"),
    73  		nil,
    74  	)
    75  
    76  	syncCounter = prometheus.NewCounterVec(
    77  		prometheus.CounterOpts{
    78  			Name: "argocd_app_sync_total",
    79  			Help: "Number of application syncs.",
    80  		},
    81  		append(descAppDefaultLabels, "dest_server", "phase"),
    82  	)
    83  
    84  	k8sRequestCounter = prometheus.NewCounterVec(
    85  		prometheus.CounterOpts{
    86  			Name: "argocd_app_k8s_request_total",
    87  			Help: "Number of kubernetes requests executed during application reconciliation.",
    88  		},
    89  		append(descAppDefaultLabels, "server", "response_code", "verb", "resource_kind", "resource_namespace"),
    90  	)
    91  
    92  	kubectlExecCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
    93  		Name: "argocd_kubectl_exec_total",
    94  		Help: "Number of kubectl executions",
    95  	}, []string{"hostname", "command"})
    96  
    97  	kubectlExecPendingGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
    98  		Name: "argocd_kubectl_exec_pending",
    99  		Help: "Number of pending kubectl executions",
   100  	}, []string{"hostname", "command"})
   101  
   102  	reconcileHistogram = prometheus.NewHistogramVec(
   103  		prometheus.HistogramOpts{
   104  			Name: "argocd_app_reconcile",
   105  			Help: "Application reconciliation performance.",
   106  			// Buckets chosen after observing a ~2100ms mean reconcile time
   107  			Buckets: []float64{0.25, .5, 1, 2, 4, 8, 16},
   108  		},
   109  		[]string{"namespace", "dest_server"},
   110  	)
   111  
   112  	clusterEventsCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
   113  		Name: "argocd_cluster_events_total",
   114  		Help: "Number of processes k8s resource events.",
   115  	}, append(descClusterDefaultLabels, "group", "kind"))
   116  
   117  	redisRequestCounter = prometheus.NewCounterVec(
   118  		prometheus.CounterOpts{
   119  			Name: "argocd_redis_request_total",
   120  			Help: "Number of kubernetes requests executed during application reconciliation.",
   121  		},
   122  		[]string{"hostname", "initiator", "failed"},
   123  	)
   124  
   125  	redisRequestHistogram = prometheus.NewHistogramVec(
   126  		prometheus.HistogramOpts{
   127  			Name:    "argocd_redis_request_duration",
   128  			Help:    "Redis requests duration.",
   129  			Buckets: []float64{0.01, 0.05, 0.10, 0.25, .5, 1},
   130  		},
   131  		[]string{"hostname", "initiator"},
   132  	)
   133  )
   134  
   135  // NewMetricsServer returns a new prometheus server which collects application metrics
   136  func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, healthCheck func(r *http.Request) error) (*MetricsServer, error) {
   137  	hostname, err := os.Hostname()
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	mux := http.NewServeMux()
   142  	registry := NewAppRegistry(appLister, appFilter)
   143  	mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{
   144  		// contains app controller specific metrics
   145  		registry,
   146  		// contains process, golang and controller workqueues metrics
   147  		prometheus.DefaultGatherer,
   148  	}, promhttp.HandlerOpts{}))
   149  	healthz.ServeHealthCheck(mux, healthCheck)
   150  
   151  	registry.MustRegister(syncCounter)
   152  	registry.MustRegister(k8sRequestCounter)
   153  	registry.MustRegister(kubectlExecCounter)
   154  	registry.MustRegister(kubectlExecPendingGauge)
   155  	registry.MustRegister(reconcileHistogram)
   156  	registry.MustRegister(clusterEventsCounter)
   157  	registry.MustRegister(redisRequestCounter)
   158  	registry.MustRegister(redisRequestHistogram)
   159  
   160  	return &MetricsServer{
   161  		registry: registry,
   162  		Server: &http.Server{
   163  			Addr:    addr,
   164  			Handler: mux,
   165  		},
   166  		syncCounter:             syncCounter,
   167  		k8sRequestCounter:       k8sRequestCounter,
   168  		kubectlExecCounter:      kubectlExecCounter,
   169  		kubectlExecPendingGauge: kubectlExecPendingGauge,
   170  		reconcileHistogram:      reconcileHistogram,
   171  		clusterEventsCounter:    clusterEventsCounter,
   172  		redisRequestCounter:     redisRequestCounter,
   173  		redisRequestHistogram:   redisRequestHistogram,
   174  		hostname:                hostname,
   175  	}, nil
   176  }
   177  
   178  func (m *MetricsServer) RegisterClustersInfoSource(ctx context.Context, source HasClustersInfo) {
   179  	collector := &clusterCollector{infoSource: source}
   180  	go collector.Run(ctx)
   181  	m.registry.MustRegister(collector)
   182  }
   183  
   184  // IncSync increments the sync counter for an application
   185  func (m *MetricsServer) IncSync(app *argoappv1.Application, state *argoappv1.OperationState) {
   186  	if !state.Phase.Completed() {
   187  		return
   188  	}
   189  	m.syncCounter.WithLabelValues(app.Namespace, app.Name, app.Spec.GetProject(), app.Spec.Destination.Server, string(state.Phase)).Inc()
   190  }
   191  
   192  func (m *MetricsServer) IncKubectlExec(command string) {
   193  	m.kubectlExecCounter.WithLabelValues(m.hostname, command).Inc()
   194  }
   195  
   196  func (m *MetricsServer) IncKubectlExecPending(command string) {
   197  	m.kubectlExecPendingGauge.WithLabelValues(m.hostname, command).Inc()
   198  }
   199  
   200  func (m *MetricsServer) DecKubectlExecPending(command string) {
   201  	m.kubectlExecPendingGauge.WithLabelValues(m.hostname, command).Dec()
   202  }
   203  
   204  // IncClusterEventsCount increments the number of cluster events
   205  func (m *MetricsServer) IncClusterEventsCount(server, group, kind string) {
   206  	m.clusterEventsCounter.WithLabelValues(server, group, kind).Inc()
   207  }
   208  
   209  // IncKubernetesRequest increments the kubernetes requests counter for an application
   210  func (m *MetricsServer) IncKubernetesRequest(app *argoappv1.Application, server, statusCode, verb, resourceKind, resourceNamespace string) {
   211  	var namespace, name, project string
   212  	if app != nil {
   213  		namespace = app.Namespace
   214  		name = app.Name
   215  		project = app.Spec.GetProject()
   216  	}
   217  	m.k8sRequestCounter.WithLabelValues(
   218  		namespace, name, project, server, statusCode,
   219  		verb, resourceKind, resourceNamespace,
   220  	).Inc()
   221  }
   222  
   223  func (m *MetricsServer) IncRedisRequest(failed bool) {
   224  	m.redisRequestCounter.WithLabelValues(m.hostname, "argocd-application-controller", strconv.FormatBool(failed)).Inc()
   225  }
   226  
   227  // ObserveRedisRequestDuration observes redis request duration
   228  func (m *MetricsServer) ObserveRedisRequestDuration(duration time.Duration) {
   229  	m.redisRequestHistogram.WithLabelValues(m.hostname, "argocd-application-controller").Observe(duration.Seconds())
   230  }
   231  
   232  // IncReconcile increments the reconcile counter for an application
   233  func (m *MetricsServer) IncReconcile(app *argoappv1.Application, duration time.Duration) {
   234  	m.reconcileHistogram.WithLabelValues(app.Namespace, app.Spec.Destination.Server).Observe(duration.Seconds())
   235  }
   236  
// appCollector is a prometheus.Collector that produces per-application metrics
// on every scrape from the applications returned by its lister.
type appCollector struct {
	// store lists the applications to report on.
	store applister.ApplicationLister
	// appFilter selects which listed applications are reported; applications
	// for which it returns false are skipped.
	appFilter func(obj interface{}) bool
}
   241  
   242  // NewAppCollector returns a prometheus collector for application metrics
   243  func NewAppCollector(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) prometheus.Collector {
   244  	return &appCollector{
   245  		store:     appLister,
   246  		appFilter: appFilter,
   247  	}
   248  }
   249  
   250  // NewAppRegistry creates a new prometheus registry that collects applications
   251  func NewAppRegistry(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) *prometheus.Registry {
   252  	registry := prometheus.NewRegistry()
   253  	registry.MustRegister(NewAppCollector(appLister, appFilter))
   254  	return registry
   255  }
   256  
   257  // Describe implements the prometheus.Collector interface
   258  func (c *appCollector) Describe(ch chan<- *prometheus.Desc) {
   259  	ch <- descAppInfo
   260  	ch <- descAppSyncStatusCode
   261  	ch <- descAppHealthStatus
   262  }
   263  
   264  // Collect implements the prometheus.Collector interface
   265  func (c *appCollector) Collect(ch chan<- prometheus.Metric) {
   266  	apps, err := c.store.List(labels.NewSelector())
   267  	if err != nil {
   268  		log.Warnf("Failed to collect applications: %v", err)
   269  		return
   270  	}
   271  	for _, app := range apps {
   272  		if c.appFilter(app) {
   273  			collectApps(ch, app)
   274  		}
   275  	}
   276  }
   277  
   278  func boolFloat64(b bool) float64 {
   279  	if b {
   280  		return 1
   281  	}
   282  	return 0
   283  }
   284  
   285  func collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) {
   286  	addConstMetric := func(desc *prometheus.Desc, t prometheus.ValueType, v float64, lv ...string) {
   287  		project := app.Spec.GetProject()
   288  		lv = append([]string{app.Namespace, app.Name, project}, lv...)
   289  		ch <- prometheus.MustNewConstMetric(desc, t, v, lv...)
   290  	}
   291  	addGauge := func(desc *prometheus.Desc, v float64, lv ...string) {
   292  		addConstMetric(desc, prometheus.GaugeValue, v, lv...)
   293  	}
   294  
   295  	var operation string
   296  	if app.DeletionTimestamp != nil {
   297  		operation = "delete"
   298  	} else if app.Operation != nil && app.Operation.Sync != nil {
   299  		operation = "sync"
   300  	}
   301  	syncStatus := app.Status.Sync.Status
   302  	if syncStatus == "" {
   303  		syncStatus = argoappv1.SyncStatusCodeUnknown
   304  	}
   305  	healthStatus := app.Status.Health.Status
   306  	if healthStatus == "" {
   307  		healthStatus = health.HealthStatusUnknown
   308  	}
   309  
   310  	addGauge(descAppInfo, 1, git.NormalizeGitURL(app.Spec.Source.RepoURL), app.Spec.Destination.Server, app.Spec.Destination.Namespace, string(syncStatus), string(healthStatus), operation)
   311  
   312  	// Deprecated controller metrics
   313  	if os.Getenv(EnvVarLegacyControllerMetrics) == "true" {
   314  		addGauge(descAppCreated, float64(app.CreationTimestamp.Unix()))
   315  
   316  		addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeSynced), string(argoappv1.SyncStatusCodeSynced))
   317  		addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeOutOfSync), string(argoappv1.SyncStatusCodeOutOfSync))
   318  		addGauge(descAppSyncStatusCode, boolFloat64(syncStatus == argoappv1.SyncStatusCodeUnknown || syncStatus == ""), string(argoappv1.SyncStatusCodeUnknown))
   319  
   320  		healthStatus := app.Status.Health.Status
   321  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusUnknown || healthStatus == ""), string(health.HealthStatusUnknown))
   322  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusProgressing), string(health.HealthStatusProgressing))
   323  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusSuspended), string(health.HealthStatusSuspended))
   324  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusHealthy), string(health.HealthStatusHealthy))
   325  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusDegraded), string(health.HealthStatusDegraded))
   326  		addGauge(descAppHealthStatus, boolFloat64(healthStatus == health.HealthStatusMissing), string(health.HealthStatusMissing))
   327  	}
   328  }