github.com/argoproj/argo-cd/v3@v3.2.1/controller/metrics/clustercollector.go (about) 1 package metrics 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 "github.com/argoproj/gitops-engine/pkg/cache" 9 "github.com/prometheus/client_golang/prometheus" 10 log "github.com/sirupsen/logrus" 11 12 argoappv1 "github.com/argoproj/argo-cd/v3/pkg/apis/application/v1alpha1" 13 metricsutil "github.com/argoproj/argo-cd/v3/util/metrics" 14 ) 15 16 const ( 17 metricsCollectionInterval = 30 * time.Second 18 metricsCollectionTimeout = 10 * time.Second 19 ) 20 21 var ( 22 descClusterDefaultLabels = []string{"server"} 23 24 descClusterLabels *prometheus.Desc 25 26 descClusterInfo = prometheus.NewDesc( 27 "argocd_cluster_info", 28 "Information about cluster.", 29 append(descClusterDefaultLabels, "k8s_version", "name"), 30 nil, 31 ) 32 descClusterCacheResources = prometheus.NewDesc( 33 "argocd_cluster_api_resource_objects", 34 "Number of k8s resource objects in the cache.", 35 descClusterDefaultLabels, 36 nil, 37 ) 38 descClusterAPIs = prometheus.NewDesc( 39 "argocd_cluster_api_resources", 40 "Number of monitored kubernetes API resources.", 41 descClusterDefaultLabels, 42 nil, 43 ) 44 descClusterCacheAgeSeconds = prometheus.NewDesc( 45 "argocd_cluster_cache_age_seconds", 46 "Cluster cache age in seconds.", 47 descClusterDefaultLabels, 48 nil, 49 ) 50 descClusterConnectionStatus = prometheus.NewDesc( 51 "argocd_cluster_connection_status", 52 "The k8s cluster current connection status.", 53 append(descClusterDefaultLabels, "k8s_version"), 54 nil, 55 ) 56 ) 57 58 type HasClustersInfo interface { 59 GetClustersInfo() []cache.ClusterInfo 60 } 61 62 type ClusterLister func(ctx context.Context) (*argoappv1.ClusterList, error) 63 64 type clusterCollector struct { 65 infoSource HasClustersInfo 66 lock sync.RWMutex 67 clusterLabels []string 68 clusterLister ClusterLister 69 70 latestInfo []*clusterData 71 } 72 73 type clusterData struct { 74 info *cache.ClusterInfo 75 cluster *argoappv1.Cluster 76 } 77 78 func NewClusterCollector(ctx context.Context, source HasClustersInfo, clusterLister ClusterLister, clusterLabels []string) prometheus.Collector { 79 if len(clusterLabels) > 0 { 80 normalizedClusterLabels := metricsutil.NormalizeLabels("label", clusterLabels) 81 descClusterLabels = prometheus.NewDesc( 82 "argocd_cluster_labels", 83 "Argo Cluster labels converted to Prometheus labels", 84 append(append(descClusterDefaultLabels, "name"), normalizedClusterLabels...), 85 nil, 86 ) 87 } 88 89 collector := &clusterCollector{ 90 infoSource: source, 91 clusterLabels: clusterLabels, 92 clusterLister: clusterLister, 93 lock: sync.RWMutex{}, 94 } 95 96 collector.setClusterData() 97 go collector.run(ctx) 98 99 return collector 100 } 101 102 func (c *clusterCollector) run(ctx context.Context) { 103 //nolint:staticcheck // FIXME: complains about SA1015 104 tick := time.Tick(metricsCollectionInterval) 105 for { 106 select { 107 case <-ctx.Done(): 108 case <-tick: 109 c.setClusterData() 110 } 111 } 112 } 113 114 func (c *clusterCollector) setClusterData() { 115 if clusterData, err := c.getClusterData(); err == nil { 116 c.lock.Lock() 117 c.latestInfo = clusterData 118 c.lock.Unlock() 119 } else { 120 log.Warnf("error collecting cluster metrics: %v", err) 121 } 122 } 123 124 func (c *clusterCollector) getClusterData() ([]*clusterData, error) { 125 clusterDatas := []*clusterData{} 126 clusterInfos := c.infoSource.GetClustersInfo() 127 128 ctx, cancel := context.WithTimeout(context.Background(), metricsCollectionTimeout) 129 defer cancel() 130 clusters, err := c.clusterLister(ctx) 131 if err != nil { 132 return nil, err 133 } 134 135 clusterMap := map[string]*argoappv1.Cluster{} 136 for i, cluster := range clusters.Items { 137 clusterMap[cluster.Server] = &clusters.Items[i] 138 } 139 140 // Base the cluster data on the ClusterInfo because it only contains the 141 // clusters managed by this controller instance 142 for i, info := range clusterInfos { 143 cluster, ok := clusterMap[info.Server] 144 if !ok { 145 // This should not happen, but we cannot emit incomplete metrics, so we skip this cluster 146 log.WithField("server", info.Server).Warnf("could find cluster for metrics collection") 147 continue 148 } 149 clusterDatas = append(clusterDatas, &clusterData{ 150 info: &clusterInfos[i], 151 cluster: cluster, 152 }) 153 } 154 return clusterDatas, nil 155 } 156 157 // Describe implements the prometheus.Collector interface 158 func (c *clusterCollector) Describe(ch chan<- *prometheus.Desc) { 159 ch <- descClusterInfo 160 ch <- descClusterCacheResources 161 ch <- descClusterAPIs 162 ch <- descClusterCacheAgeSeconds 163 ch <- descClusterConnectionStatus 164 if len(c.clusterLabels) > 0 { 165 ch <- descClusterLabels 166 } 167 } 168 169 func (c *clusterCollector) Collect(ch chan<- prometheus.Metric) { 170 c.lock.RLock() 171 latestInfo := c.latestInfo 172 c.lock.RUnlock() 173 174 now := time.Now() 175 for _, clusterData := range latestInfo { 176 info := clusterData.info 177 name := clusterData.cluster.Name 178 labels := clusterData.cluster.Labels 179 180 defaultValues := []string{info.Server} 181 ch <- prometheus.MustNewConstMetric(descClusterInfo, prometheus.GaugeValue, 1, append(defaultValues, info.K8SVersion, name)...) 182 ch <- prometheus.MustNewConstMetric(descClusterCacheResources, prometheus.GaugeValue, float64(info.ResourcesCount), defaultValues...) 183 ch <- prometheus.MustNewConstMetric(descClusterAPIs, prometheus.GaugeValue, float64(info.APIsCount), defaultValues...) 184 cacheAgeSeconds := -1 185 if info.LastCacheSyncTime != nil { 186 cacheAgeSeconds = int(now.Sub(*info.LastCacheSyncTime).Seconds()) 187 } 188 ch <- prometheus.MustNewConstMetric(descClusterCacheAgeSeconds, prometheus.GaugeValue, float64(cacheAgeSeconds), defaultValues...) 189 ch <- prometheus.MustNewConstMetric(descClusterConnectionStatus, prometheus.GaugeValue, boolFloat64(info.SyncError == nil), append(defaultValues, info.K8SVersion)...) 190 191 if len(c.clusterLabels) > 0 && labels != nil { 192 labelValues := []string{} 193 labelValues = append(labelValues, info.Server, name) 194 for _, desiredLabel := range c.clusterLabels { 195 value := labels[desiredLabel] 196 labelValues = append(labelValues, value) 197 } 198 ch <- prometheus.MustNewConstMetric(descClusterLabels, prometheus.GaugeValue, 1, labelValues...) 199 } 200 } 201 }