github.com/polarismesh/polaris@v1.17.8/cache/service/instance_metrics.go (about)

     1  /**
     2   * Tencent is pleased to support the open source community by making Polaris available.
     3   *
     4   * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
     5   *
     6   * Licensed under the BSD 3-Clause License (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at
     9   *
    10   * https://opensource.org/licenses/BSD-3-Clause
    11   *
    12   * Unless required by applicable law or agreed to in writing, software distributed
    13   * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    14   * CONDITIONS OF ANY KIND, either express or implied. See the License for the
    15   * specific language governing permissions and limitations under the License.
    16   */
    17  
    18  package service
    19  
    20  import (
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	"go.uber.org/zap"
    23  
    24  	types "github.com/polarismesh/polaris/cache/api"
    25  	"github.com/polarismesh/polaris/common/metrics"
    26  	"github.com/polarismesh/polaris/common/model"
    27  	"github.com/polarismesh/polaris/plugin"
    28  )
    29  
    30  var preServiceInfos = map[string]map[string]struct{}{}
    31  
    32  func (ic *instanceCache) reportMetricsInfo() {
    33  	cacheMgr := ic.BaseCache.CacheMgr
    34  	tmpServiceInfos := map[string]map[string]struct{}{}
    35  
    36  	allServices := map[string]map[string]struct{}{}
    37  	onlineService := map[string]map[string]struct{}{}
    38  	offlineService := map[string]map[string]struct{}{}
    39  	abnormalService := map[string]map[string]struct{}{}
    40  	serviceCache := cacheMgr.GetCacher(types.CacheService).(*serviceCache)
    41  	metricValues := make([]metrics.DiscoveryMetric, 0, 32)
    42  
    43  	_ = serviceCache.IteratorServices(func(key string, svc *model.Service) (bool, error) {
    44  		if _, ok := tmpServiceInfos[svc.Namespace]; !ok {
    45  			tmpServiceInfos[svc.Namespace] = map[string]struct{}{}
    46  		}
    47  		tmpServiceInfos[svc.Namespace][svc.Name] = struct{}{}
    48  
    49  		if _, ok := allServices[svc.Namespace]; !ok {
    50  			allServices[svc.Namespace] = map[string]struct{}{}
    51  		}
    52  		allServices[svc.Namespace][svc.Name] = struct{}{}
    53  
    54  		if _, ok := offlineService[svc.Namespace]; !ok {
    55  			offlineService[svc.Namespace] = map[string]struct{}{}
    56  		}
    57  		countInfo := ic.GetInstancesCountByServiceID(svc.ID)
    58  		if countInfo.TotalInstanceCount == 0 {
    59  			offlineService[svc.Namespace][svc.Name] = struct{}{}
    60  		}
    61  		return true, nil
    62  	})
    63  
    64  	// instance count metrics
    65  	ic.instanceCounts.Range(func(serviceID string, countInfo *model.InstanceCount) bool {
    66  		svc := serviceCache.GetServiceByID(serviceID)
    67  		if svc == nil {
    68  			log.Debug("[Cache][Instance] report metrics get service not found", zap.String("svc-id", serviceID))
    69  			return true
    70  		}
    71  
    72  		if _, ok := onlineService[svc.Namespace]; !ok {
    73  			onlineService[svc.Namespace] = map[string]struct{}{}
    74  		}
    75  		if _, ok := abnormalService[svc.Namespace]; !ok {
    76  			abnormalService[svc.Namespace] = map[string]struct{}{}
    77  		}
    78  
    79  		if countInfo.TotalInstanceCount != 0 && countInfo.HealthyInstanceCount == 0 {
    80  			abnormalService[svc.Namespace][svc.Name] = struct{}{}
    81  		}
    82  		if countInfo.TotalInstanceCount != 0 && countInfo.HealthyInstanceCount > 0 {
    83  			onlineService[svc.Namespace][svc.Name] = struct{}{}
    84  		}
    85  
    86  		metricValues = append(metricValues, metrics.DiscoveryMetric{
    87  			Type:     metrics.InstanceMetrics,
    88  			Total:    int64(countInfo.TotalInstanceCount),
    89  			Abnormal: int64(countInfo.TotalInstanceCount - countInfo.HealthyInstanceCount),
    90  			Online:   int64(countInfo.HealthyInstanceCount),
    91  			Isolate:  int64(countInfo.IsolateInstanceCount),
    92  			Labels: map[string]string{
    93  				metrics.LabelNamespace: svc.Namespace,
    94  				metrics.LabelService:   svc.Name,
    95  			},
    96  		})
    97  
    98  		return true
    99  	})
   100  
   101  	for ns := range allServices {
   102  		metricValues = append(metricValues, metrics.DiscoveryMetric{
   103  			Type:     metrics.ServiceMetrics,
   104  			Total:    int64(len(allServices[ns])),
   105  			Abnormal: int64(len(abnormalService[ns])),
   106  			Offline:  int64(len(offlineService[ns])),
   107  			Online:   int64(len(onlineService[ns])),
   108  			Labels: map[string]string{
   109  				metrics.LabelNamespace: ns,
   110  			},
   111  		})
   112  	}
   113  
   114  	cleanExpireServiceMetricLabel(preServiceInfos, tmpServiceInfos)
   115  	preServiceInfos = tmpServiceInfos
   116  	plugin.GetStatis().ReportDiscoveryMetrics(metricValues...)
   117  }
   118  
   119  func cleanExpireServiceMetricLabel(pre, curr map[string]map[string]struct{}) {
   120  	if len(pre) == 0 {
   121  		return
   122  	}
   123  
   124  	var (
   125  		removeNs = map[string]struct{}{}
   126  		remove   = map[string]map[string]struct{}{}
   127  	)
   128  
   129  	for ns, services := range pre {
   130  		if _, ok := curr[ns]; !ok {
   131  			removeNs[ns] = struct{}{}
   132  		}
   133  		if _, ok := remove[ns]; !ok {
   134  			remove[ns] = map[string]struct{}{}
   135  		}
   136  		for service := range services {
   137  			if _, ok := curr[ns][service]; !ok {
   138  				remove[ns][service] = struct{}{}
   139  			}
   140  		}
   141  	}
   142  
   143  	for ns := range removeNs {
   144  		metrics.GetServiceCount().Delete(prometheus.Labels{
   145  			metrics.LabelNamespace: ns,
   146  		})
   147  		metrics.GetServiceOfflineCountl().Delete(prometheus.Labels{
   148  			metrics.LabelNamespace: ns,
   149  		})
   150  		metrics.GetServiceOnlineCountl().Delete(prometheus.Labels{
   151  			metrics.LabelNamespace: ns,
   152  		})
   153  		metrics.GetServiceAbnormalCountl().Delete(prometheus.Labels{
   154  			metrics.LabelNamespace: ns,
   155  		})
   156  	}
   157  
   158  	for ns, services := range remove {
   159  		for service := range services {
   160  			metrics.GetInstanceCount().Delete(prometheus.Labels{
   161  				metrics.LabelNamespace: ns,
   162  				metrics.LabelService:   service,
   163  			})
   164  			metrics.GetInstanceAbnormalCountl().Delete(prometheus.Labels{
   165  				metrics.LabelNamespace: ns,
   166  				metrics.LabelService:   service,
   167  			})
   168  			metrics.GetInstanceIsolateCountl().Delete(prometheus.Labels{
   169  				metrics.LabelNamespace: ns,
   170  				metrics.LabelService:   service,
   171  			})
   172  			metrics.GetInstanceOnlineCountl().Delete(prometheus.Labels{
   173  				metrics.LabelNamespace: ns,
   174  				metrics.LabelService:   service,
   175  			})
   176  		}
   177  	}
   178  
   179  }