github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/sources/summary/summary.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package summary
    16  
    17  import (
    18  	"fmt"
    19  	"net/url"
    20  	"time"
    21  
    22  	. "k8s.io/heapster/metrics/core"
    23  	"k8s.io/heapster/metrics/sources/kubelet"
    24  
    25  	"github.com/golang/glog"
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	kube_api "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	kube_client "k8s.io/client-go/kubernetes"
    30  	v1listers "k8s.io/client-go/listers/core/v1"
    31  	"k8s.io/client-go/tools/cache"
    32  	"k8s.io/heapster/metrics/util"
    33  	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
    34  )
    35  
    36  var (
    37  	summaryRequestLatency = prometheus.NewSummaryVec(
    38  		prometheus.SummaryOpts{
    39  			Namespace: "heapster",
    40  			Subsystem: "kubelet_summary",
    41  			Name:      "request_duration_milliseconds",
    42  			Help:      "The Kubelet summary request latencies in milliseconds.",
    43  		},
    44  		[]string{"node"},
    45  	)
    46  )
    47  
// VolumeResourcePrefix is the prefix used for the LabelResourceID for volume
// metrics; the volume name is appended to it.
const VolumeResourcePrefix = "Volume:"
    50  
// init registers the summary request latency collector with the default
// Prometheus registry. MustRegister panics if registration fails.
func init() {
	prometheus.MustRegister(summaryRequestLatency)
}
    54  
// NodeInfo identifies a node and the kubelet endpoint used to scrape it.
type NodeInfo struct {
	// Host is the kubelet endpoint (IP and port) of the node.
	kubelet.Host
	// NodeName is the name of the Kubernetes node object.
	NodeName string
	// HostName is the node's hostname.
	HostName string
	// HostID is an identifier of the underlying host.
	HostID string
	// KubeletVersion is the kubelet version reported in the node status.
	KubeletVersion string
}
    62  
// summaryMetricsSource is a metrics source backed by the kubelet summary of a
// single node. It provides kubelet metrics for pods and system containers.
type summaryMetricsSource struct {
	// node is the node this source scrapes.
	node NodeInfo
	// kubeletClient is the client used to fetch the summary from the kubelet.
	kubeletClient *kubelet.KubeletClient
}
    68  
    69  func NewSummaryMetricsSource(node NodeInfo, client *kubelet.KubeletClient) MetricsSource {
    70  	return &summaryMetricsSource{
    71  		node:          node,
    72  		kubeletClient: client,
    73  	}
    74  }
    75  
// Name returns the name of this source; it is the same value as String().
func (this *summaryMetricsSource) Name() string {
	return this.String()
}
    79  
    80  func (this *summaryMetricsSource) String() string {
    81  	return fmt.Sprintf("kubelet_summary:%s:%d", this.node.IP, this.node.Port)
    82  }
    83  
    84  func (this *summaryMetricsSource) ScrapeMetrics(start, end time.Time) (*DataBatch, error) {
    85  	result := &DataBatch{
    86  		Timestamp:  time.Now(),
    87  		MetricSets: map[string]*MetricSet{},
    88  	}
    89  
    90  	summary, err := func() (*stats.Summary, error) {
    91  		startTime := time.Now()
    92  		defer func() {
    93  			summaryRequestLatency.WithLabelValues(this.node.HostName).Observe(float64(time.Since(startTime)) / float64(time.Millisecond))
    94  		}()
    95  		return this.kubeletClient.GetSummary(this.node.Host)
    96  	}()
    97  
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	result.MetricSets = this.decodeSummary(summary)
   103  
   104  	return result, err
   105  }
   106  
// Resource ID values attached to filesystem metrics.
const (
	// RootFsKey is the resource ID used for root filesystem metrics.
	RootFsKey = "/"
	// LogsKey is the resource ID used for container logs filesystem metrics.
	LogsKey   = "logs"
)
   111  
// systemNameMap maps summary system container names to the original (legacy)
// names, for backwards compatibility.
// TODO: Migrate to the new system names and remove this.
var systemNameMap = map[string]string{
	stats.SystemContainerRuntime: "docker-daemon",
	stats.SystemContainerMisc:    "system",
}
   118  
   119  // decodeSummary translates the kubelet statsSummary API into the flattened heapster MetricSet API.
   120  func (this *summaryMetricsSource) decodeSummary(summary *stats.Summary) map[string]*MetricSet {
   121  	glog.V(9).Infof("Begin summary decode")
   122  	result := map[string]*MetricSet{}
   123  
   124  	labels := map[string]string{
   125  		LabelNodename.Key: this.node.NodeName,
   126  		LabelHostname.Key: this.node.HostName,
   127  		LabelHostID.Key:   this.node.HostID,
   128  	}
   129  
   130  	this.decodeNodeStats(result, labels, &summary.Node)
   131  	for _, pod := range summary.Pods {
   132  		this.decodePodStats(result, labels, &pod)
   133  	}
   134  
   135  	glog.V(9).Infof("End summary decode")
   136  	return result
   137  }
   138  
   139  // Convenience method for labels deep copy.
   140  func (this *summaryMetricsSource) cloneLabels(labels map[string]string) map[string]string {
   141  	clone := make(map[string]string, len(labels))
   142  	for k, v := range labels {
   143  		clone[k] = v
   144  	}
   145  	return clone
   146  }
   147  
   148  func (this *summaryMetricsSource) decodeNodeStats(metrics map[string]*MetricSet, labels map[string]string, node *stats.NodeStats) {
   149  	glog.V(9).Infof("Decoding node stats for node %s...", node.NodeName)
   150  	nodeMetrics := &MetricSet{
   151  		Labels:              this.cloneLabels(labels),
   152  		MetricValues:        map[string]MetricValue{},
   153  		LabeledMetrics:      []LabeledMetric{},
   154  		CollectionStartTime: node.StartTime.Time,
   155  		ScrapeTime:          this.getScrapeTime(node.CPU, node.Memory, node.Network),
   156  	}
   157  	nodeMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeNode
   158  
   159  	this.decodeUptime(nodeMetrics, node.StartTime.Time)
   160  	this.decodeCPUStats(nodeMetrics, node.CPU)
   161  	this.decodeMemoryStats(nodeMetrics, node.Memory)
   162  	this.decodeNetworkStats(nodeMetrics, node.Network)
   163  	this.decodeFsStats(nodeMetrics, RootFsKey, node.Fs)
   164  	this.decodeEphemeralStorageStats(nodeMetrics, node.Fs)
   165  	metrics[NodeKey(node.NodeName)] = nodeMetrics
   166  
   167  	for _, container := range node.SystemContainers {
   168  		key := NodeContainerKey(node.NodeName, this.getSystemContainerName(&container))
   169  		containerMetrics := this.decodeContainerStats(labels, &container, true)
   170  		containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeSystemContainer
   171  		metrics[key] = containerMetrics
   172  	}
   173  }
   174  
   175  func (this *summaryMetricsSource) decodePodStats(metrics map[string]*MetricSet, nodeLabels map[string]string, pod *stats.PodStats) {
   176  	glog.V(9).Infof("Decoding pod stats for pod %s/%s (%s)...", pod.PodRef.Namespace, pod.PodRef.Name, pod.PodRef.UID)
   177  	podMetrics := &MetricSet{
   178  		Labels:              this.cloneLabels(nodeLabels),
   179  		MetricValues:        map[string]MetricValue{},
   180  		LabeledMetrics:      []LabeledMetric{},
   181  		CollectionStartTime: pod.StartTime.Time,
   182  		ScrapeTime:          this.getScrapeTime(nil, nil, pod.Network),
   183  	}
   184  	ref := pod.PodRef
   185  	podMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePod
   186  	podMetrics.Labels[LabelPodId.Key] = ref.UID
   187  	podMetrics.Labels[LabelPodName.Key] = ref.Name
   188  	podMetrics.Labels[LabelNamespaceName.Key] = ref.Namespace
   189  
   190  	this.decodeUptime(podMetrics, pod.StartTime.Time)
   191  	this.decodeNetworkStats(podMetrics, pod.Network)
   192  	this.decodeCPUStats(podMetrics, pod.CPU)
   193  	this.decodeMemoryStats(podMetrics, pod.Memory)
   194  	this.decodeEphemeralStorageStats(podMetrics, pod.EphemeralStorage)
   195  	for _, vol := range pod.VolumeStats {
   196  		this.decodeFsStats(podMetrics, VolumeResourcePrefix+vol.Name, &vol.FsStats)
   197  	}
   198  	metrics[PodKey(ref.Namespace, ref.Name)] = podMetrics
   199  
   200  	for _, container := range pod.Containers {
   201  		key := PodContainerKey(ref.Namespace, ref.Name, container.Name)
   202  		// This check ensures that we are not replacing metrics of running container with metrics of terminated one if
   203  		// there are two exactly same containers reported by kubelet.
   204  		if _, exist := metrics[key]; exist {
   205  			glog.V(2).Infof("Metrics reported from two containers with the same key: %v. Create time of "+
   206  				"containers are %v and %v. Metrics from the older container are going to be dropped.", key,
   207  				container.StartTime.Time, metrics[key].CollectionStartTime)
   208  			if container.StartTime.Time.Before(metrics[key].CollectionStartTime) {
   209  				continue
   210  			}
   211  		}
   212  		metrics[key] = this.decodeContainerStats(podMetrics.Labels, &container, false)
   213  	}
   214  }
   215  
   216  func (this *summaryMetricsSource) decodeContainerStats(podLabels map[string]string, container *stats.ContainerStats, isSystemContainer bool) *MetricSet {
   217  	glog.V(9).Infof("Decoding container stats stats for container %s...", container.Name)
   218  	containerMetrics := &MetricSet{
   219  		Labels:              this.cloneLabels(podLabels),
   220  		MetricValues:        map[string]MetricValue{},
   221  		LabeledMetrics:      []LabeledMetric{},
   222  		CollectionStartTime: container.StartTime.Time,
   223  		ScrapeTime:          this.getScrapeTime(container.CPU, container.Memory, nil),
   224  	}
   225  	containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePodContainer
   226  	if isSystemContainer {
   227  		containerMetrics.Labels[LabelContainerName.Key] = this.getSystemContainerName(container)
   228  	} else {
   229  		containerMetrics.Labels[LabelContainerName.Key] = container.Name
   230  	}
   231  
   232  	this.decodeUptime(containerMetrics, container.StartTime.Time)
   233  	this.decodeCPUStats(containerMetrics, container.CPU)
   234  	this.decodeMemoryStats(containerMetrics, container.Memory)
   235  	this.decodeAcceleratorStats(containerMetrics, container.Accelerators)
   236  	this.decodeFsStats(containerMetrics, RootFsKey, container.Rootfs)
   237  	this.decodeFsStats(containerMetrics, LogsKey, container.Logs)
   238  	this.decodeEphemeralStorageStatsForContainer(containerMetrics, container.Rootfs, container.Logs)
   239  	this.decodeUserDefinedMetrics(containerMetrics, container.UserDefinedMetrics)
   240  
   241  	return containerMetrics
   242  }
   243  
   244  func (this *summaryMetricsSource) decodeUptime(metrics *MetricSet, startTime time.Time) {
   245  	if startTime.IsZero() {
   246  		glog.V(9).Infof("missing start time!")
   247  		return
   248  	}
   249  
   250  	uptime := uint64(time.Since(startTime).Nanoseconds() / time.Millisecond.Nanoseconds())
   251  	this.addIntMetric(metrics, &MetricUptime, &uptime)
   252  }
   253  
   254  func (this *summaryMetricsSource) decodeCPUStats(metrics *MetricSet, cpu *stats.CPUStats) {
   255  	if cpu == nil {
   256  		glog.V(9).Infof("missing cpu usage metric!")
   257  		return
   258  	}
   259  
   260  	this.addIntMetric(metrics, &MetricCpuUsage, cpu.UsageCoreNanoSeconds)
   261  }
   262  
   263  func (this *summaryMetricsSource) decodeEphemeralStorageStats(metrics *MetricSet, storage *stats.FsStats) {
   264  	if storage == nil {
   265  		glog.V(9).Infof("missing storage usage metric!")
   266  		return
   267  	}
   268  	this.addIntMetric(metrics, &MetricEphemeralStorageUsage, storage.UsedBytes)
   269  }
   270  
   271  func (this *summaryMetricsSource) decodeEphemeralStorageStatsForContainer(metrics *MetricSet, rootfs *stats.FsStats, logs *stats.FsStats) {
   272  	if rootfs == nil || logs == nil {
   273  		glog.V(9).Infof("missing storage usage metric!")
   274  		return
   275  	}
   276  	usage := *rootfs.UsedBytes + *logs.UsedBytes
   277  	this.addIntMetric(metrics, &MetricEphemeralStorageUsage, &usage)
   278  }
   279  
   280  func (this *summaryMetricsSource) decodeMemoryStats(metrics *MetricSet, memory *stats.MemoryStats) {
   281  	if memory == nil {
   282  		glog.V(9).Infof("missing memory metrics!")
   283  		return
   284  	}
   285  
   286  	this.addIntMetric(metrics, &MetricMemoryUsage, memory.UsageBytes)
   287  	this.addIntMetric(metrics, &MetricMemoryWorkingSet, memory.WorkingSetBytes)
   288  	this.addIntMetric(metrics, &MetricMemoryRSS, memory.RSSBytes)
   289  	this.addIntMetric(metrics, &MetricMemoryPageFaults, memory.PageFaults)
   290  	this.addIntMetric(metrics, &MetricMemoryMajorPageFaults, memory.MajorPageFaults)
   291  }
   292  
   293  func (this *summaryMetricsSource) decodeAcceleratorStats(metrics *MetricSet, accelerators []stats.AcceleratorStats) {
   294  	for _, accelerator := range accelerators {
   295  		acceleratorLabels := map[string]string{
   296  			LabelAcceleratorMake.Key:  accelerator.Make,
   297  			LabelAcceleratorModel.Key: accelerator.Model,
   298  			LabelAcceleratorID.Key:    accelerator.ID,
   299  		}
   300  		this.addLabeledIntMetric(metrics, &MetricAcceleratorMemoryTotal, acceleratorLabels, &accelerator.MemoryTotal)
   301  		this.addLabeledIntMetric(metrics, &MetricAcceleratorMemoryUsed, acceleratorLabels, &accelerator.MemoryUsed)
   302  		this.addLabeledIntMetric(metrics, &MetricAcceleratorDutyCycle, acceleratorLabels, &accelerator.DutyCycle)
   303  	}
   304  }
   305  
   306  func (this *summaryMetricsSource) decodeNetworkStats(metrics *MetricSet, network *stats.NetworkStats) {
   307  	if network == nil {
   308  		glog.V(9).Infof("missing network metrics!")
   309  		return
   310  	}
   311  
   312  	this.addIntMetric(metrics, &MetricNetworkRx, network.RxBytes)
   313  	this.addIntMetric(metrics, &MetricNetworkRxErrors, network.RxErrors)
   314  	this.addIntMetric(metrics, &MetricNetworkTx, network.TxBytes)
   315  	this.addIntMetric(metrics, &MetricNetworkTxErrors, network.TxErrors)
   316  }
   317  
   318  func (this *summaryMetricsSource) decodeFsStats(metrics *MetricSet, fsKey string, fs *stats.FsStats) {
   319  	if fs == nil {
   320  		glog.V(9).Infof("missing fs metrics!")
   321  		return
   322  	}
   323  
   324  	fsLabels := map[string]string{LabelResourceID.Key: fsKey}
   325  	this.addLabeledIntMetric(metrics, &MetricFilesystemUsage, fsLabels, fs.UsedBytes)
   326  	this.addLabeledIntMetric(metrics, &MetricFilesystemLimit, fsLabels, fs.CapacityBytes)
   327  	this.addLabeledIntMetric(metrics, &MetricFilesystemAvailable, fsLabels, fs.AvailableBytes)
   328  	this.addLabeledIntMetric(metrics, &MetricFilesystemInodes, fsLabels, fs.Inodes)
   329  	this.addLabeledIntMetric(metrics, &MetricFilesystemInodesFree, fsLabels, fs.InodesFree)
   330  }
   331  
   332  func (this *summaryMetricsSource) decodeUserDefinedMetrics(metrics *MetricSet, udm []stats.UserDefinedMetric) {
   333  	for _, metric := range udm {
   334  		mv := MetricValue{}
   335  		switch metric.Type {
   336  		case stats.MetricGauge:
   337  			mv.MetricType = MetricGauge
   338  		case stats.MetricCumulative:
   339  			mv.MetricType = MetricCumulative
   340  		case stats.MetricDelta:
   341  			mv.MetricType = MetricDelta
   342  		default:
   343  			glog.V(4).Infof("Skipping %s: unknown custom metric type: %v", metric.Name, metric.Type)
   344  			continue
   345  		}
   346  
   347  		// TODO: Handle double-precision values.
   348  		mv.ValueType = ValueFloat
   349  		mv.FloatValue = metric.Value
   350  
   351  		metrics.MetricValues[CustomMetricPrefix+metric.Name] = mv
   352  	}
   353  }
   354  
   355  func (this *summaryMetricsSource) getScrapeTime(cpu *stats.CPUStats, memory *stats.MemoryStats, network *stats.NetworkStats) time.Time {
   356  	// Assume CPU, memory and network scrape times are the same.
   357  	switch {
   358  	case cpu != nil && !cpu.Time.IsZero():
   359  		return cpu.Time.Time
   360  	case memory != nil && !memory.Time.IsZero():
   361  		return memory.Time.Time
   362  	case network != nil && !network.Time.IsZero():
   363  		return network.Time.Time
   364  	default:
   365  		return time.Time{}
   366  	}
   367  }
   368  
   369  // addIntMetric is a convenience method for adding the metric and value to the metric set.
   370  func (this *summaryMetricsSource) addIntMetric(metrics *MetricSet, metric *Metric, value *uint64) {
   371  	if value == nil {
   372  		glog.V(9).Infof("skipping metric %s because the value was nil", metric.Name)
   373  		return
   374  	}
   375  	val := MetricValue{
   376  		ValueType:  ValueInt64,
   377  		MetricType: metric.Type,
   378  		IntValue:   int64(*value),
   379  	}
   380  	metrics.MetricValues[metric.Name] = val
   381  }
   382  
   383  // addLabeledIntMetric is a convenience method for adding the labeled metric and value to the metric set.
   384  func (this *summaryMetricsSource) addLabeledIntMetric(metrics *MetricSet, metric *Metric, labels map[string]string, value *uint64) {
   385  	if value == nil {
   386  		glog.V(9).Infof("skipping labeled metric %s (%v) because the value was nil", metric.Name, labels)
   387  		return
   388  	}
   389  
   390  	val := LabeledMetric{
   391  		Name:   metric.Name,
   392  		Labels: labels,
   393  		MetricValue: MetricValue{
   394  			ValueType:  ValueInt64,
   395  			MetricType: metric.Type,
   396  			IntValue:   int64(*value),
   397  		},
   398  	}
   399  	metrics.LabeledMetrics = append(metrics.LabeledMetrics, val)
   400  }
   401  
   402  // Translate system container names to the legacy names for backwards compatibility.
   403  func (this *summaryMetricsSource) getSystemContainerName(c *stats.ContainerStats) string {
   404  	if legacyName, ok := systemNameMap[c.Name]; ok {
   405  		return legacyName
   406  	}
   407  	return c.Name
   408  }
   409  
// summaryProvider produces one summary metrics source per listed node.
// TODO: The summaryProvider duplicates a lot of code from kubeletProvider, and should be refactored.
type summaryProvider struct {
	// nodeLister lists the nodes for which sources are created.
	nodeLister v1listers.NodeLister
	// reflector is returned together with nodeLister by util.GetNodeLister.
	reflector *cache.Reflector
	// kubeletClient is shared by all sources created by this provider.
	kubeletClient *kubelet.KubeletClient
	// hostIDAnnotation, when non-empty, names the node annotation used as the
	// host ID if the node has no external ID.
	hostIDAnnotation string
}
   417  
   418  func (this *summaryProvider) GetMetricsSources() []MetricsSource {
   419  	sources := []MetricsSource{}
   420  	nodes, err := this.nodeLister.List(labels.Everything())
   421  	if err != nil {
   422  		glog.Errorf("error while listing nodes: %v", err)
   423  		return sources
   424  	}
   425  
   426  	for _, node := range nodes {
   427  		info, err := this.getNodeInfo(node)
   428  		if err != nil {
   429  			glog.Errorf("%v", err)
   430  			continue
   431  		}
   432  		sources = append(sources, NewSummaryMetricsSource(info, this.kubeletClient))
   433  	}
   434  	return sources
   435  }
   436  
   437  func (this *summaryProvider) getNodeInfo(node *kube_api.Node) (NodeInfo, error) {
   438  	hostname, ip, err := kubelet.GetNodeHostnameAndIP(node)
   439  	if err != nil {
   440  		return NodeInfo{}, err
   441  	}
   442  
   443  	if hostname == "" {
   444  		hostname = node.Name
   445  	}
   446  	hostID := node.Spec.ExternalID
   447  	if hostID == "" && this.hostIDAnnotation != "" {
   448  		hostID = node.Annotations[this.hostIDAnnotation]
   449  	}
   450  	info := NodeInfo{
   451  		NodeName: node.Name,
   452  		HostName: hostname,
   453  		HostID:   hostID,
   454  		Host: kubelet.Host{
   455  			IP:   ip,
   456  			Port: this.kubeletClient.GetPort(),
   457  		},
   458  		KubeletVersion: node.Status.NodeInfo.KubeletVersion,
   459  	}
   460  	return info, nil
   461  }
   462  
   463  func NewSummaryProvider(uri *url.URL) (MetricsSourceProvider, error) {
   464  	opts := uri.Query()
   465  
   466  	hostIDAnnotation := ""
   467  	if len(opts["host_id_annotation"]) > 0 {
   468  		hostIDAnnotation = opts["host_id_annotation"][0]
   469  	}
   470  	// create clients
   471  	kubeConfig, kubeletConfig, err := kubelet.GetKubeConfigs(uri)
   472  	if err != nil {
   473  		return nil, err
   474  	}
   475  	kubeClient := kube_client.NewForConfigOrDie(kubeConfig)
   476  	kubeletClient, err := kubelet.NewKubeletClient(kubeletConfig)
   477  	if err != nil {
   478  		return nil, err
   479  	}
   480  	// watch nodes
   481  	nodeLister, reflector, _ := util.GetNodeLister(kubeClient)
   482  
   483  	return &summaryProvider{
   484  		nodeLister:       nodeLister,
   485  		reflector:        reflector,
   486  		kubeletClient:    kubeletClient,
   487  		hostIDAnnotation: hostIDAnnotation,
   488  	}, nil
   489  }