github.com/jonaz/heapster@v1.3.0-beta.0.0.20170208112634-cd3c15ca3d29/metrics/sources/summary/summary.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package summary
    16  
    17  import (
    18  	"fmt"
    19  	"net/url"
    20  	"time"
    21  
    22  	. "k8s.io/heapster/metrics/core"
    23  	"k8s.io/heapster/metrics/sources/kubelet"
    24  
    25  	"github.com/golang/glog"
    26  	"github.com/prometheus/client_golang/prometheus"
    27  	"k8s.io/heapster/metrics/util"
    28  	kube_api "k8s.io/kubernetes/pkg/api"
    29  	"k8s.io/kubernetes/pkg/client/cache"
    30  	kube_client "k8s.io/kubernetes/pkg/client/unversioned"
    31  	"k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
    32  	"k8s.io/kubernetes/pkg/version"
    33  )
    34  
    35  var (
    36  	summaryRequestLatency = prometheus.NewSummaryVec(
    37  		prometheus.SummaryOpts{
    38  			Namespace: "heapster",
    39  			Subsystem: "kubelet_summary",
    40  			Name:      "request_duration_microseconds",
    41  			Help:      "The Kubelet summary request latencies in microseconds.",
    42  		},
    43  		[]string{"node"},
    44  	)
    45  )
    46  
// VolumeResourcePrefix is the prefix used for the LabelResourceID for volume
// metrics; the volume name is appended to it to form the resource ID.
const VolumeResourcePrefix = "Volume:"
    49  
// minSummaryKubeletVersion is the earliest kubelet version that serves the
// summary API. summarySupported compares a node's reported kubelet version
// against it.
var minSummaryKubeletVersion = version.MustParse("v1.2.0-alpha.8")
    52  
// init registers the summary request latency metric with Prometheus when the
// package is loaded.
func init() {
	prometheus.MustRegister(summaryRequestLatency)
}
    56  
// NodeInfo describes a node whose kubelet is to be scraped.
type NodeInfo struct {
	// Host is the kubelet endpoint (IP and port) of the node.
	kubelet.Host
	// NodeName is the name of the node object.
	NodeName string
	// HostName is the node's hostname address when one is reported, and the
	// node name otherwise.
	HostName string
	// HostID is taken from the node's Spec.ExternalID.
	HostID string
	// KubeletVersion is the kubelet version string reported in the node
	// status; it decides whether the summary API can be used.
	KubeletVersion string
}
    64  
// summaryMetricsSource is a MetricsSource that serves kubelet-provided metrics
// for pods and system containers of a single node, read from the kubelet
// summary API.
type summaryMetricsSource struct {
	// node identifies the node being scraped.
	node NodeInfo
	// kubeletClient is used to request the summary from the node's kubelet.
	kubeletClient *kubelet.KubeletClient

	// Whether this node requires the fall-back source. It is set at
	// construction when the kubelet version does not support the summary API,
	// and during scraping when the summary request returns a not-found error.
	useFallback bool
	// fallback is the source that scrapes are delegated to when useFallback
	// is true.
	fallback MetricsSource
}
    74  
    75  func NewSummaryMetricsSource(node NodeInfo, client *kubelet.KubeletClient, fallback MetricsSource) MetricsSource {
    76  	return &summaryMetricsSource{
    77  		node:          node,
    78  		kubeletClient: client,
    79  		useFallback:   !summarySupported(node.KubeletVersion),
    80  		fallback:      fallback,
    81  	}
    82  }
    83  
// Name returns the identifier of this source; it is the same string that
// String returns.
func (this *summaryMetricsSource) Name() string {
	return this.String()
}
    87  
    88  func (this *summaryMetricsSource) String() string {
    89  	return fmt.Sprintf("kubelet_summary:%s:%d", this.node.IP, this.node.Port)
    90  }
    91  
    92  func (this *summaryMetricsSource) ScrapeMetrics(start, end time.Time) *DataBatch {
    93  	if this.useFallback {
    94  		return this.fallback.ScrapeMetrics(start, end)
    95  	}
    96  
    97  	result := &DataBatch{
    98  		Timestamp:  time.Now(),
    99  		MetricSets: map[string]*MetricSet{},
   100  	}
   101  
   102  	summary, err := func() (*stats.Summary, error) {
   103  		startTime := time.Now()
   104  		defer summaryRequestLatency.WithLabelValues(this.node.HostName).Observe(float64(time.Since(startTime)))
   105  		return this.kubeletClient.GetSummary(this.node.Host)
   106  	}()
   107  
   108  	if err != nil {
   109  		if kubelet.IsNotFoundError(err) {
   110  			glog.Warningf("Summary not found, using fallback: %v", err)
   111  			this.useFallback = true
   112  			return this.fallback.ScrapeMetrics(start, end)
   113  		}
   114  		glog.Errorf("error while getting metrics summary from Kubelet %s(%s:%d): %v", this.node.NodeName, this.node.IP, this.node.Port, err)
   115  		return result
   116  	}
   117  
   118  	result.MetricSets = this.decodeSummary(summary)
   119  
   120  	return result
   121  }
   122  
   123  func summarySupported(kubeletVersion string) bool {
   124  	semver, err := version.Parse(kubeletVersion)
   125  	if err != nil {
   126  		glog.Errorf("Unable to parse kubelet version: %q", kubeletVersion)
   127  		return false
   128  	}
   129  	return semver.GE(minSummaryKubeletVersion)
   130  }
   131  
// Resource IDs attached to filesystem metrics via LabelResourceID.
const (
	// RootFsKey labels the node filesystem and a container's root filesystem.
	RootFsKey = "/"
	// LogsKey labels a container's logs filesystem.
	LogsKey = "logs"
)
   136  
// systemNameMap maps summary system container names to their original
// (legacy) names, for backwards compatibility. Names that are absent from the
// map are used unchanged (see getContainerName).
// TODO: Migrate to the new system names and remove this.
var systemNameMap = map[string]string{
	stats.SystemContainerRuntime: "docker-daemon",
	stats.SystemContainerMisc:    "system",
}
   143  
   144  // decodeSummary translates the kubelet stats.Summary API into the flattened heapster MetricSet API.
   145  func (this *summaryMetricsSource) decodeSummary(summary *stats.Summary) map[string]*MetricSet {
   146  	result := map[string]*MetricSet{}
   147  
   148  	labels := map[string]string{
   149  		LabelNodename.Key: this.node.NodeName,
   150  		LabelHostname.Key: this.node.HostName,
   151  		LabelHostID.Key:   this.node.HostID,
   152  	}
   153  
   154  	this.decodeNodeStats(result, labels, &summary.Node)
   155  	for _, pod := range summary.Pods {
   156  		this.decodePodStats(result, labels, &pod)
   157  	}
   158  
   159  	return result
   160  }
   161  
   162  // Convenience method for labels deep copy.
   163  func (this *summaryMetricsSource) cloneLabels(labels map[string]string) map[string]string {
   164  	clone := make(map[string]string, len(labels))
   165  	for k, v := range labels {
   166  		clone[k] = v
   167  	}
   168  	return clone
   169  }
   170  
   171  func (this *summaryMetricsSource) decodeNodeStats(metrics map[string]*MetricSet, labels map[string]string, node *stats.NodeStats) {
   172  	nodeMetrics := &MetricSet{
   173  		Labels:         this.cloneLabels(labels),
   174  		MetricValues:   map[string]MetricValue{},
   175  		LabeledMetrics: []LabeledMetric{},
   176  		CreateTime:     node.StartTime.Time,
   177  		ScrapeTime:     this.getScrapeTime(node.CPU, node.Memory, node.Network),
   178  	}
   179  	nodeMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeNode
   180  
   181  	this.decodeUptime(nodeMetrics, node.StartTime.Time)
   182  	this.decodeCPUStats(nodeMetrics, node.CPU)
   183  	this.decodeMemoryStats(nodeMetrics, node.Memory)
   184  	this.decodeNetworkStats(nodeMetrics, node.Network)
   185  	this.decodeFsStats(nodeMetrics, RootFsKey, node.Fs)
   186  	metrics[NodeKey(node.NodeName)] = nodeMetrics
   187  
   188  	for _, container := range node.SystemContainers {
   189  		key := NodeContainerKey(node.NodeName, this.getContainerName(&container))
   190  		containerMetrics := this.decodeContainerStats(labels, &container)
   191  		containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeSystemContainer
   192  		metrics[key] = containerMetrics
   193  	}
   194  }
   195  
   196  func (this *summaryMetricsSource) decodePodStats(metrics map[string]*MetricSet, nodeLabels map[string]string, pod *stats.PodStats) {
   197  	podMetrics := &MetricSet{
   198  		Labels:         this.cloneLabels(nodeLabels),
   199  		MetricValues:   map[string]MetricValue{},
   200  		LabeledMetrics: []LabeledMetric{},
   201  		CreateTime:     pod.StartTime.Time,
   202  		ScrapeTime:     this.getScrapeTime(nil, nil, pod.Network),
   203  	}
   204  	ref := pod.PodRef
   205  	podMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePod
   206  	podMetrics.Labels[LabelPodId.Key] = ref.UID
   207  	podMetrics.Labels[LabelPodName.Key] = ref.Name
   208  	podMetrics.Labels[LabelNamespaceName.Key] = ref.Namespace
   209  	// Needed for backward compatibility
   210  	podMetrics.Labels[LabelPodNamespace.Key] = ref.Namespace
   211  
   212  	this.decodeUptime(podMetrics, pod.StartTime.Time)
   213  	this.decodeNetworkStats(podMetrics, pod.Network)
   214  	for _, vol := range pod.VolumeStats {
   215  		this.decodeFsStats(podMetrics, VolumeResourcePrefix+vol.Name, &vol.FsStats)
   216  	}
   217  	metrics[PodKey(ref.Namespace, ref.Name)] = podMetrics
   218  
   219  	for _, container := range pod.Containers {
   220  		key := PodContainerKey(ref.Namespace, ref.Name, container.Name)
   221  		metrics[key] = this.decodeContainerStats(podMetrics.Labels, &container)
   222  	}
   223  }
   224  
   225  func (this *summaryMetricsSource) decodeContainerStats(podLabels map[string]string, container *stats.ContainerStats) *MetricSet {
   226  	containerMetrics := &MetricSet{
   227  		Labels:         this.cloneLabels(podLabels),
   228  		MetricValues:   map[string]MetricValue{},
   229  		LabeledMetrics: []LabeledMetric{},
   230  		CreateTime:     container.StartTime.Time,
   231  		ScrapeTime:     this.getScrapeTime(container.CPU, container.Memory, nil),
   232  	}
   233  	containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePodContainer
   234  	containerMetrics.Labels[LabelContainerName.Key] = this.getContainerName(container)
   235  
   236  	this.decodeUptime(containerMetrics, container.StartTime.Time)
   237  	this.decodeCPUStats(containerMetrics, container.CPU)
   238  	this.decodeMemoryStats(containerMetrics, container.Memory)
   239  	this.decodeFsStats(containerMetrics, RootFsKey, container.Rootfs)
   240  	this.decodeFsStats(containerMetrics, LogsKey, container.Logs)
   241  	this.decodeUserDefinedMetrics(containerMetrics, container.UserDefinedMetrics)
   242  
   243  	return containerMetrics
   244  }
   245  
   246  func (this *summaryMetricsSource) decodeUptime(metrics *MetricSet, startTime time.Time) {
   247  	if startTime.IsZero() {
   248  		return
   249  	}
   250  
   251  	uptime := uint64(time.Since(startTime).Nanoseconds() / time.Millisecond.Nanoseconds())
   252  	this.addIntMetric(metrics, &MetricUptime, &uptime)
   253  }
   254  
   255  func (this *summaryMetricsSource) decodeCPUStats(metrics *MetricSet, cpu *stats.CPUStats) {
   256  	if cpu == nil {
   257  		return
   258  	}
   259  
   260  	this.addIntMetric(metrics, &MetricCpuUsage, cpu.UsageCoreNanoSeconds)
   261  }
   262  
   263  func (this *summaryMetricsSource) decodeMemoryStats(metrics *MetricSet, memory *stats.MemoryStats) {
   264  	if memory == nil {
   265  		return
   266  	}
   267  
   268  	this.addIntMetric(metrics, &MetricMemoryUsage, memory.UsageBytes)
   269  	this.addIntMetric(metrics, &MetricMemoryWorkingSet, memory.WorkingSetBytes)
   270  	this.addIntMetric(metrics, &MetricMemoryPageFaults, memory.PageFaults)
   271  	this.addIntMetric(metrics, &MetricMemoryMajorPageFaults, memory.MajorPageFaults)
   272  }
   273  
   274  func (this *summaryMetricsSource) decodeNetworkStats(metrics *MetricSet, network *stats.NetworkStats) {
   275  	if network == nil {
   276  		return
   277  	}
   278  
   279  	this.addIntMetric(metrics, &MetricNetworkRx, network.RxBytes)
   280  	this.addIntMetric(metrics, &MetricNetworkRxErrors, network.RxErrors)
   281  	this.addIntMetric(metrics, &MetricNetworkTx, network.TxBytes)
   282  	this.addIntMetric(metrics, &MetricNetworkTxErrors, network.TxErrors)
   283  }
   284  
   285  func (this *summaryMetricsSource) decodeFsStats(metrics *MetricSet, fsKey string, fs *stats.FsStats) {
   286  	if fs == nil {
   287  		return
   288  	}
   289  
   290  	fsLabels := map[string]string{LabelResourceID.Key: fsKey}
   291  	this.addLabeledIntMetric(metrics, &MetricFilesystemUsage, fsLabels, fs.UsedBytes)
   292  	this.addLabeledIntMetric(metrics, &MetricFilesystemLimit, fsLabels, fs.CapacityBytes)
   293  	this.addLabeledIntMetric(metrics, &MetricFilesystemAvailable, fsLabels, fs.AvailableBytes)
   294  }
   295  
   296  func (this *summaryMetricsSource) decodeUserDefinedMetrics(metrics *MetricSet, udm []stats.UserDefinedMetric) {
   297  	for _, metric := range udm {
   298  		mv := MetricValue{}
   299  		switch metric.Type {
   300  		case stats.MetricGauge:
   301  			mv.MetricType = MetricGauge
   302  		case stats.MetricCumulative:
   303  			mv.MetricType = MetricCumulative
   304  		case stats.MetricDelta:
   305  			mv.MetricType = MetricDelta
   306  		default:
   307  			glog.V(4).Infof("Skipping %s: unknown custom metric type: %v", metric.Name, metric.Type)
   308  			continue
   309  		}
   310  
   311  		// TODO: Handle double-precision values.
   312  		mv.ValueType = ValueFloat
   313  		mv.FloatValue = float32(metric.Value)
   314  
   315  		metrics.MetricValues[CustomMetricPrefix+metric.Name] = mv
   316  	}
   317  }
   318  
   319  func (this *summaryMetricsSource) getScrapeTime(cpu *stats.CPUStats, memory *stats.MemoryStats, network *stats.NetworkStats) time.Time {
   320  	// Assume CPU, memory and network scrape times are the same.
   321  	switch {
   322  	case cpu != nil && !cpu.Time.IsZero():
   323  		return cpu.Time.Time
   324  	case memory != nil && !memory.Time.IsZero():
   325  		return memory.Time.Time
   326  	case network != nil && !network.Time.IsZero():
   327  		return network.Time.Time
   328  	default:
   329  		return time.Time{}
   330  	}
   331  }
   332  
   333  // addIntMetric is a convenience method for adding the metric and value to the metric set.
   334  func (this *summaryMetricsSource) addIntMetric(metrics *MetricSet, metric *Metric, value *uint64) {
   335  	if value == nil {
   336  		return
   337  	}
   338  	val := MetricValue{
   339  		ValueType:  ValueInt64,
   340  		MetricType: metric.Type,
   341  		IntValue:   int64(*value),
   342  	}
   343  	metrics.MetricValues[metric.Name] = val
   344  }
   345  
   346  // addLabeledIntMetric is a convenience method for adding the labeled metric and value to the metric set.
   347  func (this *summaryMetricsSource) addLabeledIntMetric(metrics *MetricSet, metric *Metric, labels map[string]string, value *uint64) {
   348  	if value == nil {
   349  		return
   350  	}
   351  
   352  	val := LabeledMetric{
   353  		Name:   metric.Name,
   354  		Labels: labels,
   355  		MetricValue: MetricValue{
   356  			ValueType:  ValueInt64,
   357  			MetricType: metric.Type,
   358  			IntValue:   int64(*value),
   359  		},
   360  	}
   361  	metrics.LabeledMetrics = append(metrics.LabeledMetrics, val)
   362  }
   363  
   364  // Translate system container names to the legacy names for backwards compatibility.
   365  func (this *summaryMetricsSource) getContainerName(c *stats.ContainerStats) string {
   366  	if legacyName, ok := systemNameMap[c.Name]; ok {
   367  		return legacyName
   368  	}
   369  	return c.Name
   370  }
   371  
// summaryProvider supplies one summary-based metrics source per node known to
// the node lister.
// TODO: The summaryProvider duplicates a lot of code from kubeletProvider, and should be refactored.
type summaryProvider struct {
	// nodeLister lists the nodes for which sources are created.
	nodeLister *cache.StoreToNodeLister
	// reflector is the reflector returned together with nodeLister; it is
	// stored here but not read by the methods of this type.
	reflector *cache.Reflector
	// kubeletClient is shared by all sources created by this provider.
	kubeletClient *kubelet.KubeletClient
}
   378  
   379  func (this *summaryProvider) GetMetricsSources() []MetricsSource {
   380  	sources := []MetricsSource{}
   381  	nodes, err := this.nodeLister.List()
   382  	if err != nil {
   383  		glog.Errorf("error while listing nodes: %v", err)
   384  		return sources
   385  	}
   386  
   387  	for _, node := range nodes.Items {
   388  		info, err := this.getNodeInfo(&node)
   389  		if err != nil {
   390  			glog.Errorf("%v", err)
   391  			continue
   392  		}
   393  		fallback := kubelet.NewKubeletMetricsSource(
   394  			info.Host,
   395  			this.kubeletClient,
   396  			info.NodeName,
   397  			info.HostName,
   398  			info.HostID,
   399  		)
   400  		sources = append(sources, NewSummaryMetricsSource(info, this.kubeletClient, fallback))
   401  	}
   402  	return sources
   403  }
   404  
   405  func (this *summaryProvider) getNodeInfo(node *kube_api.Node) (NodeInfo, error) {
   406  	for _, c := range node.Status.Conditions {
   407  		if c.Type == kube_api.NodeReady && c.Status != kube_api.ConditionTrue {
   408  			return NodeInfo{}, fmt.Errorf("Node %v is not ready", node.Name)
   409  		}
   410  	}
   411  	info := NodeInfo{
   412  		NodeName: node.Name,
   413  		HostName: node.Name,
   414  		HostID:   node.Spec.ExternalID,
   415  		Host: kubelet.Host{
   416  			Port: this.kubeletClient.GetPort(),
   417  		},
   418  		KubeletVersion: node.Status.NodeInfo.KubeletVersion,
   419  	}
   420  
   421  	for _, addr := range node.Status.Addresses {
   422  		if addr.Type == kube_api.NodeHostName && addr.Address != "" {
   423  			info.HostName = addr.Address
   424  		}
   425  		if addr.Type == kube_api.NodeInternalIP && addr.Address != "" {
   426  			info.IP = addr.Address
   427  		}
   428  		if addr.Type == kube_api.NodeLegacyHostIP && addr.Address != "" && info.IP == "" {
   429  			info.IP = addr.Address
   430  		}
   431  	}
   432  
   433  	if info.IP == "" {
   434  		return info, fmt.Errorf("Node %v has no valid hostname and/or IP address: %v %v", node.Name, info.HostName, info.IP)
   435  	}
   436  
   437  	return info, nil
   438  }
   439  
   440  func NewSummaryProvider(uri *url.URL) (MetricsSourceProvider, error) {
   441  	// create clients
   442  	kubeConfig, kubeletConfig, err := kubelet.GetKubeConfigs(uri)
   443  	if err != nil {
   444  		return nil, err
   445  	}
   446  	kubeClient := kube_client.NewOrDie(kubeConfig)
   447  	kubeletClient, err := kubelet.NewKubeletClient(kubeletConfig)
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  	// watch nodes
   452  	nodeLister, reflector, _ := util.GetNodeLister(kubeClient)
   453  
   454  	return &summaryProvider{
   455  		nodeLister:    nodeLister,
   456  		reflector:     reflector,
   457  		kubeletClient: kubeletClient,
   458  	}, nil
   459  }