github.com/timstclair/heapster@v0.20.0-alpha1/metrics/sources/kubelet/kubelet.go (about)

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kubelet
    16  
    17  import (
    18  	"fmt"
    19  	"net/url"
    20  	"strings"
    21  	"time"
    22  
    23  	. "k8s.io/heapster/metrics/core"
    24  
    25  	"github.com/golang/glog"
    26  	cadvisor "github.com/google/cadvisor/info/v1"
    27  	"github.com/prometheus/client_golang/prometheus"
    28  	kube_api "k8s.io/kubernetes/pkg/api"
    29  	"k8s.io/kubernetes/pkg/client/cache"
    30  	kube_client "k8s.io/kubernetes/pkg/client/unversioned"
    31  	"k8s.io/kubernetes/pkg/fields"
    32  )
    33  
const (
	// infraContainerName is the container name that decodeMetrics treats as
	// representing the pod itself rather than one of its containers.
	infraContainerName = "POD"
	// TODO: following constants are copied from k8s, change to use them directly
	// Keys looked up in the cAdvisor container spec labels to recover the
	// pod name, pod namespace, pod UID and container name.
	kubernetesPodNameLabel      = "io.kubernetes.pod.name"
	kubernetesPodNamespaceLabel = "io.kubernetes.pod.namespace"
	kubernetesPodUID            = "io.kubernetes.pod.uid"
	kubernetesContainerLabel    = "io.kubernetes.container.name"

	// CustomMetricPrefix is prepended to the name of every custom metric
	// before it is stored in a MetricSet's MetricValues.
	CustomMetricPrefix = "CM:"
)
    44  
var (
	// The Kubelet request latencies in microseconds.
	// Observations are partitioned by a single "node" label; the collector is
	// registered with the default Prometheus registry in init.
	kubeletRequestLatency = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Namespace: "heapster",
			Subsystem: "kubelet",
			Name:      "request_duration_microseconds",
			Help:      "The Kubelet request latencies in microseconds.",
		},
		[]string{"node"},
	)
)
    57  
// init registers the Kubelet request latency summary with the default
// Prometheus registry. MustRegister panics if registration fails.
func init() {
	prometheus.MustRegister(kubeletRequestLatency)
}
    61  
// cpuVal is a remembered cpu/usage sample: the cumulative counter value and
// the timestamp of the stats sample it was read from. decodeMetrics keeps one
// per metric set so that the next scrape can derive cpu/usage_rate.
type cpuVal struct {
	// Val is the cpu/usage value at Timestamp.
	Val int64
	// Timestamp is the time of the stats sample Val was taken from.
	Timestamp time.Time
}
    66  
// Kubelet-provided metrics for pod and system container.
// One kubeletMetricsSource is created per node by kubeletProvider.
type kubeletMetricsSource struct {
	// host is the Kubelet endpoint (IP and port) that is scraped.
	host Host
	// kubeletClient performs the actual requests against the Kubelet.
	kubeletClient *KubeletClient
	// nodename is used to build metric set keys and the nodename label.
	nodename string
	// hostname is attached as the hostname label and as the "node" label of
	// the request latency metric.
	hostname string
	// hostId is attached as the host ID label.
	hostId string
	// cpuLastVal holds the previous cpu/usage sample per metric set key. The
	// map is owned by kubeletProvider, which hands the same map to the source
	// it builds for this node each time sources are requested.
	cpuLastVal map[string]cpuVal
}
    76  
// Name returns the identifier of this source; it is the same text that
// String produces.
func (this *kubeletMetricsSource) Name() string {
	return this.String()
}
    80  
    81  func (this *kubeletMetricsSource) String() string {
    82  	return fmt.Sprintf("kubelet:%s:%d", this.host.IP, this.host.Port)
    83  }
    84  
    85  func (this *kubeletMetricsSource) decodeMetrics(c *cadvisor.ContainerInfo) (string, *MetricSet) {
    86  	var metricSetKey string
    87  	cMetrics := &MetricSet{
    88  		MetricValues: map[string]MetricValue{},
    89  		Labels: map[string]string{
    90  			LabelNodename.Key: this.nodename,
    91  			LabelHostname.Key: this.hostname,
    92  			LabelHostID.Key:   this.hostId,
    93  		},
    94  		LabeledMetrics: []LabeledMetric{},
    95  	}
    96  
    97  	if isNode(c) {
    98  		metricSetKey = NodeKey(this.nodename)
    99  		cMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeNode
   100  	} else if isSysContainer(c) {
   101  		cName := getSysContainerName(c)
   102  		metricSetKey = NodeContainerKey(this.nodename, cName)
   103  		cMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeSystemContainer
   104  		cMetrics.Labels[LabelContainerName.Key] = cName
   105  	} else {
   106  		cName := c.Spec.Labels[kubernetesContainerLabel]
   107  		ns := c.Spec.Labels[kubernetesPodNamespaceLabel]
   108  		podName := c.Spec.Labels[kubernetesPodNameLabel]
   109  
   110  		// Support for kubernetes 1.0.*
   111  		if ns == "" && strings.Contains(podName, "/") {
   112  			tokens := strings.SplitN(podName, "/", 2)
   113  			if len(tokens) == 2 {
   114  				ns = tokens[0]
   115  				podName = tokens[1]
   116  			}
   117  		}
   118  		if cName == "" {
   119  			// Better this than nothing. This is a temporary hack for new heapster to work
   120  			// with Kubernetes 1.0.*.
   121  			// TODO: fix this with POD list.
   122  			// Parsing name like:
   123  			// k8s_kube-ui.7f9b83f6_kube-ui-v1-bxj1w_kube-system_9abfb0bd-811f-11e5-b548-42010af00002_e6841e8d
   124  			pos := strings.Index(c.Name, ".")
   125  			if pos >= 0 {
   126  				// remove first 4 chars.
   127  				cName = c.Name[len("k8s_"):pos]
   128  			}
   129  		}
   130  
   131  		if cName == "" || ns == "" || podName == "" {
   132  			glog.Errorf("Missing metadata for container %v. Got: %+v", c.Name, c.Spec.Labels)
   133  			return "", nil
   134  		}
   135  
   136  		if cName == infraContainerName {
   137  			metricSetKey = PodKey(ns, podName)
   138  			cMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePod
   139  		} else {
   140  			metricSetKey = PodContainerKey(ns, podName, cName)
   141  			cMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePodContainer
   142  			cMetrics.Labels[LabelContainerName.Key] = cName
   143  			cMetrics.Labels[LabelContainerBaseImage.Key] = c.Spec.Image
   144  		}
   145  		cMetrics.Labels[LabelPodId.Key] = c.Spec.Labels[kubernetesPodUID]
   146  		cMetrics.Labels[LabelPodName.Key] = podName
   147  		cMetrics.Labels[LabelNamespaceName.Key] = ns
   148  		// Needed for backward compatibility
   149  		cMetrics.Labels[LabelPodNamespace.Key] = ns
   150  	}
   151  
   152  	for _, metric := range StandardMetrics {
   153  		if metric.HasValue != nil && metric.HasValue(&c.Spec) {
   154  			cMetrics.MetricValues[metric.Name] = metric.GetValue(&c.Spec, c.Stats[0])
   155  		}
   156  	}
   157  
   158  	for _, metric := range LabeledMetrics {
   159  		if metric.HasLabeledMetric != nil && metric.HasLabeledMetric(&c.Spec) {
   160  			labeledMetrics := metric.GetLabeledMetric(&c.Spec, c.Stats[0])
   161  			cMetrics.LabeledMetrics = append(cMetrics.LabeledMetrics, labeledMetrics...)
   162  		}
   163  	}
   164  
   165  	if c.Spec.HasCustomMetrics {
   166  	metricloop:
   167  		for _, spec := range c.Spec.CustomMetrics {
   168  			if cmValue, ok := c.Stats[0].CustomMetrics[spec.Name]; ok && cmValue != nil && len(cmValue) >= 1 {
   169  				newest := cmValue[0]
   170  				for _, metricVal := range cmValue {
   171  					if newest.Timestamp.Before(metricVal.Timestamp) {
   172  						newest = metricVal
   173  					}
   174  				}
   175  				mv := MetricValue{}
   176  				switch spec.Type {
   177  				case cadvisor.MetricGauge:
   178  					mv.MetricType = MetricGauge
   179  				case cadvisor.MetricCumulative:
   180  					mv.MetricType = MetricCumulative
   181  				default:
   182  					glog.V(4).Infof("Skipping %s: unknown custom metric type: %v", spec.Name, spec.Type)
   183  					continue metricloop
   184  				}
   185  
   186  				switch spec.Format {
   187  				case cadvisor.IntType:
   188  					mv.ValueType = ValueInt64
   189  					mv.IntValue = newest.IntValue
   190  				case cadvisor.FloatType:
   191  					mv.ValueType = ValueFloat
   192  					mv.FloatValue = float32(newest.FloatValue)
   193  				default:
   194  					glog.V(4).Infof("Skipping %s: unknown custom metric format", spec.Name, spec.Format)
   195  					continue metricloop
   196  				}
   197  
   198  				cMetrics.MetricValues[CustomMetricPrefix+spec.Name] = mv
   199  			}
   200  		}
   201  	}
   202  
   203  	// This is temporary workaround to support cpu/usege_rate metric.
   204  	if currentVal, ok := cMetrics.MetricValues["cpu/usage"]; ok {
   205  		if lastVal, ok := this.cpuLastVal[metricSetKey]; ok {
   206  			// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
   207  			rateVal := 1000 * (currentVal.IntValue - lastVal.Val) / (c.Stats[0].Timestamp.UnixNano() - lastVal.Timestamp.UnixNano())
   208  			cMetrics.MetricValues["cpu/usage_rate"] = MetricValue{
   209  				ValueType:  ValueInt64,
   210  				MetricType: MetricGauge,
   211  				IntValue:   rateVal,
   212  			}
   213  		}
   214  		this.cpuLastVal[metricSetKey] = cpuVal{
   215  			Val:       currentVal.IntValue,
   216  			Timestamp: c.Stats[0].Timestamp,
   217  		}
   218  	}
   219  	// TODO: add labels: LabelPodNamespaceUID, LabelLabels, LabelResourceID
   220  
   221  	return metricSetKey, cMetrics
   222  }
   223  
   224  func (this *kubeletMetricsSource) ScrapeMetrics(start, end time.Time) *DataBatch {
   225  	containers, err := this.scrapeKubelet(this.kubeletClient, this.host, start, end)
   226  	if err != nil {
   227  		glog.Errorf("error while getting containers from Kubelet: %v", err)
   228  	}
   229  	glog.Infof("successfully obtained stats for %v containers", len(containers))
   230  
   231  	result := &DataBatch{
   232  		Timestamp:  end,
   233  		MetricSets: map[string]*MetricSet{},
   234  	}
   235  	keys := make(map[string]bool)
   236  	for _, c := range containers {
   237  		name, metrics := this.decodeMetrics(&c)
   238  		if name == "" {
   239  			continue
   240  		}
   241  		result.MetricSets[name] = metrics
   242  		keys[name] = true
   243  	}
   244  	// No remember data for pods that have been removed.
   245  	for key := range this.cpuLastVal {
   246  		if _, ok := keys[key]; !ok {
   247  			delete(this.cpuLastVal, key)
   248  		}
   249  	}
   250  	return result
   251  }
   252  
   253  func (this *kubeletMetricsSource) scrapeKubelet(client *KubeletClient, host Host, start, end time.Time) ([]cadvisor.ContainerInfo, error) {
   254  	startTime := time.Now()
   255  	defer kubeletRequestLatency.WithLabelValues(this.hostname).Observe(float64(time.Since(startTime)))
   256  	return client.GetAllRawContainers(host, start, end)
   257  }
   258  
// kubeletProvider produces one kubeletMetricsSource per node each time
// GetMetricsSources is called.
type kubeletProvider struct {
	// nodeLister is queried for the current set of nodes.
	nodeLister *cache.StoreToNodeLister
	// reflector is the reflector started by NewKubeletProvider over the
	// store behind nodeLister.
	reflector *cache.Reflector
	// kubeletClient is shared by all sources created by this provider.
	kubeletClient *KubeletClient
	// cpuLastVals maps a node name to that node's remembered cpu/usage
	// samples; the inner map is handed to the node's metrics source.
	cpuLastVals map[string]map[string]cpuVal
}
   265  
   266  func (this *kubeletProvider) GetMetricsSources() []MetricsSource {
   267  	sources := []MetricsSource{}
   268  	nodes, err := this.nodeLister.List()
   269  	if err != nil {
   270  		glog.Errorf("error while listing nodes: %v", err)
   271  		return sources
   272  	}
   273  
   274  	nodeNames := make(map[string]bool)
   275  	for _, node := range nodes.Items {
   276  		nodeNames[node.Name] = true
   277  		hostname, ip, err := getNodeHostnameAndIP(&node)
   278  		if err != nil {
   279  			glog.Errorf("%v", err)
   280  			continue
   281  		}
   282  		if _, ok := this.cpuLastVals[node.Name]; !ok {
   283  			this.cpuLastVals[node.Name] = make(map[string]cpuVal)
   284  		}
   285  		sources = append(sources, &kubeletMetricsSource{
   286  			host:          Host{IP: ip, Port: this.kubeletClient.GetPort()},
   287  			kubeletClient: this.kubeletClient,
   288  			nodename:      node.Name,
   289  			hostname:      hostname,
   290  			hostId:        node.Spec.ExternalID,
   291  			cpuLastVal:    this.cpuLastVals[node.Name],
   292  		})
   293  	}
   294  
   295  	for key := range this.cpuLastVals {
   296  		if _, ok := nodeNames[key]; !ok {
   297  			delete(this.cpuLastVals, key)
   298  		}
   299  	}
   300  
   301  	return sources
   302  }
   303  
   304  func getNodeHostnameAndIP(node *kube_api.Node) (string, string, error) {
   305  	for _, c := range node.Status.Conditions {
   306  		if c.Type == kube_api.NodeReady && c.Status != kube_api.ConditionTrue {
   307  			return "", "", fmt.Errorf("Node %v is not ready", node.Name)
   308  		}
   309  	}
   310  	hostname, ip := node.Name, ""
   311  	for _, addr := range node.Status.Addresses {
   312  		if addr.Type == kube_api.NodeHostName && addr.Address != "" {
   313  			hostname = addr.Address
   314  		}
   315  		if addr.Type == kube_api.NodeInternalIP && addr.Address != "" {
   316  			ip = addr.Address
   317  		}
   318  	}
   319  	if ip != "" {
   320  		return hostname, ip, nil
   321  	}
   322  	return "", "", fmt.Errorf("Node %v has no valid hostname and/or IP address: %v %v", node.Name, hostname, ip)
   323  }
   324  
   325  func NewKubeletProvider(uri *url.URL) (MetricsSourceProvider, error) {
   326  	// create clients
   327  	kubeConfig, kubeletConfig, err := getKubeConfigs(uri)
   328  	if err != nil {
   329  		return nil, err
   330  	}
   331  	kubeClient := kube_client.NewOrDie(kubeConfig)
   332  	kubeletClient, err := NewKubeletClient(kubeletConfig)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  	// watch nodes
   337  	lw := cache.NewListWatchFromClient(kubeClient, "nodes", kube_api.NamespaceAll, fields.Everything())
   338  	nodeLister := &cache.StoreToNodeLister{Store: cache.NewStore(cache.MetaNamespaceKeyFunc)}
   339  	reflector := cache.NewReflector(lw, &kube_api.Node{}, nodeLister.Store, time.Hour)
   340  	reflector.Run()
   341  
   342  	return &kubeletProvider{
   343  		nodeLister:    nodeLister,
   344  		reflector:     reflector,
   345  		kubeletClient: kubeletClient,
   346  		cpuLastVals:   make(map[string]map[string]cpuVal),
   347  	}, nil
   348  }