github.com/jonaz/heapster@v1.3.0-beta.0.0.20170208112634-cd3c15ca3d29/metrics/sources/summary/summary.go (about) 1 // Copyright 2015 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package summary 16 17 import ( 18 "fmt" 19 "net/url" 20 "time" 21 22 . "k8s.io/heapster/metrics/core" 23 "k8s.io/heapster/metrics/sources/kubelet" 24 25 "github.com/golang/glog" 26 "github.com/prometheus/client_golang/prometheus" 27 "k8s.io/heapster/metrics/util" 28 kube_api "k8s.io/kubernetes/pkg/api" 29 "k8s.io/kubernetes/pkg/client/cache" 30 kube_client "k8s.io/kubernetes/pkg/client/unversioned" 31 "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats" 32 "k8s.io/kubernetes/pkg/version" 33 ) 34 35 var ( 36 summaryRequestLatency = prometheus.NewSummaryVec( 37 prometheus.SummaryOpts{ 38 Namespace: "heapster", 39 Subsystem: "kubelet_summary", 40 Name: "request_duration_microseconds", 41 Help: "The Kubelet summary request latencies in microseconds.", 42 }, 43 []string{"node"}, 44 ) 45 ) 46 47 // Prefix used for the LabelResourceID for volume metrics. 48 const VolumeResourcePrefix = "Volume:" 49 50 // Earliest kubelet version that serves the summary API. 51 var minSummaryKubeletVersion = version.MustParse("v1.2.0-alpha.8") 52 53 func init() { 54 prometheus.MustRegister(summaryRequestLatency) 55 } 56 57 type NodeInfo struct { 58 kubelet.Host 59 NodeName string 60 HostName string 61 HostID string 62 KubeletVersion string 63 } 64 65 // Kubelet-provided metrics for pod and system container. 66 type summaryMetricsSource struct { 67 node NodeInfo 68 kubeletClient *kubelet.KubeletClient 69 70 // Whether this node requires the fall-back source. 71 useFallback bool 72 fallback MetricsSource 73 } 74 75 func NewSummaryMetricsSource(node NodeInfo, client *kubelet.KubeletClient, fallback MetricsSource) MetricsSource { 76 return &summaryMetricsSource{ 77 node: node, 78 kubeletClient: client, 79 useFallback: !summarySupported(node.KubeletVersion), 80 fallback: fallback, 81 } 82 } 83 84 func (this *summaryMetricsSource) Name() string { 85 return this.String() 86 } 87 88 func (this *summaryMetricsSource) String() string { 89 return fmt.Sprintf("kubelet_summary:%s:%d", this.node.IP, this.node.Port) 90 } 91 92 func (this *summaryMetricsSource) ScrapeMetrics(start, end time.Time) *DataBatch { 93 if this.useFallback { 94 return this.fallback.ScrapeMetrics(start, end) 95 } 96 97 result := &DataBatch{ 98 Timestamp: time.Now(), 99 MetricSets: map[string]*MetricSet{}, 100 } 101 102 summary, err := func() (*stats.Summary, error) { 103 startTime := time.Now() 104 defer summaryRequestLatency.WithLabelValues(this.node.HostName).Observe(float64(time.Since(startTime))) 105 return this.kubeletClient.GetSummary(this.node.Host) 106 }() 107 108 if err != nil { 109 if kubelet.IsNotFoundError(err) { 110 glog.Warningf("Summary not found, using fallback: %v", err) 111 this.useFallback = true 112 return this.fallback.ScrapeMetrics(start, end) 113 } 114 glog.Errorf("error while getting metrics summary from Kubelet %s(%s:%d): %v", this.node.NodeName, this.node.IP, this.node.Port, err) 115 return result 116 } 117 118 result.MetricSets = this.decodeSummary(summary) 119 120 return result 121 } 122 123 func summarySupported(kubeletVersion string) bool { 124 semver, err := version.Parse(kubeletVersion) 125 if err != nil { 126 glog.Errorf("Unable to parse kubelet version: %q", kubeletVersion) 127 return false 128 } 129 return semver.GE(minSummaryKubeletVersion) 130 } 131 132 const ( 133 RootFsKey = "/" 134 LogsKey = "logs" 135 ) 136 137 // For backwards compatibility, map summary system names into original names. 138 // TODO: Migrate to the new system names and remove this. 139 var systemNameMap = map[string]string{ 140 stats.SystemContainerRuntime: "docker-daemon", 141 stats.SystemContainerMisc: "system", 142 } 143 144 // decodeSummary translates the kubelet stats.Summary API into the flattened heapster MetricSet API. 145 func (this *summaryMetricsSource) decodeSummary(summary *stats.Summary) map[string]*MetricSet { 146 result := map[string]*MetricSet{} 147 148 labels := map[string]string{ 149 LabelNodename.Key: this.node.NodeName, 150 LabelHostname.Key: this.node.HostName, 151 LabelHostID.Key: this.node.HostID, 152 } 153 154 this.decodeNodeStats(result, labels, &summary.Node) 155 for _, pod := range summary.Pods { 156 this.decodePodStats(result, labels, &pod) 157 } 158 159 return result 160 } 161 162 // Convenience method for labels deep copy. 163 func (this *summaryMetricsSource) cloneLabels(labels map[string]string) map[string]string { 164 clone := make(map[string]string, len(labels)) 165 for k, v := range labels { 166 clone[k] = v 167 } 168 return clone 169 } 170 171 func (this *summaryMetricsSource) decodeNodeStats(metrics map[string]*MetricSet, labels map[string]string, node *stats.NodeStats) { 172 nodeMetrics := &MetricSet{ 173 Labels: this.cloneLabels(labels), 174 MetricValues: map[string]MetricValue{}, 175 LabeledMetrics: []LabeledMetric{}, 176 CreateTime: node.StartTime.Time, 177 ScrapeTime: this.getScrapeTime(node.CPU, node.Memory, node.Network), 178 } 179 nodeMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeNode 180 181 this.decodeUptime(nodeMetrics, node.StartTime.Time) 182 this.decodeCPUStats(nodeMetrics, node.CPU) 183 this.decodeMemoryStats(nodeMetrics, node.Memory) 184 this.decodeNetworkStats(nodeMetrics, node.Network) 185 this.decodeFsStats(nodeMetrics, RootFsKey, node.Fs) 186 metrics[NodeKey(node.NodeName)] = nodeMetrics 187 188 for _, container := range node.SystemContainers { 189 key := NodeContainerKey(node.NodeName, this.getContainerName(&container)) 190 containerMetrics := this.decodeContainerStats(labels, &container) 191 containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeSystemContainer 192 metrics[key] = containerMetrics 193 } 194 } 195 196 func (this *summaryMetricsSource) decodePodStats(metrics map[string]*MetricSet, nodeLabels map[string]string, pod *stats.PodStats) { 197 podMetrics := &MetricSet{ 198 Labels: this.cloneLabels(nodeLabels), 199 MetricValues: map[string]MetricValue{}, 200 LabeledMetrics: []LabeledMetric{}, 201 CreateTime: pod.StartTime.Time, 202 ScrapeTime: this.getScrapeTime(nil, nil, pod.Network), 203 } 204 ref := pod.PodRef 205 podMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePod 206 podMetrics.Labels[LabelPodId.Key] = ref.UID 207 podMetrics.Labels[LabelPodName.Key] = ref.Name 208 podMetrics.Labels[LabelNamespaceName.Key] = ref.Namespace 209 // Needed for backward compatibility 210 podMetrics.Labels[LabelPodNamespace.Key] = ref.Namespace 211 212 this.decodeUptime(podMetrics, pod.StartTime.Time) 213 this.decodeNetworkStats(podMetrics, pod.Network) 214 for _, vol := range pod.VolumeStats { 215 this.decodeFsStats(podMetrics, VolumeResourcePrefix+vol.Name, &vol.FsStats) 216 } 217 metrics[PodKey(ref.Namespace, ref.Name)] = podMetrics 218 219 for _, container := range pod.Containers { 220 key := PodContainerKey(ref.Namespace, ref.Name, container.Name) 221 metrics[key] = this.decodeContainerStats(podMetrics.Labels, &container) 222 } 223 } 224 225 func (this *summaryMetricsSource) decodeContainerStats(podLabels map[string]string, container *stats.ContainerStats) *MetricSet { 226 containerMetrics := &MetricSet{ 227 Labels: this.cloneLabels(podLabels), 228 MetricValues: map[string]MetricValue{}, 229 LabeledMetrics: []LabeledMetric{}, 230 CreateTime: container.StartTime.Time, 231 ScrapeTime: this.getScrapeTime(container.CPU, container.Memory, nil), 232 } 233 containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePodContainer 234 containerMetrics.Labels[LabelContainerName.Key] = this.getContainerName(container) 235 236 this.decodeUptime(containerMetrics, container.StartTime.Time) 237 this.decodeCPUStats(containerMetrics, container.CPU) 238 this.decodeMemoryStats(containerMetrics, container.Memory) 239 this.decodeFsStats(containerMetrics, RootFsKey, container.Rootfs) 240 this.decodeFsStats(containerMetrics, LogsKey, container.Logs) 241 this.decodeUserDefinedMetrics(containerMetrics, container.UserDefinedMetrics) 242 243 return containerMetrics 244 } 245 246 func (this *summaryMetricsSource) decodeUptime(metrics *MetricSet, startTime time.Time) { 247 if startTime.IsZero() { 248 return 249 } 250 251 uptime := uint64(time.Since(startTime).Nanoseconds() / time.Millisecond.Nanoseconds()) 252 this.addIntMetric(metrics, &MetricUptime, &uptime) 253 } 254 255 func (this *summaryMetricsSource) decodeCPUStats(metrics *MetricSet, cpu *stats.CPUStats) { 256 if cpu == nil { 257 return 258 } 259 260 this.addIntMetric(metrics, &MetricCpuUsage, cpu.UsageCoreNanoSeconds) 261 } 262 263 func (this *summaryMetricsSource) decodeMemoryStats(metrics *MetricSet, memory *stats.MemoryStats) { 264 if memory == nil { 265 return 266 } 267 268 this.addIntMetric(metrics, &MetricMemoryUsage, memory.UsageBytes) 269 this.addIntMetric(metrics, &MetricMemoryWorkingSet, memory.WorkingSetBytes) 270 this.addIntMetric(metrics, &MetricMemoryPageFaults, memory.PageFaults) 271 this.addIntMetric(metrics, &MetricMemoryMajorPageFaults, memory.MajorPageFaults) 272 } 273 274 func (this *summaryMetricsSource) decodeNetworkStats(metrics *MetricSet, network *stats.NetworkStats) { 275 if network == nil { 276 return 277 } 278 279 this.addIntMetric(metrics, &MetricNetworkRx, network.RxBytes) 280 this.addIntMetric(metrics, &MetricNetworkRxErrors, network.RxErrors) 281 this.addIntMetric(metrics, &MetricNetworkTx, network.TxBytes) 282 this.addIntMetric(metrics, &MetricNetworkTxErrors, network.TxErrors) 283 } 284 285 func (this *summaryMetricsSource) decodeFsStats(metrics *MetricSet, fsKey string, fs *stats.FsStats) { 286 if fs == nil { 287 return 288 } 289 290 fsLabels := map[string]string{LabelResourceID.Key: fsKey} 291 this.addLabeledIntMetric(metrics, &MetricFilesystemUsage, fsLabels, fs.UsedBytes) 292 this.addLabeledIntMetric(metrics, &MetricFilesystemLimit, fsLabels, fs.CapacityBytes) 293 this.addLabeledIntMetric(metrics, &MetricFilesystemAvailable, fsLabels, fs.AvailableBytes) 294 } 295 296 func (this *summaryMetricsSource) decodeUserDefinedMetrics(metrics *MetricSet, udm []stats.UserDefinedMetric) { 297 for _, metric := range udm { 298 mv := MetricValue{} 299 switch metric.Type { 300 case stats.MetricGauge: 301 mv.MetricType = MetricGauge 302 case stats.MetricCumulative: 303 mv.MetricType = MetricCumulative 304 case stats.MetricDelta: 305 mv.MetricType = MetricDelta 306 default: 307 glog.V(4).Infof("Skipping %s: unknown custom metric type: %v", metric.Name, metric.Type) 308 continue 309 } 310 311 // TODO: Handle double-precision values. 312 mv.ValueType = ValueFloat 313 mv.FloatValue = float32(metric.Value) 314 315 metrics.MetricValues[CustomMetricPrefix+metric.Name] = mv 316 } 317 } 318 319 func (this *summaryMetricsSource) getScrapeTime(cpu *stats.CPUStats, memory *stats.MemoryStats, network *stats.NetworkStats) time.Time { 320 // Assume CPU, memory and network scrape times are the same. 321 switch { 322 case cpu != nil && !cpu.Time.IsZero(): 323 return cpu.Time.Time 324 case memory != nil && !memory.Time.IsZero(): 325 return memory.Time.Time 326 case network != nil && !network.Time.IsZero(): 327 return network.Time.Time 328 default: 329 return time.Time{} 330 } 331 } 332 333 // addIntMetric is a convenience method for adding the metric and value to the metric set. 334 func (this *summaryMetricsSource) addIntMetric(metrics *MetricSet, metric *Metric, value *uint64) { 335 if value == nil { 336 return 337 } 338 val := MetricValue{ 339 ValueType: ValueInt64, 340 MetricType: metric.Type, 341 IntValue: int64(*value), 342 } 343 metrics.MetricValues[metric.Name] = val 344 } 345 346 // addLabeledIntMetric is a convenience method for adding the labeled metric and value to the metric set. 347 func (this *summaryMetricsSource) addLabeledIntMetric(metrics *MetricSet, metric *Metric, labels map[string]string, value *uint64) { 348 if value == nil { 349 return 350 } 351 352 val := LabeledMetric{ 353 Name: metric.Name, 354 Labels: labels, 355 MetricValue: MetricValue{ 356 ValueType: ValueInt64, 357 MetricType: metric.Type, 358 IntValue: int64(*value), 359 }, 360 } 361 metrics.LabeledMetrics = append(metrics.LabeledMetrics, val) 362 } 363 364 // Translate system container names to the legacy names for backwards compatibility. 365 func (this *summaryMetricsSource) getContainerName(c *stats.ContainerStats) string { 366 if legacyName, ok := systemNameMap[c.Name]; ok { 367 return legacyName 368 } 369 return c.Name 370 } 371 372 // TODO: The summaryProvider duplicates a lot of code from kubeletProvider, and should be refactored. 373 type summaryProvider struct { 374 nodeLister *cache.StoreToNodeLister 375 reflector *cache.Reflector 376 kubeletClient *kubelet.KubeletClient 377 } 378 379 func (this *summaryProvider) GetMetricsSources() []MetricsSource { 380 sources := []MetricsSource{} 381 nodes, err := this.nodeLister.List() 382 if err != nil { 383 glog.Errorf("error while listing nodes: %v", err) 384 return sources 385 } 386 387 for _, node := range nodes.Items { 388 info, err := this.getNodeInfo(&node) 389 if err != nil { 390 glog.Errorf("%v", err) 391 continue 392 } 393 fallback := kubelet.NewKubeletMetricsSource( 394 info.Host, 395 this.kubeletClient, 396 info.NodeName, 397 info.HostName, 398 info.HostID, 399 ) 400 sources = append(sources, NewSummaryMetricsSource(info, this.kubeletClient, fallback)) 401 } 402 return sources 403 } 404 405 func (this *summaryProvider) getNodeInfo(node *kube_api.Node) (NodeInfo, error) { 406 for _, c := range node.Status.Conditions { 407 if c.Type == kube_api.NodeReady && c.Status != kube_api.ConditionTrue { 408 return NodeInfo{}, fmt.Errorf("Node %v is not ready", node.Name) 409 } 410 } 411 info := NodeInfo{ 412 NodeName: node.Name, 413 HostName: node.Name, 414 HostID: node.Spec.ExternalID, 415 Host: kubelet.Host{ 416 Port: this.kubeletClient.GetPort(), 417 }, 418 KubeletVersion: node.Status.NodeInfo.KubeletVersion, 419 } 420 421 for _, addr := range node.Status.Addresses { 422 if addr.Type == kube_api.NodeHostName && addr.Address != "" { 423 info.HostName = addr.Address 424 } 425 if addr.Type == kube_api.NodeInternalIP && addr.Address != "" { 426 info.IP = addr.Address 427 } 428 if addr.Type == kube_api.NodeLegacyHostIP && addr.Address != "" && info.IP == "" { 429 info.IP = addr.Address 430 } 431 } 432 433 if info.IP == "" { 434 return info, fmt.Errorf("Node %v has no valid hostname and/or IP address: %v %v", node.Name, info.HostName, info.IP) 435 } 436 437 return info, nil 438 } 439 440 func NewSummaryProvider(uri *url.URL) (MetricsSourceProvider, error) { 441 // create clients 442 kubeConfig, kubeletConfig, err := kubelet.GetKubeConfigs(uri) 443 if err != nil { 444 return nil, err 445 } 446 kubeClient := kube_client.NewOrDie(kubeConfig) 447 kubeletClient, err := kubelet.NewKubeletClient(kubeletConfig) 448 if err != nil { 449 return nil, err 450 } 451 // watch nodes 452 nodeLister, reflector, _ := util.GetNodeLister(kubeClient) 453 454 return &summaryProvider{ 455 nodeLister: nodeLister, 456 reflector: reflector, 457 kubeletClient: kubeletClient, 458 }, nil 459 }