github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/sources/summary/summary.go (about) 1 // Copyright 2015 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package summary 16 17 import ( 18 "fmt" 19 "net/url" 20 "time" 21 22 . "k8s.io/heapster/metrics/core" 23 "k8s.io/heapster/metrics/sources/kubelet" 24 25 "github.com/golang/glog" 26 "github.com/prometheus/client_golang/prometheus" 27 kube_api "k8s.io/api/core/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 kube_client "k8s.io/client-go/kubernetes" 30 v1listers "k8s.io/client-go/listers/core/v1" 31 "k8s.io/client-go/tools/cache" 32 "k8s.io/heapster/metrics/util" 33 stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" 34 ) 35 36 var ( 37 summaryRequestLatency = prometheus.NewSummaryVec( 38 prometheus.SummaryOpts{ 39 Namespace: "heapster", 40 Subsystem: "kubelet_summary", 41 Name: "request_duration_milliseconds", 42 Help: "The Kubelet summary request latencies in milliseconds.", 43 }, 44 []string{"node"}, 45 ) 46 ) 47 48 // Prefix used for the LabelResourceID for volume metrics. 49 const VolumeResourcePrefix = "Volume:" 50 51 func init() { 52 prometheus.MustRegister(summaryRequestLatency) 53 } 54 55 type NodeInfo struct { 56 kubelet.Host 57 NodeName string 58 HostName string 59 HostID string 60 KubeletVersion string 61 } 62 63 // Kubelet-provided metrics for pod and system container. 64 type summaryMetricsSource struct { 65 node NodeInfo 66 kubeletClient *kubelet.KubeletClient 67 } 68 69 func NewSummaryMetricsSource(node NodeInfo, client *kubelet.KubeletClient) MetricsSource { 70 return &summaryMetricsSource{ 71 node: node, 72 kubeletClient: client, 73 } 74 } 75 76 func (this *summaryMetricsSource) Name() string { 77 return this.String() 78 } 79 80 func (this *summaryMetricsSource) String() string { 81 return fmt.Sprintf("kubelet_summary:%s:%d", this.node.IP, this.node.Port) 82 } 83 84 func (this *summaryMetricsSource) ScrapeMetrics(start, end time.Time) (*DataBatch, error) { 85 result := &DataBatch{ 86 Timestamp: time.Now(), 87 MetricSets: map[string]*MetricSet{}, 88 } 89 90 summary, err := func() (*stats.Summary, error) { 91 startTime := time.Now() 92 defer func() { 93 summaryRequestLatency.WithLabelValues(this.node.HostName).Observe(float64(time.Since(startTime)) / float64(time.Millisecond)) 94 }() 95 return this.kubeletClient.GetSummary(this.node.Host) 96 }() 97 98 if err != nil { 99 return nil, err 100 } 101 102 result.MetricSets = this.decodeSummary(summary) 103 104 return result, err 105 } 106 107 const ( 108 RootFsKey = "/" 109 LogsKey = "logs" 110 ) 111 112 // For backwards compatibility, map summary system names into original names. 113 // TODO: Migrate to the new system names and remove this. 114 var systemNameMap = map[string]string{ 115 stats.SystemContainerRuntime: "docker-daemon", 116 stats.SystemContainerMisc: "system", 117 } 118 119 // decodeSummary translates the kubelet statsSummary API into the flattened heapster MetricSet API. 120 func (this *summaryMetricsSource) decodeSummary(summary *stats.Summary) map[string]*MetricSet { 121 glog.V(9).Infof("Begin summary decode") 122 result := map[string]*MetricSet{} 123 124 labels := map[string]string{ 125 LabelNodename.Key: this.node.NodeName, 126 LabelHostname.Key: this.node.HostName, 127 LabelHostID.Key: this.node.HostID, 128 } 129 130 this.decodeNodeStats(result, labels, &summary.Node) 131 for _, pod := range summary.Pods { 132 this.decodePodStats(result, labels, &pod) 133 } 134 135 glog.V(9).Infof("End summary decode") 136 return result 137 } 138 139 // Convenience method for labels deep copy. 140 func (this *summaryMetricsSource) cloneLabels(labels map[string]string) map[string]string { 141 clone := make(map[string]string, len(labels)) 142 for k, v := range labels { 143 clone[k] = v 144 } 145 return clone 146 } 147 148 func (this *summaryMetricsSource) decodeNodeStats(metrics map[string]*MetricSet, labels map[string]string, node *stats.NodeStats) { 149 glog.V(9).Infof("Decoding node stats for node %s...", node.NodeName) 150 nodeMetrics := &MetricSet{ 151 Labels: this.cloneLabels(labels), 152 MetricValues: map[string]MetricValue{}, 153 LabeledMetrics: []LabeledMetric{}, 154 CollectionStartTime: node.StartTime.Time, 155 ScrapeTime: this.getScrapeTime(node.CPU, node.Memory, node.Network), 156 } 157 nodeMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeNode 158 159 this.decodeUptime(nodeMetrics, node.StartTime.Time) 160 this.decodeCPUStats(nodeMetrics, node.CPU) 161 this.decodeMemoryStats(nodeMetrics, node.Memory) 162 this.decodeNetworkStats(nodeMetrics, node.Network) 163 this.decodeFsStats(nodeMetrics, RootFsKey, node.Fs) 164 this.decodeEphemeralStorageStats(nodeMetrics, node.Fs) 165 metrics[NodeKey(node.NodeName)] = nodeMetrics 166 167 for _, container := range node.SystemContainers { 168 key := NodeContainerKey(node.NodeName, this.getSystemContainerName(&container)) 169 containerMetrics := this.decodeContainerStats(labels, &container, true) 170 containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypeSystemContainer 171 metrics[key] = containerMetrics 172 } 173 } 174 175 func (this *summaryMetricsSource) decodePodStats(metrics map[string]*MetricSet, nodeLabels map[string]string, pod *stats.PodStats) { 176 glog.V(9).Infof("Decoding pod stats for pod %s/%s (%s)...", pod.PodRef.Namespace, pod.PodRef.Name, pod.PodRef.UID) 177 podMetrics := &MetricSet{ 178 Labels: this.cloneLabels(nodeLabels), 179 MetricValues: map[string]MetricValue{}, 180 LabeledMetrics: []LabeledMetric{}, 181 CollectionStartTime: pod.StartTime.Time, 182 ScrapeTime: this.getScrapeTime(nil, nil, pod.Network), 183 } 184 ref := pod.PodRef 185 podMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePod 186 podMetrics.Labels[LabelPodId.Key] = ref.UID 187 podMetrics.Labels[LabelPodName.Key] = ref.Name 188 podMetrics.Labels[LabelNamespaceName.Key] = ref.Namespace 189 190 this.decodeUptime(podMetrics, pod.StartTime.Time) 191 this.decodeNetworkStats(podMetrics, pod.Network) 192 this.decodeCPUStats(podMetrics, pod.CPU) 193 this.decodeMemoryStats(podMetrics, pod.Memory) 194 this.decodeEphemeralStorageStats(podMetrics, pod.EphemeralStorage) 195 for _, vol := range pod.VolumeStats { 196 this.decodeFsStats(podMetrics, VolumeResourcePrefix+vol.Name, &vol.FsStats) 197 } 198 metrics[PodKey(ref.Namespace, ref.Name)] = podMetrics 199 200 for _, container := range pod.Containers { 201 key := PodContainerKey(ref.Namespace, ref.Name, container.Name) 202 // This check ensures that we are not replacing metrics of running container with metrics of terminated one if 203 // there are two exactly same containers reported by kubelet. 204 if _, exist := metrics[key]; exist { 205 glog.V(2).Infof("Metrics reported from two containers with the same key: %v. Create time of "+ 206 "containers are %v and %v. Metrics from the older container are going to be dropped.", key, 207 container.StartTime.Time, metrics[key].CollectionStartTime) 208 if container.StartTime.Time.Before(metrics[key].CollectionStartTime) { 209 continue 210 } 211 } 212 metrics[key] = this.decodeContainerStats(podMetrics.Labels, &container, false) 213 } 214 } 215 216 func (this *summaryMetricsSource) decodeContainerStats(podLabels map[string]string, container *stats.ContainerStats, isSystemContainer bool) *MetricSet { 217 glog.V(9).Infof("Decoding container stats stats for container %s...", container.Name) 218 containerMetrics := &MetricSet{ 219 Labels: this.cloneLabels(podLabels), 220 MetricValues: map[string]MetricValue{}, 221 LabeledMetrics: []LabeledMetric{}, 222 CollectionStartTime: container.StartTime.Time, 223 ScrapeTime: this.getScrapeTime(container.CPU, container.Memory, nil), 224 } 225 containerMetrics.Labels[LabelMetricSetType.Key] = MetricSetTypePodContainer 226 if isSystemContainer { 227 containerMetrics.Labels[LabelContainerName.Key] = this.getSystemContainerName(container) 228 } else { 229 containerMetrics.Labels[LabelContainerName.Key] = container.Name 230 } 231 232 this.decodeUptime(containerMetrics, container.StartTime.Time) 233 this.decodeCPUStats(containerMetrics, container.CPU) 234 this.decodeMemoryStats(containerMetrics, container.Memory) 235 this.decodeAcceleratorStats(containerMetrics, container.Accelerators) 236 this.decodeFsStats(containerMetrics, RootFsKey, container.Rootfs) 237 this.decodeFsStats(containerMetrics, LogsKey, container.Logs) 238 this.decodeEphemeralStorageStatsForContainer(containerMetrics, container.Rootfs, container.Logs) 239 this.decodeUserDefinedMetrics(containerMetrics, container.UserDefinedMetrics) 240 241 return containerMetrics 242 } 243 244 func (this *summaryMetricsSource) decodeUptime(metrics *MetricSet, startTime time.Time) { 245 if startTime.IsZero() { 246 glog.V(9).Infof("missing start time!") 247 return 248 } 249 250 uptime := uint64(time.Since(startTime).Nanoseconds() / time.Millisecond.Nanoseconds()) 251 this.addIntMetric(metrics, &MetricUptime, &uptime) 252 } 253 254 func (this *summaryMetricsSource) decodeCPUStats(metrics *MetricSet, cpu *stats.CPUStats) { 255 if cpu == nil { 256 glog.V(9).Infof("missing cpu usage metric!") 257 return 258 } 259 260 this.addIntMetric(metrics, &MetricCpuUsage, cpu.UsageCoreNanoSeconds) 261 } 262 263 func (this *summaryMetricsSource) decodeEphemeralStorageStats(metrics *MetricSet, storage *stats.FsStats) { 264 if storage == nil { 265 glog.V(9).Infof("missing storage usage metric!") 266 return 267 } 268 this.addIntMetric(metrics, &MetricEphemeralStorageUsage, storage.UsedBytes) 269 } 270 271 func (this *summaryMetricsSource) decodeEphemeralStorageStatsForContainer(metrics *MetricSet, rootfs *stats.FsStats, logs *stats.FsStats) { 272 if rootfs == nil || logs == nil { 273 glog.V(9).Infof("missing storage usage metric!") 274 return 275 } 276 usage := *rootfs.UsedBytes + *logs.UsedBytes 277 this.addIntMetric(metrics, &MetricEphemeralStorageUsage, &usage) 278 } 279 280 func (this *summaryMetricsSource) decodeMemoryStats(metrics *MetricSet, memory *stats.MemoryStats) { 281 if memory == nil { 282 glog.V(9).Infof("missing memory metrics!") 283 return 284 } 285 286 this.addIntMetric(metrics, &MetricMemoryUsage, memory.UsageBytes) 287 this.addIntMetric(metrics, &MetricMemoryWorkingSet, memory.WorkingSetBytes) 288 this.addIntMetric(metrics, &MetricMemoryRSS, memory.RSSBytes) 289 this.addIntMetric(metrics, &MetricMemoryPageFaults, memory.PageFaults) 290 this.addIntMetric(metrics, &MetricMemoryMajorPageFaults, memory.MajorPageFaults) 291 } 292 293 func (this *summaryMetricsSource) decodeAcceleratorStats(metrics *MetricSet, accelerators []stats.AcceleratorStats) { 294 for _, accelerator := range accelerators { 295 acceleratorLabels := map[string]string{ 296 LabelAcceleratorMake.Key: accelerator.Make, 297 LabelAcceleratorModel.Key: accelerator.Model, 298 LabelAcceleratorID.Key: accelerator.ID, 299 } 300 this.addLabeledIntMetric(metrics, &MetricAcceleratorMemoryTotal, acceleratorLabels, &accelerator.MemoryTotal) 301 this.addLabeledIntMetric(metrics, &MetricAcceleratorMemoryUsed, acceleratorLabels, &accelerator.MemoryUsed) 302 this.addLabeledIntMetric(metrics, &MetricAcceleratorDutyCycle, acceleratorLabels, &accelerator.DutyCycle) 303 } 304 } 305 306 func (this *summaryMetricsSource) decodeNetworkStats(metrics *MetricSet, network *stats.NetworkStats) { 307 if network == nil { 308 glog.V(9).Infof("missing network metrics!") 309 return 310 } 311 312 this.addIntMetric(metrics, &MetricNetworkRx, network.RxBytes) 313 this.addIntMetric(metrics, &MetricNetworkRxErrors, network.RxErrors) 314 this.addIntMetric(metrics, &MetricNetworkTx, network.TxBytes) 315 this.addIntMetric(metrics, &MetricNetworkTxErrors, network.TxErrors) 316 } 317 318 func (this *summaryMetricsSource) decodeFsStats(metrics *MetricSet, fsKey string, fs *stats.FsStats) { 319 if fs == nil { 320 glog.V(9).Infof("missing fs metrics!") 321 return 322 } 323 324 fsLabels := map[string]string{LabelResourceID.Key: fsKey} 325 this.addLabeledIntMetric(metrics, &MetricFilesystemUsage, fsLabels, fs.UsedBytes) 326 this.addLabeledIntMetric(metrics, &MetricFilesystemLimit, fsLabels, fs.CapacityBytes) 327 this.addLabeledIntMetric(metrics, &MetricFilesystemAvailable, fsLabels, fs.AvailableBytes) 328 this.addLabeledIntMetric(metrics, &MetricFilesystemInodes, fsLabels, fs.Inodes) 329 this.addLabeledIntMetric(metrics, &MetricFilesystemInodesFree, fsLabels, fs.InodesFree) 330 } 331 332 func (this *summaryMetricsSource) decodeUserDefinedMetrics(metrics *MetricSet, udm []stats.UserDefinedMetric) { 333 for _, metric := range udm { 334 mv := MetricValue{} 335 switch metric.Type { 336 case stats.MetricGauge: 337 mv.MetricType = MetricGauge 338 case stats.MetricCumulative: 339 mv.MetricType = MetricCumulative 340 case stats.MetricDelta: 341 mv.MetricType = MetricDelta 342 default: 343 glog.V(4).Infof("Skipping %s: unknown custom metric type: %v", metric.Name, metric.Type) 344 continue 345 } 346 347 // TODO: Handle double-precision values. 348 mv.ValueType = ValueFloat 349 mv.FloatValue = metric.Value 350 351 metrics.MetricValues[CustomMetricPrefix+metric.Name] = mv 352 } 353 } 354 355 func (this *summaryMetricsSource) getScrapeTime(cpu *stats.CPUStats, memory *stats.MemoryStats, network *stats.NetworkStats) time.Time { 356 // Assume CPU, memory and network scrape times are the same. 357 switch { 358 case cpu != nil && !cpu.Time.IsZero(): 359 return cpu.Time.Time 360 case memory != nil && !memory.Time.IsZero(): 361 return memory.Time.Time 362 case network != nil && !network.Time.IsZero(): 363 return network.Time.Time 364 default: 365 return time.Time{} 366 } 367 } 368 369 // addIntMetric is a convenience method for adding the metric and value to the metric set. 370 func (this *summaryMetricsSource) addIntMetric(metrics *MetricSet, metric *Metric, value *uint64) { 371 if value == nil { 372 glog.V(9).Infof("skipping metric %s because the value was nil", metric.Name) 373 return 374 } 375 val := MetricValue{ 376 ValueType: ValueInt64, 377 MetricType: metric.Type, 378 IntValue: int64(*value), 379 } 380 metrics.MetricValues[metric.Name] = val 381 } 382 383 // addLabeledIntMetric is a convenience method for adding the labeled metric and value to the metric set. 384 func (this *summaryMetricsSource) addLabeledIntMetric(metrics *MetricSet, metric *Metric, labels map[string]string, value *uint64) { 385 if value == nil { 386 glog.V(9).Infof("skipping labeled metric %s (%v) because the value was nil", metric.Name, labels) 387 return 388 } 389 390 val := LabeledMetric{ 391 Name: metric.Name, 392 Labels: labels, 393 MetricValue: MetricValue{ 394 ValueType: ValueInt64, 395 MetricType: metric.Type, 396 IntValue: int64(*value), 397 }, 398 } 399 metrics.LabeledMetrics = append(metrics.LabeledMetrics, val) 400 } 401 402 // Translate system container names to the legacy names for backwards compatibility. 403 func (this *summaryMetricsSource) getSystemContainerName(c *stats.ContainerStats) string { 404 if legacyName, ok := systemNameMap[c.Name]; ok { 405 return legacyName 406 } 407 return c.Name 408 } 409 410 // TODO: The summaryProvider duplicates a lot of code from kubeletProvider, and should be refactored. 411 type summaryProvider struct { 412 nodeLister v1listers.NodeLister 413 reflector *cache.Reflector 414 kubeletClient *kubelet.KubeletClient 415 hostIDAnnotation string 416 } 417 418 func (this *summaryProvider) GetMetricsSources() []MetricsSource { 419 sources := []MetricsSource{} 420 nodes, err := this.nodeLister.List(labels.Everything()) 421 if err != nil { 422 glog.Errorf("error while listing nodes: %v", err) 423 return sources 424 } 425 426 for _, node := range nodes { 427 info, err := this.getNodeInfo(node) 428 if err != nil { 429 glog.Errorf("%v", err) 430 continue 431 } 432 sources = append(sources, NewSummaryMetricsSource(info, this.kubeletClient)) 433 } 434 return sources 435 } 436 437 func (this *summaryProvider) getNodeInfo(node *kube_api.Node) (NodeInfo, error) { 438 hostname, ip, err := kubelet.GetNodeHostnameAndIP(node) 439 if err != nil { 440 return NodeInfo{}, err 441 } 442 443 if hostname == "" { 444 hostname = node.Name 445 } 446 hostID := node.Spec.ExternalID 447 if hostID == "" && this.hostIDAnnotation != "" { 448 hostID = node.Annotations[this.hostIDAnnotation] 449 } 450 info := NodeInfo{ 451 NodeName: node.Name, 452 HostName: hostname, 453 HostID: hostID, 454 Host: kubelet.Host{ 455 IP: ip, 456 Port: this.kubeletClient.GetPort(), 457 }, 458 KubeletVersion: node.Status.NodeInfo.KubeletVersion, 459 } 460 return info, nil 461 } 462 463 func NewSummaryProvider(uri *url.URL) (MetricsSourceProvider, error) { 464 opts := uri.Query() 465 466 hostIDAnnotation := "" 467 if len(opts["host_id_annotation"]) > 0 { 468 hostIDAnnotation = opts["host_id_annotation"][0] 469 } 470 // create clients 471 kubeConfig, kubeletConfig, err := kubelet.GetKubeConfigs(uri) 472 if err != nil { 473 return nil, err 474 } 475 kubeClient := kube_client.NewForConfigOrDie(kubeConfig) 476 kubeletClient, err := kubelet.NewKubeletClient(kubeletConfig) 477 if err != nil { 478 return nil, err 479 } 480 // watch nodes 481 nodeLister, reflector, _ := util.GetNodeLister(kubeClient) 482 483 return &summaryProvider{ 484 nodeLister: nodeLister, 485 reflector: reflector, 486 kubeletClient: kubeletClient, 487 hostIDAnnotation: hostIDAnnotation, 488 }, nil 489 }