k8s.io/kubernetes@v1.29.3/test/e2e/framework/debug/resource_usage_gatherer.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package debug 18 19 import ( 20 "bufio" 21 "bytes" 22 "context" 23 "encoding/json" 24 "fmt" 25 "math" 26 "regexp" 27 "sort" 28 "strconv" 29 "strings" 30 "sync" 31 "text/tabwriter" 32 "time" 33 34 v1 "k8s.io/api/core/v1" 35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 36 "k8s.io/apimachinery/pkg/fields" 37 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 38 clientset "k8s.io/client-go/kubernetes" 39 kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1" 40 41 "k8s.io/kubernetes/test/e2e/framework" 42 e2essh "k8s.io/kubernetes/test/e2e/framework/ssh" 43 ) 44 45 // ResourceConstraint is a struct to hold constraints. 46 type ResourceConstraint struct { 47 CPUConstraint float64 48 MemoryConstraint uint64 49 } 50 51 // SingleContainerSummary is a struct to hold single container summary. 52 type SingleContainerSummary struct { 53 Name string 54 CPU float64 55 Mem uint64 56 } 57 58 // ContainerResourceUsage is a structure for gathering container resource usage. 59 type ContainerResourceUsage struct { 60 Name string 61 Timestamp time.Time 62 CPUUsageInCores float64 63 MemoryUsageInBytes uint64 64 MemoryWorkingSetInBytes uint64 65 MemoryRSSInBytes uint64 66 // The interval used to calculate CPUUsageInCores. 67 CPUInterval time.Duration 68 } 69 70 // ResourceUsagePerContainer is map of ContainerResourceUsage 71 type ResourceUsagePerContainer map[string]*ContainerResourceUsage 72 73 // ResourceUsageSummary is a struct to hold resource usage summary. 74 // we can't have int here, as JSON does not accept integer keys. 75 type ResourceUsageSummary map[string][]SingleContainerSummary 76 77 // PrintHumanReadable prints resource usage summary in human readable. 78 func (s *ResourceUsageSummary) PrintHumanReadable() string { 79 buf := &bytes.Buffer{} 80 w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0) 81 for perc, summaries := range *s { 82 buf.WriteString(fmt.Sprintf("%v percentile:\n", perc)) 83 fmt.Fprintf(w, "container\tcpu(cores)\tmemory(MB)\n") 84 for _, summary := range summaries { 85 fmt.Fprintf(w, "%q\t%.3f\t%.2f\n", summary.Name, summary.CPU, float64(summary.Mem)/(1024*1024)) 86 } 87 w.Flush() 88 } 89 return buf.String() 90 } 91 92 // PrintJSON prints resource usage summary in JSON. 93 func (s *ResourceUsageSummary) PrintJSON() string { 94 return framework.PrettyPrintJSON(*s) 95 } 96 97 // SummaryKind returns string of ResourceUsageSummary 98 func (s *ResourceUsageSummary) SummaryKind() string { 99 return "ResourceUsageSummary" 100 } 101 102 type uint64arr []uint64 103 104 func (a uint64arr) Len() int { return len(a) } 105 func (a uint64arr) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 106 func (a uint64arr) Less(i, j int) bool { return a[i] < a[j] } 107 108 type usageDataPerContainer struct { 109 cpuData []float64 110 memUseData []uint64 111 memWorkSetData []uint64 112 } 113 114 func computePercentiles(timeSeries []ResourceUsagePerContainer, percentilesToCompute []int) map[int]ResourceUsagePerContainer { 115 if len(timeSeries) == 0 { 116 return make(map[int]ResourceUsagePerContainer) 117 } 118 dataMap := make(map[string]*usageDataPerContainer) 119 for i := range timeSeries { 120 for name, data := range timeSeries[i] { 121 if dataMap[name] == nil { 122 dataMap[name] = &usageDataPerContainer{ 123 cpuData: make([]float64, 0, len(timeSeries)), 124 memUseData: make([]uint64, 0, len(timeSeries)), 125 memWorkSetData: make([]uint64, 0, len(timeSeries)), 126 } 127 } 128 dataMap[name].cpuData = append(dataMap[name].cpuData, data.CPUUsageInCores) 129 dataMap[name].memUseData = append(dataMap[name].memUseData, data.MemoryUsageInBytes) 130 dataMap[name].memWorkSetData = append(dataMap[name].memWorkSetData, data.MemoryWorkingSetInBytes) 131 } 132 } 133 for _, v := range dataMap { 134 sort.Float64s(v.cpuData) 135 sort.Sort(uint64arr(v.memUseData)) 136 sort.Sort(uint64arr(v.memWorkSetData)) 137 } 138 139 result := make(map[int]ResourceUsagePerContainer) 140 for _, perc := range percentilesToCompute { 141 data := make(ResourceUsagePerContainer) 142 for k, v := range dataMap { 143 percentileIndex := int(math.Ceil(float64(len(v.cpuData)*perc)/100)) - 1 144 data[k] = &ContainerResourceUsage{ 145 Name: k, 146 CPUUsageInCores: v.cpuData[percentileIndex], 147 MemoryUsageInBytes: v.memUseData[percentileIndex], 148 MemoryWorkingSetInBytes: v.memWorkSetData[percentileIndex], 149 } 150 } 151 result[perc] = data 152 } 153 return result 154 } 155 156 func leftMergeData(left, right map[int]ResourceUsagePerContainer) map[int]ResourceUsagePerContainer { 157 result := make(map[int]ResourceUsagePerContainer) 158 for percentile, data := range left { 159 result[percentile] = data 160 if _, ok := right[percentile]; !ok { 161 continue 162 } 163 for k, v := range right[percentile] { 164 result[percentile][k] = v 165 } 166 } 167 return result 168 } 169 170 type resourceGatherWorker struct { 171 c clientset.Interface 172 nodeName string 173 wg *sync.WaitGroup 174 containerIDs []string 175 stopCh chan struct{} 176 dataSeries []ResourceUsagePerContainer 177 finished bool 178 inKubemark bool 179 resourceDataGatheringPeriod time.Duration 180 probeDuration time.Duration 181 printVerboseLogs bool 182 } 183 184 func (w *resourceGatherWorker) singleProbe(ctx context.Context) { 185 data := make(ResourceUsagePerContainer) 186 if w.inKubemark { 187 kubemarkData := getKubemarkMasterComponentsResourceUsage(ctx) 188 if kubemarkData == nil { 189 return 190 } 191 for k, v := range kubemarkData { 192 data[k] = &ContainerResourceUsage{ 193 Name: v.Name, 194 MemoryWorkingSetInBytes: v.MemoryWorkingSetInBytes, 195 CPUUsageInCores: v.CPUUsageInCores, 196 } 197 } 198 } else { 199 nodeUsage, err := getOneTimeResourceUsageOnNode(w.c, w.nodeName, w.probeDuration, func() []string { return w.containerIDs }) 200 if err != nil { 201 framework.Logf("Error while reading data from %v: %v", w.nodeName, err) 202 return 203 } 204 for k, v := range nodeUsage { 205 data[k] = v 206 if w.printVerboseLogs { 207 framework.Logf("Get container %v usage on node %v. CPUUsageInCores: %v, MemoryUsageInBytes: %v, MemoryWorkingSetInBytes: %v", k, w.nodeName, v.CPUUsageInCores, v.MemoryUsageInBytes, v.MemoryWorkingSetInBytes) 208 } 209 } 210 } 211 w.dataSeries = append(w.dataSeries, data) 212 } 213 214 // getOneTimeResourceUsageOnNode queries the node's /stats/summary endpoint 215 // and returns the resource usage of all containerNames for the past 216 // cpuInterval. 217 // The acceptable range of the interval is 2s~120s. Be warned that as the 218 // interval (and #containers) increases, the size of kubelet's response 219 // could be significant. E.g., the 60s interval stats for ~20 containers is 220 // ~1.5MB. Don't hammer the node with frequent, heavy requests. 221 // 222 // cadvisor records cumulative cpu usage in nanoseconds, so we need to have two 223 // stats points to compute the cpu usage over the interval. Assuming cadvisor 224 // polls every second, we'd need to get N stats points for N-second interval. 225 // Note that this is an approximation and may not be accurate, hence we also 226 // write the actual interval used for calculation (based on the timestamps of 227 // the stats points in ContainerResourceUsage.CPUInterval. 228 // 229 // containerNames is a function returning a collection of container names in which 230 // user is interested in. 231 func getOneTimeResourceUsageOnNode( 232 c clientset.Interface, 233 nodeName string, 234 cpuInterval time.Duration, 235 containerNames func() []string, 236 ) (ResourceUsagePerContainer, error) { 237 const ( 238 // cadvisor records stats about every second. 239 cadvisorStatsPollingIntervalInSeconds float64 = 1.0 240 // cadvisor caches up to 2 minutes of stats (configured by kubelet). 241 maxNumStatsToRequest int = 120 242 ) 243 244 numStats := int(float64(cpuInterval.Seconds()) / cadvisorStatsPollingIntervalInSeconds) 245 if numStats < 2 || numStats > maxNumStatsToRequest { 246 return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest) 247 } 248 // Get information of all containers on the node. 249 summary, err := getStatsSummary(c, nodeName) 250 if err != nil { 251 return nil, err 252 } 253 254 f := func(name string, newStats *kubeletstatsv1alpha1.ContainerStats) *ContainerResourceUsage { 255 if newStats == nil || newStats.CPU == nil || newStats.Memory == nil { 256 return nil 257 } 258 return &ContainerResourceUsage{ 259 Name: name, 260 Timestamp: newStats.StartTime.Time, 261 CPUUsageInCores: float64(removeUint64Ptr(newStats.CPU.UsageNanoCores)) / 1000000000, 262 MemoryUsageInBytes: removeUint64Ptr(newStats.Memory.UsageBytes), 263 MemoryWorkingSetInBytes: removeUint64Ptr(newStats.Memory.WorkingSetBytes), 264 MemoryRSSInBytes: removeUint64Ptr(newStats.Memory.RSSBytes), 265 CPUInterval: 0, 266 } 267 } 268 // Process container infos that are relevant to us. 269 containers := containerNames() 270 usageMap := make(ResourceUsagePerContainer, len(containers)) 271 for _, pod := range summary.Pods { 272 for _, container := range pod.Containers { 273 isInteresting := false 274 for _, interestingContainerName := range containers { 275 if container.Name == interestingContainerName { 276 isInteresting = true 277 break 278 } 279 } 280 if !isInteresting { 281 continue 282 } 283 if usage := f(pod.PodRef.Name+"/"+container.Name, &container); usage != nil { 284 usageMap[pod.PodRef.Name+"/"+container.Name] = usage 285 } 286 } 287 } 288 return usageMap, nil 289 } 290 291 // getStatsSummary contacts kubelet for the container information. 292 func getStatsSummary(c clientset.Interface, nodeName string) (*kubeletstatsv1alpha1.Summary, error) { 293 ctx, cancel := context.WithTimeout(context.Background(), framework.SingleCallTimeout) 294 defer cancel() 295 296 data, err := c.CoreV1().RESTClient().Get(). 297 Resource("nodes"). 298 SubResource("proxy"). 299 Name(fmt.Sprintf("%v:%v", nodeName, framework.KubeletPort)). 300 Suffix("stats/summary"). 301 Do(ctx).Raw() 302 303 if err != nil { 304 return nil, err 305 } 306 307 summary := kubeletstatsv1alpha1.Summary{} 308 err = json.Unmarshal(data, &summary) 309 if err != nil { 310 return nil, err 311 } 312 return &summary, nil 313 } 314 315 func removeUint64Ptr(ptr *uint64) uint64 { 316 if ptr == nil { 317 return 0 318 } 319 return *ptr 320 } 321 322 func (w *resourceGatherWorker) gather(ctx context.Context, initialSleep time.Duration) { 323 defer utilruntime.HandleCrash() 324 defer w.wg.Done() 325 defer framework.Logf("Closing worker for %v", w.nodeName) 326 defer func() { w.finished = true }() 327 select { 328 case <-time.After(initialSleep): 329 w.singleProbe(ctx) 330 for { 331 select { 332 case <-time.After(w.resourceDataGatheringPeriod): 333 w.singleProbe(ctx) 334 case <-ctx.Done(): 335 return 336 case <-w.stopCh: 337 return 338 } 339 } 340 case <-ctx.Done(): 341 return 342 case <-w.stopCh: 343 return 344 } 345 } 346 347 // ContainerResourceGatherer is a struct for gathering container resource. 348 type ContainerResourceGatherer struct { 349 client clientset.Interface 350 stopCh chan struct{} 351 workers []resourceGatherWorker 352 workerWg sync.WaitGroup 353 containerIDs []string 354 options ResourceGathererOptions 355 } 356 357 // ResourceGathererOptions is a struct to hold options for resource. 358 type ResourceGathererOptions struct { 359 InKubemark bool 360 Nodes NodesSet 361 ResourceDataGatheringPeriod time.Duration 362 ProbeDuration time.Duration 363 PrintVerboseLogs bool 364 } 365 366 // NodesSet is a value of nodes set. 367 type NodesSet int 368 369 const ( 370 // AllNodes means all containers on all nodes. 371 AllNodes NodesSet = 0 372 // MasterNodes means all containers on Master nodes only. 373 MasterNodes NodesSet = 1 374 // MasterAndDNSNodes means all containers on Master nodes and DNS containers on other nodes. 375 MasterAndDNSNodes NodesSet = 2 376 ) 377 378 // nodeHasControlPlanePods returns true if specified node has control plane pods 379 // (kube-scheduler and/or kube-controller-manager). 380 func nodeHasControlPlanePods(ctx context.Context, c clientset.Interface, nodeName string) (bool, error) { 381 regKubeScheduler := regexp.MustCompile("kube-scheduler-.*") 382 regKubeControllerManager := regexp.MustCompile("kube-controller-manager-.*") 383 384 podList, err := c.CoreV1().Pods(metav1.NamespaceSystem).List(ctx, metav1.ListOptions{ 385 FieldSelector: fields.OneTermEqualSelector("spec.nodeName", nodeName).String(), 386 }) 387 if err != nil { 388 return false, err 389 } 390 if len(podList.Items) < 1 { 391 framework.Logf("Can't find any pods in namespace %s to grab metrics from", metav1.NamespaceSystem) 392 } 393 for _, pod := range podList.Items { 394 if regKubeScheduler.MatchString(pod.Name) || regKubeControllerManager.MatchString(pod.Name) { 395 return true, nil 396 } 397 } 398 return false, nil 399 } 400 401 // NewResourceUsageGatherer returns a new ContainerResourceGatherer. 402 func NewResourceUsageGatherer(ctx context.Context, c clientset.Interface, options ResourceGathererOptions, pods *v1.PodList) (*ContainerResourceGatherer, error) { 403 g := ContainerResourceGatherer{ 404 client: c, 405 stopCh: make(chan struct{}), 406 containerIDs: make([]string, 0), 407 options: options, 408 } 409 410 if options.InKubemark { 411 g.workerWg.Add(1) 412 g.workers = append(g.workers, resourceGatherWorker{ 413 inKubemark: true, 414 stopCh: g.stopCh, 415 wg: &g.workerWg, 416 finished: false, 417 resourceDataGatheringPeriod: options.ResourceDataGatheringPeriod, 418 probeDuration: options.ProbeDuration, 419 printVerboseLogs: options.PrintVerboseLogs, 420 }) 421 return &g, nil 422 } 423 424 // Tracks kube-system pods if no valid PodList is passed in. 425 var err error 426 if pods == nil { 427 pods, err = c.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{}) 428 if err != nil { 429 framework.Logf("Error while listing Pods: %v", err) 430 return nil, err 431 } 432 } 433 dnsNodes := make(map[string]bool) 434 for _, pod := range pods.Items { 435 if options.Nodes == MasterNodes { 436 isControlPlane, err := nodeHasControlPlanePods(ctx, c, pod.Spec.NodeName) 437 if err != nil { 438 return nil, err 439 } 440 if !isControlPlane { 441 continue 442 } 443 } 444 if options.Nodes == MasterAndDNSNodes { 445 isControlPlane, err := nodeHasControlPlanePods(ctx, c, pod.Spec.NodeName) 446 if err != nil { 447 return nil, err 448 } 449 if !isControlPlane && pod.Labels["k8s-app"] != "kube-dns" { 450 continue 451 } 452 } 453 for _, container := range pod.Status.InitContainerStatuses { 454 g.containerIDs = append(g.containerIDs, container.Name) 455 } 456 for _, container := range pod.Status.ContainerStatuses { 457 g.containerIDs = append(g.containerIDs, container.Name) 458 } 459 if options.Nodes == MasterAndDNSNodes { 460 dnsNodes[pod.Spec.NodeName] = true 461 } 462 } 463 nodeList, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 464 if err != nil { 465 framework.Logf("Error while listing Nodes: %v", err) 466 return nil, err 467 } 468 469 for _, node := range nodeList.Items { 470 isControlPlane, err := nodeHasControlPlanePods(ctx, c, node.Name) 471 if err != nil { 472 return nil, err 473 } 474 if options.Nodes == AllNodes || isControlPlane || dnsNodes[node.Name] { 475 g.workerWg.Add(1) 476 g.workers = append(g.workers, resourceGatherWorker{ 477 c: c, 478 nodeName: node.Name, 479 wg: &g.workerWg, 480 containerIDs: g.containerIDs, 481 stopCh: g.stopCh, 482 finished: false, 483 inKubemark: false, 484 resourceDataGatheringPeriod: options.ResourceDataGatheringPeriod, 485 probeDuration: options.ProbeDuration, 486 printVerboseLogs: options.PrintVerboseLogs, 487 }) 488 if options.Nodes == MasterNodes { 489 break 490 } 491 } 492 } 493 return &g, nil 494 } 495 496 // StartGatheringData starts a stat gathering worker blocks for each node to track, 497 // and blocks until StopAndSummarize is called. 498 func (g *ContainerResourceGatherer) StartGatheringData(ctx context.Context) { 499 if len(g.workers) == 0 { 500 return 501 } 502 delayPeriod := g.options.ResourceDataGatheringPeriod / time.Duration(len(g.workers)) 503 delay := time.Duration(0) 504 for i := range g.workers { 505 go g.workers[i].gather(ctx, delay) 506 delay += delayPeriod 507 } 508 g.workerWg.Wait() 509 } 510 511 // StopAndSummarize stops stat gathering workers, processes the collected stats, 512 // generates resource summary for the passed-in percentiles, and returns the summary. 513 // It returns an error if the resource usage at any percentile is beyond the 514 // specified resource constraints. 515 func (g *ContainerResourceGatherer) StopAndSummarize(percentiles []int, constraints map[string]ResourceConstraint) (*ResourceUsageSummary, error) { 516 close(g.stopCh) 517 framework.Logf("Closed stop channel. Waiting for %v workers", len(g.workers)) 518 finished := make(chan struct{}, 1) 519 go func() { 520 g.workerWg.Wait() 521 finished <- struct{}{} 522 }() 523 select { 524 case <-finished: 525 framework.Logf("Waitgroup finished.") 526 case <-time.After(2 * time.Minute): 527 unfinished := make([]string, 0) 528 for i := range g.workers { 529 if !g.workers[i].finished { 530 unfinished = append(unfinished, g.workers[i].nodeName) 531 } 532 } 533 framework.Logf("Timed out while waiting for waitgroup, some workers failed to finish: %v", unfinished) 534 } 535 536 if len(percentiles) == 0 { 537 framework.Logf("Warning! Empty percentile list for stopAndPrintData.") 538 return &ResourceUsageSummary{}, fmt.Errorf("Failed to get any resource usage data") 539 } 540 data := make(map[int]ResourceUsagePerContainer) 541 for i := range g.workers { 542 if g.workers[i].finished { 543 stats := computePercentiles(g.workers[i].dataSeries, percentiles) 544 data = leftMergeData(stats, data) 545 } 546 } 547 548 // Workers has been stopped. We need to gather data stored in them. 549 sortedKeys := []string{} 550 for name := range data[percentiles[0]] { 551 sortedKeys = append(sortedKeys, name) 552 } 553 sort.Strings(sortedKeys) 554 violatedConstraints := make([]string, 0) 555 summary := make(ResourceUsageSummary) 556 for _, perc := range percentiles { 557 for _, name := range sortedKeys { 558 usage := data[perc][name] 559 summary[strconv.Itoa(perc)] = append(summary[strconv.Itoa(perc)], SingleContainerSummary{ 560 Name: name, 561 CPU: usage.CPUUsageInCores, 562 Mem: usage.MemoryWorkingSetInBytes, 563 }) 564 565 // Verifying 99th percentile of resource usage 566 if perc != 99 { 567 continue 568 } 569 // Name has a form: <pod_name>/<container_name> 570 containerName := strings.Split(name, "/")[1] 571 constraint, ok := constraints[containerName] 572 if !ok { 573 continue 574 } 575 if usage.CPUUsageInCores > constraint.CPUConstraint { 576 violatedConstraints = append( 577 violatedConstraints, 578 fmt.Sprintf("Container %v is using %v/%v CPU", 579 name, 580 usage.CPUUsageInCores, 581 constraint.CPUConstraint, 582 ), 583 ) 584 } 585 if usage.MemoryWorkingSetInBytes > constraint.MemoryConstraint { 586 violatedConstraints = append( 587 violatedConstraints, 588 fmt.Sprintf("Container %v is using %v/%v MB of memory", 589 name, 590 float64(usage.MemoryWorkingSetInBytes)/(1024*1024), 591 float64(constraint.MemoryConstraint)/(1024*1024), 592 ), 593 ) 594 } 595 } 596 } 597 if len(violatedConstraints) > 0 { 598 return &summary, fmt.Errorf(strings.Join(violatedConstraints, "\n")) 599 } 600 return &summary, nil 601 } 602 603 // kubemarkResourceUsage is a struct for tracking the resource usage of kubemark. 604 type kubemarkResourceUsage struct { 605 Name string 606 MemoryWorkingSetInBytes uint64 607 CPUUsageInCores float64 608 } 609 610 func getMasterUsageByPrefix(ctx context.Context, prefix string) (string, error) { 611 sshResult, err := e2essh.SSH(ctx, fmt.Sprintf("ps ax -o %%cpu,rss,command | tail -n +2 | grep %v | sed 's/\\s+/ /g'", prefix), framework.APIAddress()+":22", framework.TestContext.Provider) 612 if err != nil { 613 return "", err 614 } 615 return sshResult.Stdout, nil 616 } 617 618 // getKubemarkMasterComponentsResourceUsage returns the resource usage of kubemark which contains multiple combinations of cpu and memory usage for each pod name. 619 func getKubemarkMasterComponentsResourceUsage(ctx context.Context) map[string]*kubemarkResourceUsage { 620 result := make(map[string]*kubemarkResourceUsage) 621 // Get kubernetes component resource usage 622 sshResult, err := getMasterUsageByPrefix(ctx, "kube") 623 if err != nil { 624 framework.Logf("Error when trying to SSH to master machine. Skipping probe. %v", err) 625 return nil 626 } 627 scanner := bufio.NewScanner(strings.NewReader(sshResult)) 628 for scanner.Scan() { 629 var cpu float64 630 var mem uint64 631 var name string 632 fmt.Sscanf(strings.TrimSpace(scanner.Text()), "%f %d /usr/local/bin/kube-%s", &cpu, &mem, &name) 633 if name != "" { 634 // Gatherer expects pod_name/container_name format 635 fullName := name + "/" + name 636 result[fullName] = &kubemarkResourceUsage{Name: fullName, MemoryWorkingSetInBytes: mem * 1024, CPUUsageInCores: cpu / 100} 637 } 638 } 639 // Get etcd resource usage 640 sshResult, err = getMasterUsageByPrefix(ctx, "bin/etcd") 641 if err != nil { 642 framework.Logf("Error when trying to SSH to master machine. Skipping probe") 643 return nil 644 } 645 scanner = bufio.NewScanner(strings.NewReader(sshResult)) 646 for scanner.Scan() { 647 var cpu float64 648 var mem uint64 649 var etcdKind string 650 fmt.Sscanf(strings.TrimSpace(scanner.Text()), "%f %d /bin/sh -c /usr/local/bin/etcd", &cpu, &mem) 651 dataDirStart := strings.Index(scanner.Text(), "--data-dir") 652 if dataDirStart < 0 { 653 continue 654 } 655 fmt.Sscanf(scanner.Text()[dataDirStart:], "--data-dir=/var/%s", &etcdKind) 656 if etcdKind != "" { 657 // Gatherer expects pod_name/container_name format 658 fullName := "etcd/" + etcdKind 659 result[fullName] = &kubemarkResourceUsage{Name: fullName, MemoryWorkingSetInBytes: mem * 1024, CPUUsageInCores: cpu / 100} 660 } 661 } 662 return result 663 }