github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/k8s/metrics.go (about) 1 // Package k8s: initialization, client, and misc. helpers 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package k8s 6 7 import ( 8 "context" 9 "fmt" 10 "sync" 11 12 "github.com/NVIDIA/aistore/cmn/cos" 13 "github.com/NVIDIA/aistore/cmn/debug" 14 "k8s.io/apimachinery/pkg/api/errors" 15 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 "k8s.io/client-go/tools/clientcmd" 17 metrics "k8s.io/metrics/pkg/client/clientset/versioned" 18 ) 19 20 type metricsClient struct { 21 client *metrics.Clientset 22 err error 23 } 24 25 var ( 26 once sync.Once 27 _mc *metricsClient 28 ) 29 30 func InitMetricsClient() { once.Do(_initmc) } 31 32 func _initmc() { 33 config, err := clientcmd.BuildConfigFromFlags("", "") 34 if err != nil { 35 _mc = &metricsClient{ 36 err: fmt.Errorf("failed to retrieve metrics client config: %w", err), 37 } 38 return 39 } 40 mc, err := metrics.NewForConfig(config) 41 if err != nil { 42 _mc = &metricsClient{ 43 err: fmt.Errorf("failed to create metrics client: %w", err), 44 } 45 return 46 } 47 _mc = &metricsClient{ 48 client: mc, 49 } 50 } 51 52 func Metrics(podName string) (float64 /*cores*/, int64 /*mem*/, error) { 53 var ( 54 totalCPU, totalMem int64 55 fracCPU float64 56 ) 57 if _mc.err != nil { 58 return 0, 0, _mc.err 59 } 60 debug.Assert(_mc.client != nil) 61 62 var ( 63 mc = _mc.client 64 msgetter = mc.MetricsV1beta1().PodMetricses(metav1.NamespaceDefault) 65 ms, err = msgetter.Get(context.Background(), podName, metav1.GetOptions{}) 66 ) 67 if err != nil { 68 if statusErr, ok := err.(*errors.StatusError); ok && statusErr.Status().Reason == metav1.StatusReasonNotFound { 69 err = cos.NewErrNotFound(nil, "metrics for pod "+podName) 70 } 71 return 0, 0, err 72 } 73 74 for _, metric := range ms.Containers { 75 cpuNanoCores, ok := metric.Usage.Cpu().AsInt64() 76 if !ok { 77 cpuNanoCores = metric.Usage.Cpu().AsDec().UnscaledBig().Int64() 78 } 79 totalCPU += cpuNanoCores 80 81 memInt, ok := metric.Usage.Memory().AsInt64() 82 if !ok { 83 memInt = metric.Usage.Memory().AsDec().UnscaledBig().Int64() 84 } 85 totalMem += memInt 86 } 87 88 // Kubernetes reports CPU in nanocores, see https://godoc.org/k8s.io/api/core/v1#ResourceName 89 fracCPU = float64(totalCPU) / float64(1_000_000_000) 90 return fracCPU, totalMem, nil 91 }