github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/k8s/metrics.go (about)

     1  // Package k8s: initialization, client, and misc. helpers
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package k8s
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"sync"
    11  
    12  	"github.com/NVIDIA/aistore/cmn/cos"
    13  	"github.com/NVIDIA/aistore/cmn/debug"
    14  	"k8s.io/apimachinery/pkg/api/errors"
    15  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    16  	"k8s.io/client-go/tools/clientcmd"
    17  	metrics "k8s.io/metrics/pkg/client/clientset/versioned"
    18  )
    19  
    20  type metricsClient struct {
    21  	client *metrics.Clientset
    22  	err    error
    23  }
    24  
    25  var (
    26  	once sync.Once
    27  	_mc  *metricsClient
    28  )
    29  
    30  func InitMetricsClient() { once.Do(_initmc) }
    31  
    32  func _initmc() {
    33  	config, err := clientcmd.BuildConfigFromFlags("", "")
    34  	if err != nil {
    35  		_mc = &metricsClient{
    36  			err: fmt.Errorf("failed to retrieve metrics client config: %w", err),
    37  		}
    38  		return
    39  	}
    40  	mc, err := metrics.NewForConfig(config)
    41  	if err != nil {
    42  		_mc = &metricsClient{
    43  			err: fmt.Errorf("failed to create metrics client: %w", err),
    44  		}
    45  		return
    46  	}
    47  	_mc = &metricsClient{
    48  		client: mc,
    49  	}
    50  }
    51  
    52  func Metrics(podName string) (float64 /*cores*/, int64 /*mem*/, error) {
    53  	var (
    54  		totalCPU, totalMem int64
    55  		fracCPU            float64
    56  	)
    57  	if _mc.err != nil {
    58  		return 0, 0, _mc.err
    59  	}
    60  	debug.Assert(_mc.client != nil)
    61  
    62  	var (
    63  		mc       = _mc.client
    64  		msgetter = mc.MetricsV1beta1().PodMetricses(metav1.NamespaceDefault)
    65  		ms, err  = msgetter.Get(context.Background(), podName, metav1.GetOptions{})
    66  	)
    67  	if err != nil {
    68  		if statusErr, ok := err.(*errors.StatusError); ok && statusErr.Status().Reason == metav1.StatusReasonNotFound {
    69  			err = cos.NewErrNotFound(nil, "metrics for pod "+podName)
    70  		}
    71  		return 0, 0, err
    72  	}
    73  
    74  	for _, metric := range ms.Containers {
    75  		cpuNanoCores, ok := metric.Usage.Cpu().AsInt64()
    76  		if !ok {
    77  			cpuNanoCores = metric.Usage.Cpu().AsDec().UnscaledBig().Int64()
    78  		}
    79  		totalCPU += cpuNanoCores
    80  
    81  		memInt, ok := metric.Usage.Memory().AsInt64()
    82  		if !ok {
    83  			memInt = metric.Usage.Memory().AsDec().UnscaledBig().Int64()
    84  		}
    85  		totalMem += memInt
    86  	}
    87  
    88  	// Kubernetes reports CPU in nanocores, see https://godoc.org/k8s.io/api/core/v1#ResourceName
    89  	fracCPU = float64(totalCPU) / float64(1_000_000_000)
    90  	return fracCPU, totalMem, nil
    91  }