volcano.sh/volcano@v1.9.0/pkg/scheduler/metrics/source/metrics_client_prometheus.go (about)

     1  /*
     2   Copyright 2023 The Volcano Authors.
     3  
     4   Licensed under the Apache License, Version 2.0 (the "License");
     5   you may not use this file except in compliance with the License.
     6   You may obtain a copy of the License at
     7  
     8       http://www.apache.org/licenses/LICENSE-2.0
     9  
    10   Unless required by applicable law or agreed to in writing, software
    11   distributed under the License is distributed on an "AS IS" BASIS,
    12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   See the License for the specific language governing permissions and
    14   limitations under the License.
    15  */
    16  
    17  package source
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"errors"
    23  	"fmt"
    24  	"net/http"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/prometheus/client_golang/api"
    30  	prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
    31  	pmodel "github.com/prometheus/common/model"
    32  	"k8s.io/klog/v2"
    33  )
    34  
    35  type PrometheusMetricsClient struct {
    36  	address string
    37  	conf    map[string]string
    38  }
    39  
    40  func NewPrometheusMetricsClient(conf map[string]string) (*PrometheusMetricsClient, error) {
    41  	address := conf["address"]
    42  	if len(address) == 0 {
    43  		return nil, errors.New("metrics address is empty")
    44  	}
    45  	return &PrometheusMetricsClient{address: address, conf: conf}, nil
    46  }
    47  
    48  func (p *PrometheusMetricsClient) NodesMetricsAvg(ctx context.Context, nodeMetricsMap map[string]*NodeMetrics) error {
    49  	for nodeName := range nodeMetricsMap {
    50  		nodeMetrics, err := p.NodeMetricsAvg(ctx, nodeName)
    51  		if err != nil {
    52  			return err
    53  		}
    54  		nodeMetricsMap[nodeName] = nodeMetrics
    55  	}
    56  	return nil
    57  }
    58  
    59  func (p *PrometheusMetricsClient) NodeMetricsAvg(ctx context.Context, nodeName string) (*NodeMetrics, error) {
    60  	klog.V(4).Infof("Get node metrics from Prometheus: %s", p.address)
    61  	var client api.Client
    62  	var err error
    63  	insecureSkipVerify := p.conf["tls.insecureSkipVerify"] == "true"
    64  	tr := &http.Transport{
    65  		TLSClientConfig: &tls.Config{
    66  			InsecureSkipVerify: insecureSkipVerify,
    67  		},
    68  	}
    69  	client, err = api.NewClient(api.Config{
    70  		Address:      p.address,
    71  		RoundTripper: tr,
    72  	})
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	v1api := prometheusv1.NewAPI(client)
    77  	nodeMetrics := &NodeMetrics{}
    78  	cpuQueryStr := fmt.Sprintf("avg_over_time((100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\",instance=\"%s\"}[5m])) * 100))[%s:30s])", nodeName, NODE_METRICS_PERIOD)
    79  	memQueryStr := fmt.Sprintf("100*avg_over_time(((1-node_memory_MemAvailable_bytes{instance=\"%s\"}/node_memory_MemTotal_bytes{instance=\"%s\"}))[%s:30s])", nodeName, nodeName, NODE_METRICS_PERIOD)
    80  
    81  	for _, metric := range []string{cpuQueryStr, memQueryStr} {
    82  		res, warnings, err := v1api.Query(ctx, metric, time.Now())
    83  		if err != nil {
    84  			klog.Errorf("Error querying Prometheus: %v", err)
    85  		}
    86  		if len(warnings) > 0 {
    87  			klog.V(3).Infof("Warning querying Prometheus: %v", warnings)
    88  		}
    89  		if res == nil || res.String() == "" {
    90  			klog.Warningf("Warning querying Prometheus: no data found for %s", metric)
    91  			continue
    92  		}
    93  		// plugin.usage only need type pmodel.ValVector in Prometheus.rulues
    94  		if res.Type() != pmodel.ValVector {
    95  			continue
    96  		}
    97  		// only method res.String() can get data, dataType []pmodel.ValVector, eg: "{k1:v1, ...} => #[value] @#[timespace]\n {k2:v2, ...} => ..."
    98  		firstRowValVector := strings.Split(res.String(), "\n")[0]
    99  		rowValues := strings.Split(strings.TrimSpace(firstRowValVector), "=>")
   100  		value := strings.Split(strings.TrimSpace(rowValues[1]), " ")
   101  		switch metric {
   102  		case cpuQueryStr:
   103  			cpuUsage, _ := strconv.ParseFloat(value[0], 64)
   104  			nodeMetrics.CPU = cpuUsage
   105  		case memQueryStr:
   106  			memUsage, _ := strconv.ParseFloat(value[0], 64)
   107  			nodeMetrics.Memory = memUsage
   108  		}
   109  	}
   110  	nodeMetrics.MetricsTime = time.Now()
   111  	return nodeMetrics, nil
   112  }