github.com/kubewharf/katalyst-core@v0.5.3/pkg/custom-metric/provider/provider.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package provider
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"k8s.io/apimachinery/pkg/labels"
    25  	"k8s.io/apimachinery/pkg/runtime/schema"
    26  	apitypes "k8s.io/apimachinery/pkg/types"
    27  	"k8s.io/klog/v2"
    28  	"k8s.io/metrics/pkg/apis/custom_metrics"
    29  	"k8s.io/metrics/pkg/apis/external_metrics"
    30  	"sigs.k8s.io/custom-metrics-apiserver/pkg/provider"
    31  
    32  	katalyst_base "github.com/kubewharf/katalyst-core/cmd/base"
    33  	"github.com/kubewharf/katalyst-core/pkg/custom-metric/store"
    34  	"github.com/kubewharf/katalyst-core/pkg/custom-metric/store/data/types"
    35  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    36  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    37  )
    38  
// Names of the metrics the provider emits about its own behavior.
const (
	// emitted by emitMetrics for every provider call: request cost and request counter
	metricsNameKCMASProviderReqCosts = "kcmas_provider_req_costs"
	metricsNameKCMASProviderReqCount = "kcmas_provider_req_count"

	// emitted by emitMetrics for successful calls only: number of results, and a
	// counter bumped when that number is zero
	metricsNameKCMASProviderDataCount = "kcmas_provider_data_count"
	metricsNameKCMASProviderDataEmpty = "kcmas_provider_data_empty"

	// time elapsed between a returned value's timestamp and the moment it is served
	metricsNameKCMASProviderCustomMetricLatency   = "kcmas_provider_custom_metric_latency"
	metricsNameKCMASProviderExternalMetricLatency = "kcmas_provider_external_metric_latency"
)
    49  
// MetricProvider is a standard interface to query metric data;
// to simplify the implementation, MetricProvider uses the standard kubernetes interface for
// custom-metrics and external-metrics; since implementations of provider.MetricsProvider may
// interpret its parameters differently, we adopt the following conventions
//
// - GetMetricByName
// --- if metric name is nominated, ignore the selector;
// --- otherwise, return all the metrics matched with the metric selector;
// --- in all cases, we should check whether the objects (that own this metric) are matched (by name)
// --- only returns the latest value
//
// - GetMetricBySelector
// --- if metric name is nominated, ignore the selector;
// --- otherwise, return all the metrics matched with the metric selector;
// --- in all cases, we should check whether the objects (that own this metric) are matched (by label selector)
//
// - GetExternalMetric
// --- if metric name is nominated, ignore the metric selector;
// --- otherwise, return all the metrics matched with the metric selector;
type MetricProvider interface {
	provider.MetricsProvider
}
    72  
// MetricProviderImp answers custom-metric and external-metric queries
// from a store.MetricStore and emits metrics about each request it serves.
type MetricProviderImp struct {
	// ctx is the context handed to NewMetricProviderImp.
	ctx            context.Context
	// metricsEmitter receives the request and latency metrics emitted by the provider methods.
	metricsEmitter metrics.MetricEmitter
	// storeImp is the store every query is served from.
	storeImp       store.MetricStore
}
    78  
    79  func NewMetricProviderImp(ctx context.Context, baseCtx *katalyst_base.GenericContext, storeImp store.MetricStore) *MetricProviderImp {
    80  	metricsEmitter := baseCtx.EmitterPool.GetDefaultMetricsEmitter()
    81  	if metricsEmitter == nil {
    82  		metricsEmitter = metrics.DummyMetrics{}
    83  	}
    84  
    85  	return &MetricProviderImp{
    86  		ctx:            ctx,
    87  		metricsEmitter: metricsEmitter,
    88  		storeImp:       storeImp,
    89  	}
    90  }
    91  
    92  func (m *MetricProviderImp) GetMetricByName(ctx context.Context, namespacedName apitypes.NamespacedName,
    93  	info provider.CustomMetricInfo, metricSelector labels.Selector,
    94  ) (*custom_metrics.MetricValue, error) {
    95  	klog.Infof("GetMetricByName: metric name %v, object %v, namespace %v, object name %v, metricSelector %v, context-test %v",
    96  		info.Metric, info.GroupResource, namespacedName.Namespace, namespacedName.Name, metricSelector, ctx.Value("context-test"))
    97  	var (
    98  		metricList []types.Metric
    99  		err        error
   100  		start      = time.Now()
   101  	)
   102  	defer func() {
   103  		m.emitMetrics("GetMetricByName", info.Metric, namespacedName.Name, start, 1, err)
   104  	}()
   105  
   106  	metricList, err = m.storeImp.GetMetric(ctx, namespacedName.Namespace, info.Metric, namespacedName.Name,
   107  		&info.GroupResource, nil, metricSelector, true)
   108  	if err != nil {
   109  		klog.Errorf("GetMetric err: %v", err)
   110  		return nil, err
   111  	}
   112  
   113  	var res *custom_metrics.MetricValue
   114  	for _, metric := range metricList {
   115  		if metric.GetObjectKind() == "" || metric.GetObjectName() == "" {
   116  			klog.Errorf("custom metric %v doesn't have object %v/%v",
   117  				metric.GetName(), metric.GetObjectKind(), metric.GetObjectName())
   118  			continue
   119  		}
   120  
   121  		if res == nil {
   122  			res = findMetricValueLatest(metric.GetName(), PackMetricValueList(metric, metricSelector))
   123  		} else {
   124  			res = findMetricValueLatest(metric.GetName(), append(PackMetricValueList(metric, metricSelector), *res))
   125  		}
   126  	}
   127  
   128  	if res == nil {
   129  		return nil, fmt.Errorf("no mtaching metric exists")
   130  	}
   131  
   132  	m.emitCustomMetricLatency(res)
   133  	return res, nil
   134  }
   135  
   136  func (m *MetricProviderImp) GetMetricBySelector(ctx context.Context, namespace string, objSelector labels.Selector,
   137  	info provider.CustomMetricInfo, metricSelector labels.Selector,
   138  ) (*custom_metrics.MetricValueList, error) {
   139  	klog.Infof("GetMetricBySelector: metric name %v, object %v, namespace %v, objSelector %v, metricSelector %v",
   140  		info.Metric, info.GroupResource, namespace, objSelector, metricSelector)
   141  	var (
   142  		metricList  []types.Metric
   143  		resultCount int
   144  		err         error
   145  		start       = time.Now()
   146  	)
   147  	defer func() {
   148  		m.emitMetrics("GetMetricBySelector", info.Metric, "", start, resultCount, err)
   149  	}()
   150  
   151  	metricList, err = m.storeImp.GetMetric(ctx, namespace, info.Metric, "",
   152  		&info.GroupResource, objSelector, metricSelector, false)
   153  	if err != nil {
   154  		klog.Errorf("GetMetric err: %v", err)
   155  		return nil, err
   156  	}
   157  
   158  	var items []custom_metrics.MetricValue
   159  	for _, metric := range metricList {
   160  		if metric.GetObjectKind() == "" || metric.GetObjectName() == "" {
   161  			klog.Errorf("custom metric %v doesn't have object %v/%v",
   162  				metric.GetName(), metric.GetObjectKind(), metric.GetObjectName())
   163  			continue
   164  		}
   165  
   166  		m.emitCustomMetricLatencyByRawMetrics(metric)
   167  		resultCount += metric.Len()
   168  		items = append(items, PackMetricValueList(metric, metricSelector)...)
   169  	}
   170  
   171  	return &custom_metrics.MetricValueList{
   172  		Items: items,
   173  	}, nil
   174  }
   175  
   176  func (m *MetricProviderImp) ListAllMetrics() []provider.CustomMetricInfo {
   177  	klog.V(6).Info("ListAllMetrics")
   178  	var (
   179  		metricTypeList []types.MetricMeta
   180  		resultCount    int
   181  		err            error
   182  		start          = time.Now()
   183  	)
   184  	defer func() {
   185  		m.emitMetrics("ListAllMetrics", "", "", start, resultCount, err)
   186  	}()
   187  
   188  	metricTypeList, err = m.storeImp.ListMetricMeta(context.Background(), true)
   189  	if err != nil {
   190  		klog.Errorf("ListAllMetrics err: %v", err)
   191  		return []provider.CustomMetricInfo{}
   192  	}
   193  
   194  	infoMap := make(map[provider.CustomMetricInfo]interface{})
   195  	for _, metricType := range metricTypeList {
   196  		if metricType.GetObjectKind() == "" {
   197  			continue
   198  		}
   199  
   200  		_, gr := schema.ParseResourceArg(metricType.GetObjectKind())
   201  		infoMap[provider.CustomMetricInfo{
   202  			GroupResource: gr,
   203  			Namespaced:    metricType.GetNamespaced(),
   204  			Metric:        metricType.GetName(),
   205  		}] = struct{}{}
   206  	}
   207  
   208  	var res []provider.CustomMetricInfo
   209  	for info := range infoMap {
   210  		resultCount++
   211  		res = append(res, info)
   212  	}
   213  	return res
   214  }
   215  
   216  func (m *MetricProviderImp) GetExternalMetric(ctx context.Context, namespace string, metricSelector labels.Selector,
   217  	info provider.ExternalMetricInfo,
   218  ) (*external_metrics.ExternalMetricValueList, error) {
   219  	klog.Infof("GetExternalMetric: metric name %v, namespace %v, metricSelector %v", info.Metric, namespace, metricSelector)
   220  	var (
   221  		metricList  []types.Metric
   222  		resultCount int
   223  		err         error
   224  		start       = time.Now()
   225  	)
   226  	defer func() {
   227  		m.emitMetrics("GetExternalMetric", info.Metric, "", start, resultCount, err)
   228  	}()
   229  
   230  	metricList, err = m.storeImp.GetMetric(ctx, namespace, info.Metric, "", nil, nil, metricSelector, true)
   231  	if err != nil {
   232  		klog.Errorf("GetMetric err: %v", err)
   233  		return nil, err
   234  	}
   235  
   236  	var items []external_metrics.ExternalMetricValue
   237  	for _, metric := range metricList {
   238  		if metric.GetObjectKind() != "" || metric.GetObjectName() != "" {
   239  			klog.Errorf("internal metric %v has object %v/%v unexpectedly",
   240  				metric.GetName(), metric.GetObjectKind(), metric.GetObjectName())
   241  			continue
   242  		}
   243  
   244  		resultCount += metric.Len()
   245  		items = append(items, PackExternalMetricValueList(metric)...)
   246  	}
   247  
   248  	for i := range items {
   249  		m.emitExternalMetricLatency(&items[i])
   250  	}
   251  
   252  	return &external_metrics.ExternalMetricValueList{
   253  		Items: items,
   254  	}, nil
   255  }
   256  
   257  func (m *MetricProviderImp) ListAllExternalMetrics() []provider.ExternalMetricInfo {
   258  	klog.V(6).Info("ListAllExternalMetrics")
   259  	var (
   260  		metricTypeList []types.MetricMeta
   261  		resultCount    int
   262  		err            error
   263  		start          = time.Now()
   264  	)
   265  	defer func() {
   266  		m.emitMetrics("ListAllExternalMetrics", "", "", start, resultCount, err)
   267  	}()
   268  
   269  	metricTypeList, err = m.storeImp.ListMetricMeta(context.Background(), false)
   270  	if err != nil {
   271  		klog.Errorf("ListAllExternalMetrics err: %v", err)
   272  		return []provider.ExternalMetricInfo{}
   273  	}
   274  
   275  	infoMap := make(map[provider.ExternalMetricInfo]interface{})
   276  	for _, internal := range metricTypeList {
   277  		if internal.GetObjectKind() != "" {
   278  			continue
   279  		}
   280  
   281  		infoMap[provider.ExternalMetricInfo{
   282  			Metric: internal.GetName(),
   283  		}] = struct{}{}
   284  	}
   285  
   286  	var res []provider.ExternalMetricInfo
   287  	for info := range infoMap {
   288  		resultCount++
   289  		res = append(res, info)
   290  	}
   291  	return res
   292  }
   293  
   294  func (m *MetricProviderImp) emitCustomMetricLatencyByRawMetrics(metric types.Metric) {
   295  	items := metric.GetItemList()
   296  	latestItem := items[len(items)-1]
   297  	dataLatency := time.Now().Sub(time.UnixMilli(latestItem.GetTimestamp())).Microseconds()
   298  	general.Infof("query custom metrics, metric name:%v, object name:%v, object kind: %v, latest timestamp: %v(parsed: %v), data latency: %v(microseconds)", metric.GetName(),
   299  		metric.GetObjectName(), metric.GetObjectKind(), latestItem.GetTimestamp(), time.UnixMilli(latestItem.GetTimestamp()), dataLatency)
   300  	tags := []metrics.MetricTag{
   301  		{Key: "metric_name", Val: metric.GetName()},
   302  		{Key: "object_kind", Val: metric.GetObjectKind()},
   303  	}
   304  
   305  	_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderCustomMetricLatency, dataLatency, metrics.MetricTypeNameRaw, tags...)
   306  }
   307  
   308  func (m *MetricProviderImp) emitCustomMetricLatency(metric *custom_metrics.MetricValue) {
   309  	dataLatency := time.Now().Sub(metric.Timestamp.Time).Microseconds()
   310  	general.Infof("query custom metrics, metric name:%v, object name:%v, object kind: %v, latest timestamp: %v(parsed: %v), data latency: %v(microseconds)", metric.Metric.Name,
   311  		metric.Metric.Name, metric.GetObjectKind(), metric.Timestamp.UnixMilli(), metric.Timestamp.Time, dataLatency)
   312  	tags := []metrics.MetricTag{
   313  		{Key: "metric_name", Val: metric.Metric.Name},
   314  		{Key: "object_kind", Val: metric.DescribedObject.Kind},
   315  	}
   316  
   317  	_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderCustomMetricLatency, dataLatency, metrics.MetricTypeNameRaw, tags...)
   318  }
   319  
   320  func (m *MetricProviderImp) emitExternalMetricLatency(metric *external_metrics.ExternalMetricValue) {
   321  	dataLatency := time.Now().Sub(metric.Timestamp.Time).Microseconds()
   322  	tags := []metrics.MetricTag{
   323  		{Key: "metric_name", Val: metric.MetricName},
   324  	}
   325  
   326  	_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderExternalMetricLatency, dataLatency, metrics.MetricTypeNameRaw, tags...)
   327  }
   328  
   329  // emitMetrics provides a unified way to emit metrics about the running states for each interface.
   330  func (m *MetricProviderImp) emitMetrics(function string, metricName, objName string, reqStart time.Time, resultCount int, err error) {
   331  	now := time.Now()
   332  	var (
   333  		reqCosts = now.Sub(reqStart).Microseconds()
   334  		success  = err == nil
   335  	)
   336  	klog.V(3).Infof("[provider] function [%v]: costs %v(ms), resultCount %v, success: %v", function, reqCosts, resultCount, success)
   337  
   338  	if metricName == "" {
   339  		metricName = "empty"
   340  	}
   341  	if objName == "" {
   342  		objName = "empty"
   343  	}
   344  	tags := []metrics.MetricTag{
   345  		{Key: "function", Val: fmt.Sprintf("%v", function)},
   346  		{Key: "metric_name", Val: metricName},
   347  		{Key: "object_name", Val: objName},
   348  	}
   349  
   350  	_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderReqCosts, reqCosts, metrics.MetricTypeNameRaw, append(tags,
   351  		metrics.MetricTag{Key: "success", Val: fmt.Sprintf("%v", success)})...)
   352  	_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderReqCount, 1, metrics.MetricTypeNameCount, append(tags,
   353  		metrics.MetricTag{Key: "success", Val: fmt.Sprintf("%v", success)})...)
   354  
   355  	if success {
   356  		_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderDataCount, int64(resultCount), metrics.MetricTypeNameRaw, tags...)
   357  		if resultCount == 0 {
   358  			_ = m.metricsEmitter.StoreInt64(metricsNameKCMASProviderDataEmpty, 1, metrics.MetricTypeNameCount, tags...)
   359  		}
   360  	}
   361  }