github.com/kubevela/workflow@v0.6.0/pkg/providers/metrics/prom_check.go (about)

     1  /*
     2  Copyright 2022 The KubeVela Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"time"
    23  
    24  	monitorContext "github.com/kubevela/pkg/monitor/context"
    25  	wfContext "github.com/kubevela/workflow/pkg/context"
    26  	"github.com/kubevela/workflow/pkg/cue/model/value"
    27  	"github.com/kubevela/workflow/pkg/types"
    28  	"github.com/prometheus/client_golang/api"
    29  	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
    30  	"github.com/prometheus/common/model"
    31  )
    32  
    33  const (
    34  	// ProviderName is provider name for install.
    35  	ProviderName = "metrics"
    36  )
    37  
// provider is the stateless receiver type that the metrics handlers are
// defined on.
type provider struct{}
    39  
    40  // PromCheck do health check from metrics from prometheus
    41  func (h *provider) PromCheck(ctx monitorContext.Context, wfCtx wfContext.Context, v *value.Value, act types.Action) error {
    42  	stepID, err := v.GetString("stepID")
    43  	if err != nil {
    44  		return err
    45  	}
    46  
    47  	valueStr, err := getQueryResult(ctx, v)
    48  	if err != nil {
    49  		return err
    50  	}
    51  
    52  	conditionStr, err := v.GetString("condition")
    53  	if err != nil {
    54  		return err
    55  	}
    56  
    57  	res, err := compareValueWithCondition(valueStr, conditionStr, v)
    58  
    59  	if err != nil {
    60  		return err
    61  	}
    62  
    63  	if res {
    64  		// meet the condition
    65  		return handleSuccessCompare(wfCtx, stepID, v, conditionStr, valueStr)
    66  	}
    67  	return handleFailCompare(wfCtx, stepID, v, conditionStr, valueStr)
    68  }
    69  
    70  func handleSuccessCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error {
    71  	// clean up fail timeStamp
    72  	setMetricsStatusTime(wfCtx, stepID, "fail", 0)
    73  	d, err := v.GetString("duration")
    74  	if err != nil {
    75  		return err
    76  	}
    77  	duration, err := time.ParseDuration(d)
    78  	if err != nil {
    79  		return err
    80  	}
    81  
    82  	st := getMetricsStatusTime(wfCtx, stepID, "success")
    83  	if st == 0 {
    84  		// first success
    85  		if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success.", conditionStr, valueStr), "message"); err != nil {
    86  			return err
    87  		}
    88  		setMetricsStatusTime(wfCtx, stepID, "success", time.Now().Unix())
    89  		return v.FillObject(false, "result")
    90  	}
    91  	successTime := time.Unix(st, 0)
    92  	if successTime.Add(duration).Before(time.Now()) {
    93  		if err = v.FillObject("The metric check has passed successfully.", "message"); err != nil {
    94  			return err
    95  		}
    96  		return v.FillObject(true, "result")
    97  	}
    98  	if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success. The success has persisted for %s, with success duration being %s.", conditionStr, valueStr, time.Since(successTime).String(), duration), "message"); err != nil {
    99  		return err
   100  	}
   101  	return v.FillObject(false, "result")
   102  }
   103  
   104  func handleFailCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error {
   105  	// clean up success timeStamp
   106  	setMetricsStatusTime(wfCtx, stepID, "success", 0)
   107  	ft := getMetricsStatusTime(wfCtx, stepID, "")
   108  	d, err := v.GetString("failDuration")
   109  	if err != nil {
   110  		return err
   111  	}
   112  	failDuration, err := time.ParseDuration(d)
   113  	if err != nil {
   114  		return err
   115  	}
   116  
   117  	if ft == 0 {
   118  		// first failed
   119  		setMetricsStatusTime(wfCtx, stepID, "fail", time.Now().Unix())
   120  		if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure, with the failure duration being %s. This is first failed checking.", conditionStr, valueStr, failDuration), "message"); err != nil {
   121  			return err
   122  		}
   123  		return v.FillObject(false, "result")
   124  	}
   125  
   126  	failTime := time.Unix(ft, 0)
   127  	if failTime.Add(failDuration).Before(time.Now()) {
   128  		if err = v.FillObject(true, "failed"); err != nil {
   129  			return err
   130  		}
   131  		if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s. The check has terminated.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil {
   132  			return err
   133  		}
   134  		return v.FillObject(false, "result")
   135  	}
   136  	if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil {
   137  		return err
   138  	}
   139  	return v.FillObject(false, "result")
   140  }
   141  
   142  func getQueryResult(ctx monitorContext.Context, v *value.Value) (string, error) {
   143  	addr, err := v.GetString("metricEndpoint")
   144  	if err != nil {
   145  		return "", err
   146  	}
   147  	c, err := api.NewClient(api.Config{
   148  		Address: addr,
   149  	})
   150  	if err != nil {
   151  		return "", err
   152  	}
   153  	promCli := v1.NewAPI(c)
   154  	query, err := v.GetString("query")
   155  	if err != nil {
   156  		return "", err
   157  	}
   158  	resp, _, err := promCli.Query(ctx, query, time.Now())
   159  	if err != nil {
   160  		return "", err
   161  	}
   162  
   163  	var valueStr string
   164  	switch v := resp.(type) {
   165  	case *model.Scalar:
   166  		valueStr = v.Value.String()
   167  	case model.Vector:
   168  		if len(v) != 1 {
   169  			return "", fmt.Errorf(fmt.Sprintf("ehe query is returning %d results when it should only return one. Please review the query to identify and fix the issue", len(v)))
   170  		}
   171  		valueStr = v[0].Value.String()
   172  	default:
   173  		return "", fmt.Errorf("cannot handle the not query value")
   174  	}
   175  	return valueStr, nil
   176  }
   177  
   178  func compareValueWithCondition(valueStr string, conditionStr string, v *value.Value) (bool, error) {
   179  	template := fmt.Sprintf("if: %s %s", valueStr, conditionStr)
   180  	cueValue, err := value.NewValue(template, nil, "")
   181  	if err != nil {
   182  		return false, err
   183  	}
   184  	res, err := cueValue.GetBool("if")
   185  	if err != nil {
   186  		return false, err
   187  	}
   188  	return res, nil
   189  }
   190  
   191  func setMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string, time int64) {
   192  	wfCtx.SetMutableValue(strconv.FormatInt(time, 10), stepID, "metrics", status, "time")
   193  }
   194  
   195  func getMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string) int64 {
   196  	str := wfCtx.GetMutableValue(stepID, "metrics", status, "time")
   197  	if len(str) == 0 {
   198  		return 0
   199  	}
   200  	t, _ := strconv.ParseInt(str, 10, 64)
   201  	return t
   202  }
   203  
   204  // Install register handlers to provider discover.
   205  func Install(p types.Providers) {
   206  	prd := &provider{}
   207  	p.Register(ProviderName, map[string]types.Handler{
   208  		"promCheck": prd.PromCheck,
   209  	})
   210  }