github.com/kubevela/workflow@v0.6.0/pkg/providers/metrics/prom_check.go (about) 1 /* 2 Copyright 2022 The KubeVela Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package metrics 18 19 import ( 20 "fmt" 21 "strconv" 22 "time" 23 24 monitorContext "github.com/kubevela/pkg/monitor/context" 25 wfContext "github.com/kubevela/workflow/pkg/context" 26 "github.com/kubevela/workflow/pkg/cue/model/value" 27 "github.com/kubevela/workflow/pkg/types" 28 "github.com/prometheus/client_golang/api" 29 v1 "github.com/prometheus/client_golang/api/prometheus/v1" 30 "github.com/prometheus/common/model" 31 ) 32 33 const ( 34 // ProviderName is provider name for install. 35 ProviderName = "metrics" 36 ) 37 38 type provider struct{} 39 40 // PromCheck do health check from metrics from prometheus 41 func (h *provider) PromCheck(ctx monitorContext.Context, wfCtx wfContext.Context, v *value.Value, act types.Action) error { 42 stepID, err := v.GetString("stepID") 43 if err != nil { 44 return err 45 } 46 47 valueStr, err := getQueryResult(ctx, v) 48 if err != nil { 49 return err 50 } 51 52 conditionStr, err := v.GetString("condition") 53 if err != nil { 54 return err 55 } 56 57 res, err := compareValueWithCondition(valueStr, conditionStr, v) 58 59 if err != nil { 60 return err 61 } 62 63 if res { 64 // meet the condition 65 return handleSuccessCompare(wfCtx, stepID, v, conditionStr, valueStr) 66 } 67 return handleFailCompare(wfCtx, stepID, v, conditionStr, valueStr) 68 } 69 70 func handleSuccessCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error { 71 // clean up fail timeStamp 72 setMetricsStatusTime(wfCtx, stepID, "fail", 0) 73 d, err := v.GetString("duration") 74 if err != nil { 75 return err 76 } 77 duration, err := time.ParseDuration(d) 78 if err != nil { 79 return err 80 } 81 82 st := getMetricsStatusTime(wfCtx, stepID, "success") 83 if st == 0 { 84 // first success 85 if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success.", conditionStr, valueStr), "message"); err != nil { 86 return err 87 } 88 setMetricsStatusTime(wfCtx, stepID, "success", time.Now().Unix()) 89 return v.FillObject(false, "result") 90 } 91 successTime := time.Unix(st, 0) 92 if successTime.Add(duration).Before(time.Now()) { 93 if err = v.FillObject("The metric check has passed successfully.", "message"); err != nil { 94 return err 95 } 96 return v.FillObject(true, "result") 97 } 98 if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success. The success has persisted for %s, with success duration being %s.", conditionStr, valueStr, time.Since(successTime).String(), duration), "message"); err != nil { 99 return err 100 } 101 return v.FillObject(false, "result") 102 } 103 104 func handleFailCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error { 105 // clean up success timeStamp 106 setMetricsStatusTime(wfCtx, stepID, "success", 0) 107 ft := getMetricsStatusTime(wfCtx, stepID, "") 108 d, err := v.GetString("failDuration") 109 if err != nil { 110 return err 111 } 112 failDuration, err := time.ParseDuration(d) 113 if err != nil { 114 return err 115 } 116 117 if ft == 0 { 118 // first failed 119 setMetricsStatusTime(wfCtx, stepID, "fail", time.Now().Unix()) 120 if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure, with the failure duration being %s. This is first failed checking.", conditionStr, valueStr, failDuration), "message"); err != nil { 121 return err 122 } 123 return v.FillObject(false, "result") 124 } 125 126 failTime := time.Unix(ft, 0) 127 if failTime.Add(failDuration).Before(time.Now()) { 128 if err = v.FillObject(true, "failed"); err != nil { 129 return err 130 } 131 if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s. The check has terminated.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil { 132 return err 133 } 134 return v.FillObject(false, "result") 135 } 136 if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil { 137 return err 138 } 139 return v.FillObject(false, "result") 140 } 141 142 func getQueryResult(ctx monitorContext.Context, v *value.Value) (string, error) { 143 addr, err := v.GetString("metricEndpoint") 144 if err != nil { 145 return "", err 146 } 147 c, err := api.NewClient(api.Config{ 148 Address: addr, 149 }) 150 if err != nil { 151 return "", err 152 } 153 promCli := v1.NewAPI(c) 154 query, err := v.GetString("query") 155 if err != nil { 156 return "", err 157 } 158 resp, _, err := promCli.Query(ctx, query, time.Now()) 159 if err != nil { 160 return "", err 161 } 162 163 var valueStr string 164 switch v := resp.(type) { 165 case *model.Scalar: 166 valueStr = v.Value.String() 167 case model.Vector: 168 if len(v) != 1 { 169 return "", fmt.Errorf(fmt.Sprintf("ehe query is returning %d results when it should only return one. Please review the query to identify and fix the issue", len(v))) 170 } 171 valueStr = v[0].Value.String() 172 default: 173 return "", fmt.Errorf("cannot handle the not query value") 174 } 175 return valueStr, nil 176 } 177 178 func compareValueWithCondition(valueStr string, conditionStr string, v *value.Value) (bool, error) { 179 template := fmt.Sprintf("if: %s %s", valueStr, conditionStr) 180 cueValue, err := value.NewValue(template, nil, "") 181 if err != nil { 182 return false, err 183 } 184 res, err := cueValue.GetBool("if") 185 if err != nil { 186 return false, err 187 } 188 return res, nil 189 } 190 191 func setMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string, time int64) { 192 wfCtx.SetMutableValue(strconv.FormatInt(time, 10), stepID, "metrics", status, "time") 193 } 194 195 func getMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string) int64 { 196 str := wfCtx.GetMutableValue(stepID, "metrics", status, "time") 197 if len(str) == 0 { 198 return 0 199 } 200 t, _ := strconv.ParseInt(str, 10, 64) 201 return t 202 } 203 204 // Install register handlers to provider discover. 205 func Install(p types.Providers) { 206 prd := &provider{} 207 p.Register(ProviderName, map[string]types.Handler{ 208 "promCheck": prd.PromCheck, 209 }) 210 }