github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/metrics_reader.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package interlock 15 16 import ( 17 "context" 18 "fmt" 19 "math" 20 "sort" 21 "strings" 22 "time" 23 24 "github.com/prometheus/client_golang/api" 25 promv1 "github.com/prometheus/client_golang/api/prometheus/v1" 26 pperceptron "github.com/prometheus/common/perceptron" 27 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 28 "github.com/whtcorpsinc/BerolinaSQL/perceptron" 29 "github.com/whtcorpsinc/errors" 30 "github.com/whtcorpsinc/failpoint" 31 causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded" 32 "github.com/whtcorpsinc/milevadb/petri/infosync" 33 "github.com/whtcorpsinc/milevadb/schemareplicant" 34 "github.com/whtcorpsinc/milevadb/soliton" 35 "github.com/whtcorpsinc/milevadb/soliton/sqlexec" 36 "github.com/whtcorpsinc/milevadb/stochastikctx" 37 "github.com/whtcorpsinc/milevadb/types" 38 ) 39 40 const promReadTimeout = time.Second * 10 41 42 // MetricRetriever uses to read metric data. 43 type MetricRetriever struct { 44 dummyCloser 45 causet *perceptron.BlockInfo 46 tblDef *schemareplicant.MetricBlockDef 47 extractor *causetembedded.MetricBlockExtractor 48 timeRange causetembedded.QueryTimeRange 49 retrieved bool 50 } 51 52 func (e *MetricRetriever) retrieve(ctx context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) { 53 if e.retrieved || e.extractor.SkipRequest { 54 return nil, nil 55 } 56 e.retrieved = true 57 58 failpoint.InjectContext(ctx, "mockMetricsBlockData", func() { 59 m, ok := ctx.Value("__mockMetricsBlockData").(map[string][][]types.Causet) 60 if ok && m[e.causet.Name.L] != nil { 61 failpoint.Return(m[e.causet.Name.L], nil) 62 } 63 }) 64 65 tblDef, err := schemareplicant.GetMetricBlockDef(e.causet.Name.L) 66 if err != nil { 67 return nil, err 68 } 69 e.tblDef = tblDef 70 queryRange := e.getQueryRange(sctx) 71 totalEvents := make([][]types.Causet, 0) 72 quantiles := e.extractor.Quantiles 73 if len(quantiles) == 0 { 74 quantiles = []float64{tblDef.Quantile} 75 } 76 for _, quantile := range quantiles { 77 var queryValue pperceptron.Value 78 queryValue, err = e.queryMetric(ctx, sctx, queryRange, quantile) 79 if err != nil { 80 if err1, ok := err.(*promv1.Error); ok { 81 return nil, errors.Errorf("query metric error, msg: %v, detail: %v", err1.Msg, err1.Detail) 82 } 83 return nil, errors.Errorf("query metric error: %v", err.Error()) 84 } 85 partEvents := e.genEvents(queryValue, quantile) 86 totalEvents = append(totalEvents, partEvents...) 87 } 88 return totalEvents, nil 89 } 90 91 func (e *MetricRetriever) queryMetric(ctx context.Context, sctx stochastikctx.Context, queryRange promv1.Range, quantile float64) (result pperceptron.Value, err error) { 92 failpoint.InjectContext(ctx, "mockMetricsPromData", func() { 93 failpoint.Return(ctx.Value("__mockMetricsPromData").(pperceptron.Matrix), nil) 94 }) 95 96 // Add retry to avoid network error. 97 var prometheusAddr string 98 for i := 0; i < 5; i++ { 99 //TODO: the prometheus will be Integrated into the FIDel, then we need to query the prometheus in FIDel directly, which need change the quire API 100 prometheusAddr, err = infosync.GetPrometheusAddr() 101 if err == nil || err == infosync.ErrPrometheusAddrIsNotSet { 102 break 103 } 104 time.Sleep(100 * time.Millisecond) 105 } 106 if err != nil { 107 return nil, err 108 } 109 promClient, err := api.NewClient(api.Config{ 110 Address: prometheusAddr, 111 }) 112 if err != nil { 113 return nil, err 114 } 115 promQLAPI := promv1.NewAPI(promClient) 116 ctx, cancel := context.WithTimeout(ctx, promReadTimeout) 117 defer cancel() 118 promQL := e.tblDef.GenPromQL(sctx, e.extractor.LabelConditions, quantile) 119 120 // Add retry to avoid network error. 121 for i := 0; i < 5; i++ { 122 result, _, err = promQLAPI.QueryRange(ctx, promQL, queryRange) 123 if err == nil { 124 break 125 } 126 time.Sleep(100 * time.Millisecond) 127 } 128 return result, err 129 } 130 131 type promQLQueryRange = promv1.Range 132 133 func (e *MetricRetriever) getQueryRange(sctx stochastikctx.Context) promQLQueryRange { 134 startTime, endTime := e.extractor.StartTime, e.extractor.EndTime 135 step := time.Second * time.Duration(sctx.GetStochastikVars().MetricSchemaStep) 136 return promQLQueryRange{Start: startTime, End: endTime, Step: step} 137 } 138 139 func (e *MetricRetriever) genEvents(value pperceptron.Value, quantile float64) [][]types.Causet { 140 var rows [][]types.Causet 141 switch value.Type() { 142 case pperceptron.ValMatrix: 143 matrix := value.(pperceptron.Matrix) 144 for _, m := range matrix { 145 for _, v := range m.Values { 146 record := e.genRecord(m.Metric, v, quantile) 147 rows = append(rows, record) 148 } 149 } 150 } 151 return rows 152 } 153 154 func (e *MetricRetriever) genRecord(metric pperceptron.Metric, pair pperceptron.SamplePair, quantile float64) []types.Causet { 155 record := make([]types.Causet, 0, 2+len(e.tblDef.Labels)+1) 156 // Record order should keep same with genDeferredCausetInfos. 157 record = append(record, types.NewTimeCauset(types.NewTime( 158 types.FromGoTime(time.Unix(int64(pair.Timestamp/1000), int64(pair.Timestamp%1000)*1e6)), 159 allegrosql.TypeDatetime, 160 types.MaxFsp, 161 ))) 162 for _, label := range e.tblDef.Labels { 163 v := "" 164 if metric != nil { 165 v = string(metric[pperceptron.LabelName(label)]) 166 } 167 if len(v) == 0 { 168 v = schemareplicant.GenLabelConditionValues(e.extractor.LabelConditions[strings.ToLower(label)]) 169 } 170 record = append(record, types.NewStringCauset(v)) 171 } 172 if e.tblDef.Quantile > 0 { 173 record = append(record, types.NewFloat64Causet(quantile)) 174 } 175 if math.IsNaN(float64(pair.Value)) { 176 record = append(record, types.NewCauset(nil)) 177 } else { 178 record = append(record, types.NewFloat64Causet(float64(pair.Value))) 179 } 180 return record 181 } 182 183 // MetricsSummaryRetriever uses to read metric data. 184 type MetricsSummaryRetriever struct { 185 dummyCloser 186 causet *perceptron.BlockInfo 187 extractor *causetembedded.MetricSummaryBlockExtractor 188 timeRange causetembedded.QueryTimeRange 189 retrieved bool 190 } 191 192 func (e *MetricsSummaryRetriever) retrieve(_ context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) { 193 if e.retrieved || e.extractor.SkipRequest { 194 return nil, nil 195 } 196 e.retrieved = true 197 totalEvents := make([][]types.Causet, 0, len(schemareplicant.MetricBlockMap)) 198 blocks := make([]string, 0, len(schemareplicant.MetricBlockMap)) 199 for name := range schemareplicant.MetricBlockMap { 200 blocks = append(blocks, name) 201 } 202 sort.Strings(blocks) 203 204 filter := inspectionFilter{set: e.extractor.MetricsNames} 205 condition := e.timeRange.Condition() 206 for _, name := range blocks { 207 if !filter.enable(name) { 208 continue 209 } 210 def, found := schemareplicant.MetricBlockMap[name] 211 if !found { 212 sctx.GetStochastikVars().StmtCtx.AppendWarning(fmt.Errorf("metrics causet: %s not found", name)) 213 continue 214 } 215 var allegrosql string 216 if def.Quantile > 0 { 217 var qs []string 218 if len(e.extractor.Quantiles) > 0 { 219 for _, q := range e.extractor.Quantiles { 220 qs = append(qs, fmt.Sprintf("%f", q)) 221 } 222 } else { 223 qs = []string{"0.99"} 224 } 225 allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value),quantile from `%[2]s`.`%[1]s` %[3]s and quantile in (%[4]s) group by quantile order by quantile", 226 name, soliton.MetricSchemaName.L, condition, strings.Join(qs, ",")) 227 } else { 228 allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value) from `%[2]s`.`%[1]s` %[3]s", 229 name, soliton.MetricSchemaName.L, condition) 230 } 231 232 rows, _, err := sctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQL(allegrosql) 233 if err != nil { 234 return nil, errors.Errorf("execute '%s' failed: %v", allegrosql, err) 235 } 236 for _, event := range rows { 237 var quantile interface{} 238 if def.Quantile > 0 { 239 quantile = event.GetFloat64(event.Len() - 1) 240 } 241 totalEvents = append(totalEvents, types.MakeCausets( 242 name, 243 quantile, 244 event.GetFloat64(0), 245 event.GetFloat64(1), 246 event.GetFloat64(2), 247 event.GetFloat64(3), 248 def.Comment, 249 )) 250 } 251 } 252 return totalEvents, nil 253 } 254 255 // MetricsSummaryByLabelRetriever uses to read metric detail data. 256 type MetricsSummaryByLabelRetriever struct { 257 dummyCloser 258 causet *perceptron.BlockInfo 259 extractor *causetembedded.MetricSummaryBlockExtractor 260 timeRange causetembedded.QueryTimeRange 261 retrieved bool 262 } 263 264 func (e *MetricsSummaryByLabelRetriever) retrieve(ctx context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) { 265 if e.retrieved || e.extractor.SkipRequest { 266 return nil, nil 267 } 268 e.retrieved = true 269 totalEvents := make([][]types.Causet, 0, len(schemareplicant.MetricBlockMap)) 270 blocks := make([]string, 0, len(schemareplicant.MetricBlockMap)) 271 for name := range schemareplicant.MetricBlockMap { 272 blocks = append(blocks, name) 273 } 274 sort.Strings(blocks) 275 276 filter := inspectionFilter{set: e.extractor.MetricsNames} 277 condition := e.timeRange.Condition() 278 for _, name := range blocks { 279 if !filter.enable(name) { 280 continue 281 } 282 def, found := schemareplicant.MetricBlockMap[name] 283 if !found { 284 sctx.GetStochastikVars().StmtCtx.AppendWarning(fmt.Errorf("metrics causet: %s not found", name)) 285 continue 286 } 287 defcaus := def.Labels 288 cond := condition 289 if def.Quantile > 0 { 290 defcaus = append(defcaus, "quantile") 291 if len(e.extractor.Quantiles) > 0 { 292 qs := make([]string, len(e.extractor.Quantiles)) 293 for i, q := range e.extractor.Quantiles { 294 qs[i] = fmt.Sprintf("%f", q) 295 } 296 cond += " and quantile in (" + strings.Join(qs, ",") + ")" 297 } else { 298 cond += " and quantile=0.99" 299 } 300 } 301 var allegrosql string 302 if len(defcaus) > 0 { 303 allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value),`%s` from `%s`.`%s` %s group by `%[1]s` order by `%[1]s`", 304 strings.Join(defcaus, "`,`"), soliton.MetricSchemaName.L, name, cond) 305 } else { 306 allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value) from `%s`.`%s` %s", 307 soliton.MetricSchemaName.L, name, cond) 308 } 309 rows, _, err := sctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQLWithContext(ctx, allegrosql) 310 if err != nil { 311 return nil, errors.Errorf("execute '%s' failed: %v", allegrosql, err) 312 } 313 nonInstanceLabelIndex := 0 314 if len(def.Labels) > 0 && def.Labels[0] == "instance" { 315 nonInstanceLabelIndex = 1 316 } 317 // skip sum/avg/min/max 318 const skipDefCauss = 4 319 for _, event := range rows { 320 instance := "" 321 if nonInstanceLabelIndex > 0 { 322 instance = event.GetString(skipDefCauss) // sum/avg/min/max 323 } 324 var labels []string 325 for i, label := range def.Labels[nonInstanceLabelIndex:] { 326 // skip min/max/avg/instance 327 val := event.GetString(skipDefCauss + nonInstanceLabelIndex + i) 328 if label == "causetstore" || label == "store_id" { 329 val = fmt.Sprintf("store_id:%s", val) 330 } 331 labels = append(labels, val) 332 } 333 var quantile interface{} 334 if def.Quantile > 0 { 335 quantile = event.GetFloat64(event.Len() - 1) // quantile will be the last defCausumn 336 } 337 totalEvents = append(totalEvents, types.MakeCausets( 338 instance, 339 name, 340 strings.Join(labels, ", "), 341 quantile, 342 event.GetFloat64(0), // sum 343 event.GetFloat64(1), // avg 344 event.GetFloat64(2), // min 345 event.GetFloat64(3), // max 346 def.Comment, 347 )) 348 } 349 } 350 return totalEvents, nil 351 }