github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/metrics_reader.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package interlock

import (
	"context"
	"fmt"
	"math"
	"sort"
	"strings"
	"time"

	"github.com/prometheus/client_golang/api"
	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
	pperceptron "github.com/prometheus/common/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	causetembedded "github.com/whtcorpsinc/milevadb/causet/embedded"
	"github.com/whtcorpsinc/milevadb/petri/infosync"
	"github.com/whtcorpsinc/milevadb/schemareplicant"
	"github.com/whtcorpsinc/milevadb/soliton"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/types"
)

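// promReadTimeout is the timeout applied to a single Prometheus range query
// issued by MetricRetriever.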
const promReadTimeout = time.Second * 10

// MetricRetriever is used to read metric data.
type MetricRetriever struct {
	dummyCloser
	causet    *perceptron.BlockInfo
	tblDef    *schemareplicant.MetricBlockDef
	extractor *causetembedded.MetricBlockExtractor
	timeRange causetembedded.QueryTimeRange
	retrieved bool
}

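// retrieve queries Prometheus once per requested quantile over the extracted time
// range and converts the resulting samples into events for the metrics schema causet.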
func (e *MetricRetriever) retrieve(ctx context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) {
	if e.retrieved || e.extractor.SkipRequest {
		return nil, nil
	}
	e.retrieved = true

	failpoint.InjectContext(ctx, "mockMetricsBlockData", func() {
		m, ok := ctx.Value("__mockMetricsBlockData").(map[string][][]types.Causet)
		if ok && m[e.causet.Name.L] != nil {
			failpoint.Return(m[e.causet.Name.L], nil)
		}
	})

	tblDef, err := schemareplicant.GetMetricBlockDef(e.causet.Name.L)
	if err != nil {
		return nil, err
	}
	e.tblDef = tblDef
	queryRange := e.getQueryRange(sctx)
	totalEvents := make([][]types.Causet, 0)
	quantiles := e.extractor.Quantiles
	if len(quantiles) == 0 {
		quantiles = []float64{tblDef.Quantile}
	}
	for _, quantile := range quantiles {
		var queryValue pperceptron.Value
		queryValue, err = e.queryMetric(ctx, sctx, queryRange, quantile)
		if err != nil {
			if err1, ok := err.(*promv1.Error); ok {
				return nil, errors.Errorf("query metric error, msg: %v, detail: %v", err1.Msg, err1.Detail)
			}
			return nil, errors.Errorf("query metric error: %v", err.Error())
		}
		partEvents := e.genEvents(queryValue, quantile)
		totalEvents = append(totalEvents, partEvents...)
	}
	return totalEvents, nil
}

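// queryMetric resolves the Prometheus address from infosync and runs the PromQL
// range query generated for this causet definition, retrying transient failures a
// few times before giving up.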
func (e *MetricRetriever) queryMetric(ctx context.Context, sctx stochastikctx.Context, queryRange promv1.Range, quantile float64) (result pperceptron.Value, err error) {
	failpoint.InjectContext(ctx, "mockMetricsPromData", func() {
		failpoint.Return(ctx.Value("__mockMetricsPromData").(pperceptron.Matrix), nil)
	})

	// Retry a few times to tolerate transient network errors.
	var prometheusAddr string
	for i := 0; i < 5; i++ {
		// TODO: Prometheus will be integrated into FIDel, then we need to query Prometheus in FIDel directly, which requires changing the query API.
		prometheusAddr, err = infosync.GetPrometheusAddr()
		if err == nil || err == infosync.ErrPrometheusAddrIsNotSet {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	if err != nil {
		return nil, err
	}
	promClient, err := api.NewClient(api.Config{
		Address: prometheusAddr,
	})
	if err != nil {
		return nil, err
	}
	promQLAPI := promv1.NewAPI(promClient)
	ctx, cancel := context.WithTimeout(ctx, promReadTimeout)
	defer cancel()
	promQL := e.tblDef.GenPromQL(sctx, e.extractor.LabelConditions, quantile)

	// Retry a few times to tolerate transient network errors.
	for i := 0; i < 5; i++ {
		result, _, err = promQLAPI.QueryRange(ctx, promQL, queryRange)
		if err == nil {
			break
		}
		time.Sleep(100 * time.Millisecond)
	}
	return result, err
}

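// promQLQueryRange is an alias of promv1.Range, describing the start/end window
// and step of a PromQL range query.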
type promQLQueryRange = promv1.Range

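// getQueryRange builds the PromQL query range from the extracted start/end time,
// using the stochastik variable MetricSchemaStep (in seconds) as the resolution step.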
func (e *MetricRetriever) getQueryRange(sctx stochastikctx.Context) promQLQueryRange {
	startTime, endTime := e.extractor.StartTime, e.extractor.EndTime
	step := time.Second * time.Duration(sctx.GetStochastikVars().MetricSchemaStep)
	return promQLQueryRange{Start: startTime, End: endTime, Step: step}
}

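// genEvents converts a Prometheus query result into events. Only matrix results are
// handled; every sample of every series becomes one event.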
func (e *MetricRetriever) genEvents(value pperceptron.Value, quantile float64) [][]types.Causet {
	var rows [][]types.Causet
	switch value.Type() {
	case pperceptron.ValMatrix:
		matrix := value.(pperceptron.Matrix)
		for _, m := range matrix {
			for _, v := range m.Values {
				record := e.genRecord(m.Metric, v, quantile)
				rows = append(rows, record)
			}
		}
	}
	return rows
}

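// genRecord converts a single Prometheus sample into one event: the sample time,
// one value per label of the causet definition, the quantile (only when the causet
// defines one), and finally the sample value (NULL when the value is NaN).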
func (e *MetricRetriever) genRecord(metric pperceptron.Metric, pair pperceptron.SamplePair, quantile float64) []types.Causet {
	record := make([]types.Causet, 0, 2+len(e.tblDef.Labels)+1)
	// Record order should be kept the same as genDeferredCausetInfos.
	record = append(record, types.NewTimeCauset(types.NewTime(
		types.FromGoTime(time.Unix(int64(pair.Timestamp/1000), int64(pair.Timestamp%1000)*1e6)),
		allegrosql.TypeDatetime,
		types.MaxFsp,
	)))
	for _, label := range e.tblDef.Labels {
		v := ""
		if metric != nil {
			v = string(metric[pperceptron.LabelName(label)])
		}
		if len(v) == 0 {
			v = schemareplicant.GenLabelConditionValues(e.extractor.LabelConditions[strings.ToLower(label)])
		}
		record = append(record, types.NewStringCauset(v))
	}
	if e.tblDef.Quantile > 0 {
		record = append(record, types.NewFloat64Causet(quantile))
	}
	if math.IsNaN(float64(pair.Value)) {
		record = append(record, types.NewCauset(nil))
	} else {
		record = append(record, types.NewFloat64Causet(float64(pair.Value)))
	}
	return record
}

// MetricsSummaryRetriever is used to read metric summary data.
type MetricsSummaryRetriever struct {
	dummyCloser
	causet    *perceptron.BlockInfo
	extractor *causetembedded.MetricSummaryBlockExtractor
	timeRange causetembedded.QueryTimeRange
	retrieved bool
}

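// retrieve summarizes every causet in schemareplicant.MetricBlockMap that passes the
// metrics-name filter by running a statement of the form
//
//	select sum(value),avg(value),min(value),max(value) from `<metric schema>`.`<causet name>` <time condition>
//
// adding a quantile filter and "group by quantile" when the causet defines a quantile,
// and emits one event per metric name (and quantile).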
func (e *MetricsSummaryRetriever) retrieve(_ context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) {
	if e.retrieved || e.extractor.SkipRequest {
		return nil, nil
	}
	e.retrieved = true
	totalEvents := make([][]types.Causet, 0, len(schemareplicant.MetricBlockMap))
	blocks := make([]string, 0, len(schemareplicant.MetricBlockMap))
	for name := range schemareplicant.MetricBlockMap {
		blocks = append(blocks, name)
	}
	sort.Strings(blocks)

	filter := inspectionFilter{set: e.extractor.MetricsNames}
	condition := e.timeRange.Condition()
	for _, name := range blocks {
		if !filter.enable(name) {
			continue
		}
		def, found := schemareplicant.MetricBlockMap[name]
		if !found {
			sctx.GetStochastikVars().StmtCtx.AppendWarning(fmt.Errorf("metrics causet: %s not found", name))
			continue
		}
		var allegrosql string
		if def.Quantile > 0 {
			var qs []string
			if len(e.extractor.Quantiles) > 0 {
				for _, q := range e.extractor.Quantiles {
					qs = append(qs, fmt.Sprintf("%f", q))
				}
			} else {
				qs = []string{"0.99"}
			}
			allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value),quantile from `%[2]s`.`%[1]s` %[3]s and quantile in (%[4]s) group by quantile order by quantile",
				name, soliton.MetricSchemaName.L, condition, strings.Join(qs, ","))
		} else {
			allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value) from `%[2]s`.`%[1]s` %[3]s",
				name, soliton.MetricSchemaName.L, condition)
		}

		rows, _, err := sctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQL(allegrosql)
		if err != nil {
			return nil, errors.Errorf("execute '%s' failed: %v", allegrosql, err)
		}
		for _, event := range rows {
			var quantile interface{}
			if def.Quantile > 0 {
				quantile = event.GetFloat64(event.Len() - 1)
			}
			totalEvents = append(totalEvents, types.MakeCausets(
				name,
				quantile,
				event.GetFloat64(0),
				event.GetFloat64(1),
				event.GetFloat64(2),
				event.GetFloat64(3),
				def.Comment,
			))
		}
	}
	return totalEvents, nil
}

// MetricsSummaryByLabelRetriever is used to read metric detail data.
type MetricsSummaryByLabelRetriever struct {
	dummyCloser
	causet    *perceptron.BlockInfo
	extractor *causetembedded.MetricSummaryBlockExtractor
	timeRange causetembedded.QueryTimeRange
	retrieved bool
}

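// retrieve works like MetricsSummaryRetriever.retrieve, but additionally groups the
// aggregation by the causet's labels (and quantile, when defined), emitting one event
// per label combination with instance, label values, quantile, and sum/avg/min/max.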
func (e *MetricsSummaryByLabelRetriever) retrieve(ctx context.Context, sctx stochastikctx.Context) ([][]types.Causet, error) {
	if e.retrieved || e.extractor.SkipRequest {
		return nil, nil
	}
	e.retrieved = true
	totalEvents := make([][]types.Causet, 0, len(schemareplicant.MetricBlockMap))
	blocks := make([]string, 0, len(schemareplicant.MetricBlockMap))
	for name := range schemareplicant.MetricBlockMap {
		blocks = append(blocks, name)
	}
	sort.Strings(blocks)

	filter := inspectionFilter{set: e.extractor.MetricsNames}
	condition := e.timeRange.Condition()
	for _, name := range blocks {
		if !filter.enable(name) {
			continue
		}
		def, found := schemareplicant.MetricBlockMap[name]
		if !found {
			sctx.GetStochastikVars().StmtCtx.AppendWarning(fmt.Errorf("metrics causet: %s not found", name))
			continue
		}
		defcaus := def.Labels
		cond := condition
		if def.Quantile > 0 {
			defcaus = append(defcaus, "quantile")
			if len(e.extractor.Quantiles) > 0 {
				qs := make([]string, len(e.extractor.Quantiles))
				for i, q := range e.extractor.Quantiles {
					qs[i] = fmt.Sprintf("%f", q)
				}
				cond += " and quantile in (" + strings.Join(qs, ",") + ")"
			} else {
				cond += " and quantile=0.99"
			}
		}
		var allegrosql string
		if len(defcaus) > 0 {
			allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value),`%s` from `%s`.`%s` %s group by `%[1]s` order by `%[1]s`",
				strings.Join(defcaus, "`,`"), soliton.MetricSchemaName.L, name, cond)
		} else {
			allegrosql = fmt.Sprintf("select sum(value),avg(value),min(value),max(value) from `%s`.`%s` %s",
				soliton.MetricSchemaName.L, name, cond)
		}
		rows, _, err := sctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQLWithContext(ctx, allegrosql)
		if err != nil {
			return nil, errors.Errorf("execute '%s' failed: %v", allegrosql, err)
		}
		nonInstanceLabelIndex := 0
		if len(def.Labels) > 0 && def.Labels[0] == "instance" {
			nonInstanceLabelIndex = 1
		}
		// skip sum/avg/min/max
		const skipDefCauss = 4
		for _, event := range rows {
			instance := ""
			if nonInstanceLabelIndex > 0 {
				instance = event.GetString(skipDefCauss) // instance is the first label, right after sum/avg/min/max
			}
			var labels []string
			for i, label := range def.Labels[nonInstanceLabelIndex:] {
				// skip sum/avg/min/max and, if present, the instance label
				val := event.GetString(skipDefCauss + nonInstanceLabelIndex + i)
				if label == "causetstore" || label == "store_id" {
					val = fmt.Sprintf("store_id:%s", val)
				}
				labels = append(labels, val)
			}
			var quantile interface{}
			if def.Quantile > 0 {
				quantile = event.GetFloat64(event.Len() - 1) // quantile is the last defCausumn
			}
			totalEvents = append(totalEvents, types.MakeCausets(
				instance,
				name,
				strings.Join(labels, ", "),
				quantile,
				event.GetFloat64(0), // sum
				event.GetFloat64(1), // avg
				event.GetFloat64(2), // min
				event.GetFloat64(3), // max
				def.Comment,
			))
		}
	}
	return totalEvents, nil
}