github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/mometric/metric_collector.go

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package mometric
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"math"
    22  	"runtime"
    23  	"time"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/logutil"
    27  	pb "github.com/matrixorigin/matrixone/pkg/pb/metric"
    28  	bp "github.com/matrixorigin/matrixone/pkg/util/batchpipe"
    29  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    30  	ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor"
    31  )
    32  
    33  const CHAN_CAPACITY = 10000
    34  
    35  type MetricCollector interface {
    36  	SendMetrics(context.Context, []*pb.MetricFamily) error
    37  	Start(context.Context) bool
    38  	Stop(graceful bool) (<-chan struct{}, bool)
    39  }
    40  
    41  type collectorOpts struct {
    42  	// if a MetricFamily has `metricThreshold` Metrics or more
    43  	// it deserves a flush operation
    44  	metricThreshold int
    45  	// if a RawHist MetricFamily has `sampleThreshold` Samples or more
    46  	// it deserves a flush operation
    47  	sampleThreshold int
     48  	// if a MetricFamily still has not reached `metricThreshold` or `sampleThreshold`
     49  	// after `flushInterval`, it is flushed anyway
     50  	flushInterval time.Duration
     51  	// the number of goroutines that execute the insert SQL; default is 10% of runtime.NumCPU(), rounded up
    52  	sqlWorkerNum int
    53  }
    54  
    55  func defaultCollectorOpts() collectorOpts {
    56  	var defaultSqlWorkerNum = int(math.Ceil(float64(runtime.NumCPU()) * 0.1))
    57  	return collectorOpts{
    58  		metricThreshold: 1000,
    59  		sampleThreshold: 4096,
    60  		flushInterval:   15 * time.Second,
    61  		sqlWorkerNum:    defaultSqlWorkerNum,
    62  	}
    63  }
    64  
    65  type collectorOpt interface {
    66  	ApplyTo(*collectorOpts)
    67  }
    68  
    69  type WithMetricThreshold int
    70  
    71  func (x WithMetricThreshold) ApplyTo(o *collectorOpts) {
    72  	o.metricThreshold = int(x)
    73  }
    74  
    75  type WithSampleThreshold int
    76  
    77  func (x WithSampleThreshold) ApplyTo(o *collectorOpts) {
    78  	o.sampleThreshold = int(x)
    79  }
    80  
    81  type WithSqlWorkerNum int
    82  
    83  func (x WithSqlWorkerNum) ApplyTo(o *collectorOpts) {
    84  	o.sqlWorkerNum = int(x)
    85  }
    86  
    87  type WithFlushInterval time.Duration
    88  
    89  func (x WithFlushInterval) ApplyTo(o *collectorOpts) {
    90  	o.flushInterval = time.Duration(x)
    91  }
    92  
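// exampleCollectorOptions is an illustrative sketch, not part of the original
// file: it shows how the functional options above compose. Each option simply
// overwrites its field on a shared collectorOpts value, so later options win
// over earlier ones and over the defaults.
func exampleCollectorOptions() collectorOpts {
	opts := defaultCollectorOpts()
	for _, o := range []collectorOpt{
		WithMetricThreshold(2000),
		WithSampleThreshold(8192),
		WithFlushInterval(30 * time.Second),
		WithSqlWorkerNum(2),
	} {
		o.ApplyTo(&opts)
	}
	return opts
}
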
    93  var _ MetricCollector = (*metricCollector)(nil)
    94  
    95  type metricCollector struct {
    96  	*bp.BaseBatchPipe[*pb.MetricFamily, string]
    97  	ieFactory func() ie.InternalExecutor
    98  	opts      collectorOpts
    99  }
   100  
   101  func newMetricCollector(factory func() ie.InternalExecutor, opts ...collectorOpt) MetricCollector {
   102  	initOpts := defaultCollectorOpts()
   103  	for _, o := range opts {
   104  		o.ApplyTo(&initOpts)
   105  	}
   106  	c := &metricCollector{
   107  		ieFactory: factory,
   108  		opts:      initOpts,
   109  	}
   110  	base := bp.NewBaseBatchPipe[*pb.MetricFamily, string](c, bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum))
   111  	c.BaseBatchPipe = base
   112  	return c
   113  }
   114  
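// Typical lifecycle, as an illustrative sketch (newExecutor stands in for a
// caller-supplied func() ie.InternalExecutor and is hypothetical; mfs is the
// slice of scraped families):
//
//	c := newMetricCollector(newExecutor, WithFlushInterval(30*time.Second))
//	c.Start(ctx)                // spin up the buffer and batch-worker goroutines
//	_ = c.SendMetrics(ctx, mfs) // enqueue scraped MetricFamily values
//	stopped, _ := c.Stop(true)  // graceful stop: flush whatever is still buffered
//	<-stopped                   // wait for the pipe to drain
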
   115  func (c *metricCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
   116  	for _, mf := range mfs {
   117  		if err := c.SendItem(ctx, mf); err != nil {
   118  			return err
   119  		}
   120  	}
   121  	return nil
   122  }
   123  
   124  func (c *metricCollector) NewItemBatchHandler(ctx context.Context) func(batch string) {
   125  	exec := c.ieFactory()
   126  	exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
   127  	return func(batch string) {
   128  		if err := exec.Exec(ctx, batch, ie.NewOptsBuilder().Finish()); err != nil {
   129  			logutil.Errorf("[Metric] insert error. sql: %s; err: %v", batch, err)
   130  		}
   131  	}
   132  }
   133  
   134  func (c *metricCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, string] {
   135  	return &mfset{
   136  		Reminder:        bp.NewConstantClock(c.opts.flushInterval),
   137  		metricThreshold: c.opts.metricThreshold,
   138  		sampleThreshold: c.opts.sampleThreshold,
   139  	}
   140  }
   141  
   142  type mfset struct {
   143  	bp.Reminder
   144  	mfs             []*pb.MetricFamily
   145  	typ             pb.MetricType
   146  	rows            int // how many buffered rows
    147  	metricThreshold int // how many rows should be flushed as a batch
    148  	sampleThreshold int // RawHist samples are counted separately because they are generated at a much higher rate
   149  }
   150  
   151  func (s *mfset) Add(mf *pb.MetricFamily) {
    152  	if s.typ != mf.GetType() {
   153  		s.typ = mf.GetType()
   154  	}
   155  	switch s.typ {
   156  	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
   157  		s.rows += len(mf.Metric)
   158  	case pb.MetricType_RAWHIST:
   159  		for _, m := range mf.Metric {
   160  			s.rows += len(m.RawHist.Samples)
   161  		}
   162  	}
   163  	s.mfs = append(s.mfs, mf)
   164  }
   165  
   166  func (s *mfset) ShouldFlush() bool {
   167  	switch s.typ {
   168  	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
   169  		return s.rows > s.metricThreshold
   170  	case pb.MetricType_RAWHIST:
   171  		return s.rows > s.sampleThreshold
   172  	default:
   173  		return false
   174  	}
   175  }
   176  
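// exampleShouldFlush is an illustrative sketch, not part of the original file:
// counter/gauge buffers are measured in rows against metricThreshold, while
// RAWHIST buffers are measured in samples against sampleThreshold, and a flush
// only triggers once the relevant threshold is strictly exceeded.
func exampleShouldFlush() (bool, bool) {
	gauges := &mfset{typ: pb.MetricType_GAUGE, rows: 1001, metricThreshold: 1000}
	rawhist := &mfset{typ: pb.MetricType_RAWHIST, rows: 4096, sampleThreshold: 4096}
	return gauges.ShouldFlush(), rawhist.ShouldFlush() // true, false (4096 is not > 4096)
}
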
   177  func (s *mfset) Reset() {
   178  	s.mfs = s.mfs[:0]
   179  	s.typ = pb.MetricType_COUNTER // 0
   180  	s.rows = 0
   181  	s.RemindReset()
   182  }
   183  
   184  func (s *mfset) IsEmpty() bool {
   185  	return len(s.mfs) == 0
   186  }
   187  
    188  // GetBatch extracts an INSERT SQL statement from the buffered set of
    189  // MetricFamily values. The passed-in bytes.Buffer is reused to mitigate
    190  // memory allocation.
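//
// For reference, a batch built from a hypothetical gauge family "example_gauge"
// carrying one custom label looks roughly like this (MetricDBConst stands for
// the metric database name; the single-line statement is wrapped here for
// readability):
//
//	insert into MetricDBConst.example_gauge values
//	("2022-01-02 15:04:05.000000", 1.500000,"node0","CN","sys"),
//	("2022-01-02 15:04:20.000000", 2.000000,"node0","CN","sys")
//
// The trailing comma emitted by writeValues is stripped before the SQL is returned.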
   191  func (s *mfset) GetBatch(ctx context.Context, buf *bytes.Buffer) string {
   192  	buf.Reset()
   193  	buf.WriteString(fmt.Sprintf("insert into %s.%s values ", MetricDBConst, s.mfs[0].GetName()))
   194  	lblsBuf := new(bytes.Buffer)
   195  	writeValues := func(t string, v float64, lbls string) {
   196  		buf.WriteString("(")
   197  		buf.WriteString(fmt.Sprintf("%q, %f", t, v))
   198  		buf.WriteString(lbls)
   199  		buf.WriteString("),")
   200  	}
   201  	for _, mf := range s.mfs {
   202  		for _, metric := range mf.Metric {
   203  			// reserved labels
   204  			lblsBuf.WriteString(fmt.Sprintf(",%q,%q", mf.GetNode(), mf.GetRole()))
   205  			// custom labels
   206  			for _, lbl := range metric.Label {
   207  				lblsBuf.WriteString(",\"")
   208  				lblsBuf.WriteString(lbl.GetValue())
   209  				lblsBuf.WriteRune('"')
   210  			}
   211  			lbls := lblsBuf.String()
   212  			lblsBuf.Reset()
   213  
   214  			switch mf.GetType() {
   215  			case pb.MetricType_COUNTER:
   216  				time := localTimeStr(metric.GetCollecttime())
   217  				writeValues(time, metric.Counter.GetValue(), lbls)
   218  			case pb.MetricType_GAUGE:
   219  				time := localTimeStr(metric.GetCollecttime())
   220  				writeValues(time, metric.Gauge.GetValue(), lbls)
   221  			case pb.MetricType_RAWHIST:
   222  				for _, sample := range metric.RawHist.Samples {
   223  					time := localTimeStr(sample.GetDatetime())
   224  					writeValues(time, sample.GetValue(), lbls)
   225  				}
   226  			default:
   227  				panic(fmt.Sprintf("unsupported metric type %v", mf.GetType()))
   228  			}
   229  		}
   230  	}
   231  	sql := buf.String()
    232  	// the batch contains at least one row, so the trailing comma can be removed safely
   233  	sql = sql[:len(sql)-1]
   234  	return sql
   235  }
   236  
   237  var _ MetricCollector = (*metricFSCollector)(nil)
   238  
   239  type metricFSCollector struct {
   240  	*bp.BaseBatchPipe[*pb.MetricFamily, table.ExportRequests]
   241  	writerFactory table.WriterFactory
   242  	opts          collectorOpts
   243  }
   244  
   245  func (c *metricFSCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
   246  	for _, mf := range mfs {
   247  		if err := c.SendItem(ctx, mf); err != nil {
   248  			return err
   249  		}
   250  	}
   251  	return nil
   252  }
   253  
   254  func newMetricFSCollector(writerFactory table.WriterFactory, opts ...collectorOpt) MetricCollector {
   255  	initOpts := defaultCollectorOpts()
   256  	for _, o := range opts {
   257  		o.ApplyTo(&initOpts)
   258  	}
   259  	c := &metricFSCollector{
   260  		writerFactory: writerFactory,
   261  		opts:          initOpts,
   262  	}
   263  	pipeOpts := []bp.BaseBatchPipeOpt{bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum),
    264  		bp.PipeWithBufferWorkerNum(1), // all metric families are routed into a single buffer
   265  		bp.PipeWithItemNameFormatter(func(bp.HasName) string {
   266  			return SingleMetricTable.GetName()
   267  		}),
   268  	}
   269  	base := bp.NewBaseBatchPipe[*pb.MetricFamily, table.ExportRequests](c, pipeOpts...)
   270  	c.BaseBatchPipe = base
   271  	return c
   272  }
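
// Construction sketch (illustrative; fsWriterFactory is a hypothetical
// table.WriterFactory supplied by the export layer): the item-name formatter
// routes every MetricFamily into one buffer keyed by SingleMetricTable's name,
// and sqlWorkerNum batch workers turn the flushed buffers into write requests.
//
//	fsCollector := newMetricFSCollector(fsWriterFactory, WithFlushInterval(30*time.Second))
//	fsCollector.Start(ctx)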
   273  
   274  func (c *metricFSCollector) NewItemBatchHandler(ctx context.Context) func(batch table.ExportRequests) {
    275  	return func(batches table.ExportRequests) {
    276  		for _, batch := range batches {
   277  			if _, err := batch.Handle(); err != nil {
   278  				logutil.Errorf("[Metric] failed to write, err: %v", err)
   279  			}
   280  		}
   281  	}
   282  }
   283  
   284  func (c *metricFSCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, table.ExportRequests] {
   285  	return &mfsetETL{
   286  		mfset: mfset{
   287  			Reminder:        bp.NewConstantClock(c.opts.flushInterval),
   288  			metricThreshold: c.opts.metricThreshold,
   289  			sampleThreshold: c.opts.sampleThreshold,
   290  		},
   291  		collector: c,
   292  	}
   293  }
   294  
   295  type mfsetETL struct {
   296  	mfset
   297  	collector *metricFSCollector
   298  }
   299  
    300  // GetBatch implements bp.ItemBuffer's GetBatch.
    301  // It writes metrics into up to two tables: the `metric` table and the `sql_statement_cu` table.
   302  func (s *mfsetETL) GetBatch(ctx context.Context, buf *bytes.Buffer) table.ExportRequests {
   303  	buf.Reset()
   304  
   305  	ts := time.Now()
   306  	buffer := make(map[string]table.RowWriter, 2)
   307  	writeValues := func(row *table.Row) error {
   308  		w, exist := buffer[row.Table.GetName()]
   309  		if !exist {
   310  			w = s.collector.writerFactory.GetRowWriter(ctx, row.GetAccount(), row.Table, ts)
   311  			buffer[row.Table.GetName()] = w
   312  		}
   313  		if err := w.WriteRow(row); err != nil {
   314  			return err
   315  		}
   316  		return nil
   317  	}
   318  	rows := make(map[string]*table.Row, 2)
   319  	defer func() {
   320  		for _, r := range rows {
   321  			r.Free()
   322  		}
   323  	}()
   324  	getRow := func(metricName string) *table.Row {
   325  		tbl := SingleMetricTable
   326  		if metricName == SqlStatementCUTable.GetName() {
   327  			tbl = SqlStatementCUTable
   328  		}
   329  		row, exist := rows[tbl.GetName()]
   330  		if !exist {
   331  			row = tbl.GetRow(ctx)
   332  			rows[tbl.GetName()] = row
   333  		}
   334  		return row
   335  	}
   336  
   337  	for _, mf := range s.mfs {
   338  		for _, metric := range mf.Metric {
   339  			// reserved labels
   340  			row := getRow(mf.GetName())
   341  			row.Reset()
    342  			// table `metric` HAS the `metric_name` column;
    343  			// table `sql_statement_cu` does NOT have it
   344  			if row.Table.GetName() == SingleMetricTable.GetName() {
   345  				row.SetColumnVal(metricNameColumn, table.StringField(mf.GetName()))
   346  			}
   347  			row.SetColumnVal(metricNodeColumn, table.StringField(mf.GetNode()))
   348  			row.SetColumnVal(metricRoleColumn, table.StringField(mf.GetRole()))
   349  			// custom labels
   350  			for _, lbl := range metric.Label {
   351  				row.SetVal(lbl.GetName(), table.StringField(lbl.GetValue()))
   352  			}
   353  
   354  			switch mf.GetType() {
   355  			case pb.MetricType_COUNTER:
   356  				time := localTime(metric.GetCollecttime())
   357  				row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
   358  				row.SetColumnVal(metricValueColumn, table.Float64Field(metric.Counter.GetValue()))
   359  				_ = writeValues(row)
   360  			case pb.MetricType_GAUGE:
   361  				time := localTime(metric.GetCollecttime())
   362  				row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
   363  				row.SetColumnVal(metricValueColumn, table.Float64Field(metric.Gauge.GetValue()))
   364  				_ = writeValues(row)
   365  			case pb.MetricType_RAWHIST:
   366  				for _, sample := range metric.RawHist.Samples {
   367  					time := localTime(sample.GetDatetime())
   368  					row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
   369  					row.SetColumnVal(metricValueColumn, table.Float64Field(sample.GetValue()))
   370  					_ = writeValues(row)
   371  				}
   372  			default:
   373  				panic(moerr.NewInternalError(ctx, "unsupported metric type %v", mf.GetType()))
   374  			}
   375  		}
   376  	}
   377  
   378  	reqs := make([]table.WriteRequest, 0, len(buffer))
   379  	for _, w := range buffer {
   380  		reqs = append(reqs, table.NewRowRequest(w))
   381  	}
   382  
   383  	return reqs
   384  }
   385  
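// exampleRowTargets is an illustrative sketch, not part of the original file:
// it mirrors getRow's routing rule above: only a family named exactly like
// SqlStatementCUTable lands in that table, while every other family is written
// to the shared SingleMetricTable, which is why only the latter carries metric_name.
func exampleRowTargets(metricName string) string {
	if metricName == SqlStatementCUTable.GetName() {
		return SqlStatementCUTable.GetName()
	}
	return SingleMetricTable.GetName()
}
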
   386  func localTime(value int64) time.Time {
   387  	return time.UnixMicro(value).In(time.Local)
   388  }
   389  
   390  func localTimeStr(value int64) string {
   391  	return time.UnixMicro(value).In(time.Local).Format("2006-01-02 15:04:05.000000")
   392  }