github.com/matrixorigin/matrixone@v0.7.0/pkg/util/metric/metric_collector.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metric
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"runtime"
    22  	"time"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/logutil"
    26  	pb "github.com/matrixorigin/matrixone/pkg/pb/metric"
    27  	bp "github.com/matrixorigin/matrixone/pkg/util/batchpipe"
    28  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    29  	ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor"
    30  )
    31  
// CHAN_CAPACITY is a capacity constant (10000). It is not referenced in the
// visible part of this file, so its consumer is presumably elsewhere in the
// package — TODO confirm.
const CHAN_CAPACITY = 10000
    33  
// MetricCollector is the sink that gathered metric families are pushed into.
// Two implementations live in this file: metricCollector (SQL inserts through
// an internal executor) and metricFSCollector (rows handed to table writers).
type MetricCollector interface {
	// SendMetrics hands every given MetricFamily to the collector. The
	// implementations in this file stop at, and return, the first error.
	SendMetrics(context.Context, []*pb.MetricFamily) error
	// Start and Stop are not implemented in this file; presumably they are
	// provided by the embedded bp.BaseBatchPipe — see that type for their
	// exact semantics.
	Start(context.Context) bool
	Stop(graceful bool) (<-chan struct{}, bool)
}
    39  
// collectorOpts holds the tunables shared by both collectors in this file.
// Defaults come from defaultCollectorOpts; individual fields are overridden
// through collectorOpt values.
type collectorOpts struct {
	// row threshold for COUNTER/GAUGE buffers: mfset.ShouldFlush reports true
	// once the buffered row count is strictly greater than `metricThreshold`
	metricThreshold int
	// row threshold for RAWHIST buffers: mfset.ShouldFlush reports true once
	// the buffered row count is strictly greater than `sampleThreshold`
	sampleThreshold int
	// if we can't flush a MetricFamily for the reason of `metricThreshold` or `sampleThreshold`
	// after `flushInterval`, we will flush it anyway
	// (this value is used to build the buffer's bp.NewConstantClock reminder)
	flushInterval time.Duration
	// the number of batch workers passed to bp.PipeWithBatchWorkerNum,
	// default is runtime.NumCPU()
	sqlWorkerNum int
	// multiTable is only consulted by newMetricFSCollector: when false, the
	// pipe is configured with a single buffer worker and every item is named
	// after SingleMetricTable
	multiTable bool
}
    55  
    56  func defaultCollectorOpts() collectorOpts {
    57  	return collectorOpts{
    58  		metricThreshold: 1000,
    59  		sampleThreshold: 4096,
    60  		flushInterval:   15 * time.Second,
    61  		sqlWorkerNum:    runtime.NumCPU(),
    62  	}
    63  }
    64  
// collectorOpt is a functional-style option: each implementation mutates one
// field of the collectorOpts it is applied to.
type collectorOpt interface {
	// ApplyTo writes this option's value into the given options struct.
	ApplyTo(*collectorOpts)
}
    68  
// WithMetricThreshold is a collectorOpt that overrides
// collectorOpts.metricThreshold.
type WithMetricThreshold int

// ApplyTo stores x as the metric row threshold in o.
func (x WithMetricThreshold) ApplyTo(o *collectorOpts) {
	o.metricThreshold = int(x)
}
    74  
// WithSampleThreshold is a collectorOpt that overrides
// collectorOpts.sampleThreshold.
type WithSampleThreshold int

// ApplyTo stores x as the sample threshold in o.
func (x WithSampleThreshold) ApplyTo(o *collectorOpts) {
	o.sampleThreshold = int(x)
}
    80  
// WithSqlWorkerNum is a collectorOpt that overrides
// collectorOpts.sqlWorkerNum.
type WithSqlWorkerNum int

// ApplyTo stores x as the batch worker count in o.
func (x WithSqlWorkerNum) ApplyTo(o *collectorOpts) {
	o.sqlWorkerNum = int(x)
}
    86  
// WithFlushInterval is a collectorOpt that overrides
// collectorOpts.flushInterval.
type WithFlushInterval time.Duration

// ApplyTo stores x, converted to a time.Duration, as the flush interval in o.
func (x WithFlushInterval) ApplyTo(o *collectorOpts) {
	o.flushInterval = time.Duration(x)
}
    92  
// ExportMultiTable is a collectorOpt that overrides collectorOpts.multiTable,
// which newMetricFSCollector reads when configuring its pipe.
type ExportMultiTable bool

// ApplyTo stores x as the multiTable flag in o.
func (x ExportMultiTable) ApplyTo(o *collectorOpts) {
	o.multiTable = bool(x)
}
    98  
// compile-time check that *metricCollector satisfies MetricCollector
var _ MetricCollector = (*metricCollector)(nil)

// metricCollector buffers metric families per item and turns each buffer into
// one SQL insert statement (see mfset.GetBatch) that is run through an
// internal executor (see NewItemBatchHandler).
type metricCollector struct {
	// the embedded pipe supplies SendItem; it is constructed with this
	// collector as its first argument in newMetricCollector
	*bp.BaseBatchPipe[*pb.MetricFamily, string]
	// ieFactory creates the executor used by each batch handler
	ieFactory func() ie.InternalExecutor
	// opts carries thresholds, flush interval and worker count
	opts      collectorOpts
}
   106  
   107  func newMetricCollector(factory func() ie.InternalExecutor, opts ...collectorOpt) MetricCollector {
   108  	initOpts := defaultCollectorOpts()
   109  	for _, o := range opts {
   110  		o.ApplyTo(&initOpts)
   111  	}
   112  	c := &metricCollector{
   113  		ieFactory: factory,
   114  		opts:      initOpts,
   115  	}
   116  	base := bp.NewBaseBatchPipe[*pb.MetricFamily, string](c, bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum))
   117  	c.BaseBatchPipe = base
   118  	return c
   119  }
   120  
   121  func (c *metricCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
   122  	for _, mf := range mfs {
   123  		if err := c.SendItem(ctx, mf); err != nil {
   124  			return err
   125  		}
   126  	}
   127  	return nil
   128  }
   129  
   130  func (c *metricCollector) NewItemBatchHandler(ctx context.Context) func(batch string) {
   131  	exec := c.ieFactory()
   132  	exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
   133  	return func(batch string) {
   134  		if err := exec.Exec(ctx, batch, ie.NewOptsBuilder().Finish()); err != nil {
   135  			logutil.Errorf("[Metric] insert error. sql: %s; err: %v", batch, err)
   136  		}
   137  	}
   138  }
   139  
   140  func (c *metricCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, string] {
   141  	return &mfset{
   142  		Reminder:        bp.NewConstantClock(c.opts.flushInterval),
   143  		metricThreshold: c.opts.metricThreshold,
   144  		sampleThreshold: c.opts.sampleThreshold,
   145  	}
   146  }
   147  
// mfset is the item buffer behind metricCollector: it accumulates metric
// families, tracks how many rows they represent, and renders them into one
// insert statement in GetBatch.
type mfset struct {
	// Reminder is created with bp.NewConstantClock(flushInterval) and is
	// reset together with the buffer in Reset
	bp.Reminder
	mfs             []*pb.MetricFamily // buffered families, in arrival order
	typ             pb.MetricType      // metric type that selects the threshold in ShouldFlush
	rows            int // how many buffered rows
	metricThreshold int // how many rows should be flushed as a batch
	sampleThreshold int // treat rawhist samples differently because it has higher generate rate
}
   156  
   157  func (s *mfset) Add(mf *pb.MetricFamily) {
   158  	if s.typ == mf.GetType() {
   159  		s.typ = mf.GetType()
   160  	}
   161  	switch s.typ {
   162  	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
   163  		s.rows += len(mf.Metric)
   164  	case pb.MetricType_RAWHIST:
   165  		for _, m := range mf.Metric {
   166  			s.rows += len(m.RawHist.Samples)
   167  		}
   168  	}
   169  	s.mfs = append(s.mfs, mf)
   170  }
   171  
   172  func (s *mfset) ShouldFlush() bool {
   173  	switch s.typ {
   174  	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
   175  		return s.rows > s.metricThreshold
   176  	case pb.MetricType_RAWHIST:
   177  		return s.rows > s.sampleThreshold
   178  	default:
   179  		return false
   180  	}
   181  }
   182  
   183  func (s *mfset) Reset() {
   184  	s.mfs = s.mfs[:0]
   185  	s.typ = pb.MetricType_COUNTER // 0
   186  	s.rows = 0
   187  	s.RemindReset()
   188  }
   189  
// IsEmpty reports whether no MetricFamily is currently buffered. It looks at
// the number of families, not at the row counter.
func (s *mfset) IsEmpty() bool {
	return len(s.mfs) == 0
}
   193  
   194  // GetBatch
   195  // getSql extracts a insert sql from a set of MetricFamily. the bytes.Buffer is
   196  // used to mitigate memory allocation
   197  func (s *mfset) GetBatch(ctx context.Context, buf *bytes.Buffer) string {
   198  	buf.Reset()
   199  	buf.WriteString(fmt.Sprintf("insert into %s.%s values ", MetricDBConst, s.mfs[0].GetName()))
   200  	lblsBuf := new(bytes.Buffer)
   201  	writeValues := func(t string, v float64, lbls string) {
   202  		buf.WriteString("(")
   203  		buf.WriteString(fmt.Sprintf("%q, %f", t, v))
   204  		buf.WriteString(lbls)
   205  		buf.WriteString("),")
   206  	}
   207  	for _, mf := range s.mfs {
   208  		for _, metric := range mf.Metric {
   209  			// reserved labels
   210  			lblsBuf.WriteString(fmt.Sprintf(",%q,%q", mf.GetNode(), mf.GetRole()))
   211  			// custom labels
   212  			for _, lbl := range metric.Label {
   213  				lblsBuf.WriteString(",\"")
   214  				lblsBuf.WriteString(lbl.GetValue())
   215  				lblsBuf.WriteRune('"')
   216  			}
   217  			lbls := lblsBuf.String()
   218  			lblsBuf.Reset()
   219  
   220  			switch mf.GetType() {
   221  			case pb.MetricType_COUNTER:
   222  				time := localTimeStr(metric.GetCollecttime())
   223  				writeValues(time, metric.Counter.GetValue(), lbls)
   224  			case pb.MetricType_GAUGE:
   225  				time := localTimeStr(metric.GetCollecttime())
   226  				writeValues(time, metric.Gauge.GetValue(), lbls)
   227  			case pb.MetricType_RAWHIST:
   228  				for _, sample := range metric.RawHist.Samples {
   229  					time := localTimeStr(sample.GetDatetime())
   230  					writeValues(time, sample.GetValue(), lbls)
   231  				}
   232  			default:
   233  				panic(fmt.Sprintf("unsupported metric type %v", mf.GetType()))
   234  			}
   235  		}
   236  	}
   237  	sql := buf.String()
   238  	// metric has at least one row, so we can remove the tail comma safely
   239  	sql = sql[:len(sql)-1]
   240  	return sql
   241  }
   242  
// compile-time check that *metricFSCollector satisfies MetricCollector
var _ MetricCollector = (*metricFSCollector)(nil)

// metricFSCollector buffers metric families and exports them as table rows:
// each batch is a set of table.ExportRequests built by mfsetETL.GetBatch and
// executed in NewItemBatchHandler.
type metricFSCollector struct {
	// the embedded pipe supplies SendItem; it is constructed with this
	// collector as its first argument in newMetricFSCollector
	*bp.BaseBatchPipe[*pb.MetricFamily, table.ExportRequests]
	// writerFactory creates the row writer for an account (see mfsetETL.GetBatch)
	writerFactory table.WriterFactory
	// opts carries thresholds, flush interval, worker count and multiTable
	opts          collectorOpts
}
   250  
   251  func (c *metricFSCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
   252  	for _, mf := range mfs {
   253  		if err := c.SendItem(ctx, mf); err != nil {
   254  			return err
   255  		}
   256  	}
   257  	return nil
   258  }
   259  
   260  func newMetricFSCollector(writerFactory table.WriterFactory, opts ...collectorOpt) MetricCollector {
   261  	initOpts := defaultCollectorOpts()
   262  	for _, o := range opts {
   263  		o.ApplyTo(&initOpts)
   264  	}
   265  	c := &metricFSCollector{
   266  		writerFactory: writerFactory,
   267  		opts:          initOpts,
   268  	}
   269  	pipeOpts := []bp.BaseBatchPipeOpt{bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum)}
   270  	if !initOpts.multiTable {
   271  		pipeOpts = append(pipeOpts,
   272  			bp.PipeWithBufferWorkerNum(1),
   273  			bp.PipeWithItemNameFormatter(func(bp.HasName) string {
   274  				return SingleMetricTable.GetName()
   275  			}))
   276  	}
   277  	base := bp.NewBaseBatchPipe[*pb.MetricFamily, table.ExportRequests](c, pipeOpts...)
   278  	c.BaseBatchPipe = base
   279  	return c
   280  }
   281  
   282  func (c *metricFSCollector) NewItemBatchHandler(ctx context.Context) func(batch table.ExportRequests) {
   283  	return func(batchs table.ExportRequests) {
   284  		for _, batch := range batchs {
   285  			if _, err := batch.Handle(); err != nil {
   286  				logutil.Errorf("[Metric] failed to write, err: %v", err)
   287  			}
   288  		}
   289  	}
   290  }
   291  
   292  func (c *metricFSCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, table.ExportRequests] {
   293  	return &mfsetETL{
   294  		mfset: mfset{
   295  			Reminder:        bp.NewConstantClock(c.opts.flushInterval),
   296  			metricThreshold: c.opts.metricThreshold,
   297  			sampleThreshold: c.opts.sampleThreshold,
   298  		},
   299  		collector: c,
   300  	}
   301  }
   302  
// mfsetETL is the item buffer behind metricFSCollector. It reuses mfset's
// buffering and flush accounting and defines its own GetBatch, which produces
// table.ExportRequests instead of a SQL string.
type mfsetETL struct {
	mfset
	// collector gives GetBatch access to the writer factory
	collector *metricFSCollector
}
   307  
   308  func (s *mfsetETL) GetBatch(ctx context.Context, buf *bytes.Buffer) table.ExportRequests {
   309  	buf.Reset()
   310  
   311  	ts := time.Now()
   312  	buffer := make(map[string]table.RowWriter, 2)
   313  	writeValues := func(row *table.Row) error {
   314  		w, exist := buffer[row.GetAccount()]
   315  		if !exist {
   316  			w = s.collector.writerFactory(ctx, row.GetAccount(), SingleMetricTable, ts)
   317  			buffer[row.GetAccount()] = w
   318  		}
   319  		if err := w.WriteRow(row); err != nil {
   320  			return err
   321  		}
   322  		return nil
   323  	}
   324  
   325  	row := SingleMetricTable.GetRow(ctx)
   326  	defer row.Free()
   327  	for _, mf := range s.mfs {
   328  		for _, metric := range mf.Metric {
   329  			// reserved labels
   330  			row.Reset()
   331  			row.SetColumnVal(metricNameColumn, mf.GetName())
   332  			row.SetColumnVal(metricNodeColumn, mf.GetNode())
   333  			row.SetColumnVal(metricRoleColumn, mf.GetRole())
   334  			// custom labels
   335  			for _, lbl := range metric.Label {
   336  				row.SetVal(lbl.GetName(), lbl.GetValue())
   337  			}
   338  
   339  			switch mf.GetType() {
   340  			case pb.MetricType_COUNTER:
   341  				time := localTime(metric.GetCollecttime())
   342  				row.SetColumnVal(metricCollectTimeColumn, time)
   343  				row.SetColumnVal(metricValueColumn, metric.Counter.GetValue())
   344  				_ = writeValues(row)
   345  			case pb.MetricType_GAUGE:
   346  				time := localTime(metric.GetCollecttime())
   347  				row.SetColumnVal(metricCollectTimeColumn, time)
   348  				row.SetColumnVal(metricValueColumn, metric.Gauge.GetValue())
   349  				_ = writeValues(row)
   350  			case pb.MetricType_RAWHIST:
   351  				for _, sample := range metric.RawHist.Samples {
   352  					time := localTime(sample.GetDatetime())
   353  					row.SetColumnVal(metricCollectTimeColumn, time)
   354  					row.SetColumnVal(metricValueColumn, sample.GetValue())
   355  					_ = writeValues(row)
   356  				}
   357  			default:
   358  				panic(moerr.NewInternalError(ctx, "unsupported metric type %v", mf.GetType()))
   359  			}
   360  		}
   361  	}
   362  
   363  	reqs := make([]table.WriteRequest, 0, len(buffer))
   364  	for _, w := range buffer {
   365  		reqs = append(reqs, table.NewRowRequest(w))
   366  	}
   367  
   368  	return reqs
   369  }
   370  
   371  func localTime(value int64) time.Time {
   372  	return time.UnixMicro(value).In(time.Local)
   373  }
   374  
   375  func localTimeStr(value int64) string {
   376  	return time.UnixMicro(value).In(time.Local).Format("2006-01-02 15:04:05.000000")
   377  }