github.com/matrixorigin/matrixone@v0.7.0/pkg/util/metric/metric.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metric
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"net/http"
    22  	"strings"
    23  	"sync"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    28  	"github.com/matrixorigin/matrixone/pkg/config"
    29  	"github.com/matrixorigin/matrixone/pkg/logutil"
    30  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    31  	ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor"
    32  	"github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace"
    33  
    34  	prom "github.com/prometheus/client_golang/prometheus"
    35  	"github.com/prometheus/client_golang/prometheus/promhttp"
    36  	dto "github.com/prometheus/client_model/go"
    37  )
    38  
// Constants shared by the metric package:
//   - MetricDBConst is the name of the database holding the metric table and views.
//   - sqlCreateDBConst creates that database when it does not exist yet.
//   - sqlDropDBConst drops that database; initTables executes it when
//     getForceInit() reports true.
//   - ALL_IN_ONE_MODE is the default value of the `node` and `role` columns of
//     the metric table.
const (
	MetricDBConst    = "system_metrics"
	sqlCreateDBConst = "create database if not exists " + MetricDBConst
	sqlDropDBConst   = "drop database if exists " + MetricDBConst
	ALL_IN_ONE_MODE  = "monolithic"
)
    45  
// Names of the fixed columns written by createTableSqlFromMetricFamily for
// every per-metric table (node, role, value, collecttime).
//
// occupiedLbls is the set of those names; mustValidLbls panics when a metric
// declares a label whose lower-cased name is a member of this set.
var (
	lblNodeConst  = "node"
	lblRoleConst  = "role"
	lblValueConst = "value"
	lblTimeConst  = "collecttime"
	occupiedLbls  = map[string]struct{}{lblTimeConst: {}, lblValueConst: {}, lblNodeConst: {}, lblRoleConst: {}}
)
    53  
// Collector is a prometheus collector that additionally knows how to expose
// itself to (or detach itself from) the prometheus scrape registry.
type Collector interface {
	prom.Collector
	// CancelToProm removes the cost introduced by being compatible with prometheus.
	// mustRegister calls it when exporting to prometheus is disabled.
	CancelToProm()
	// CollectorToProm returns the collector to be registered in the prometheus
	// scrape registry. mustRegister calls it when exporting to prometheus is enabled.
	CollectorToProm() prom.Collector
}
    61  
    62  type selfAsPromCollector struct {
    63  	self prom.Collector
    64  }
    65  
    66  func (s *selfAsPromCollector) init(self prom.Collector)        { s.self = self }
    67  func (s *selfAsPromCollector) CancelToProm()                   {}
    68  func (s *selfAsPromCollector) CollectorToProm() prom.Collector { return s.self }
    69  
// statusServer bundles the HTTP server that serves the /metrics scrape
// endpoint with a WaitGroup that tracks the goroutine running ListenAndServe
// (InitMetric calls Add(1) before starting it and the goroutine calls Done on exit).
type statusServer struct {
	*http.Server
	sync.WaitGroup
}
    74  
// registry is the package-private prometheus registry created by InitMetric;
// mustRegister registers every Collector into it.
var registry *prom.Registry

// moExporter is the exporter built by InitMetric and cleared by StopMetricSync.
var moExporter MetricExporter

// moCollector is the collector built by InitMetric and cleared by StopMetricSync.
var moCollector MetricCollector

// statusSvr is the scrape endpoint server; InitMetric only creates it when
// getExportToProm() reports true.
var statusSvr *statusServer

// multiTable selects the table layout used by initTables.
// NOTE(review): the original note says it must be set before
// newMetricFSCollector and initTables, yet InitMetric assigns it after calling
// newMetricFSCollector — confirm the intended ordering.
var multiTable = false // need set before newMetricFSCollector and initTables

// inited is flipped 0 -> 1 by InitMetric and 1 -> 0 by StopMetricSync using
// compare-and-swap, so each of them takes effect at most once per cycle.
var inited uint32
    82  
    83  func InitMetric(ctx context.Context, ieFactory func() ie.InternalExecutor, SV *config.ObservabilityParameters, nodeUUID, role string, opts ...InitOption) {
    84  	// fix multi-init in standalone
    85  	if !atomic.CompareAndSwapUint32(&inited, 0, 1) {
    86  		return
    87  	}
    88  	var initOpts InitOptions
    89  	opts = append(opts,
    90  		withExportInterval(SV.MetricExportInterval),
    91  		withUpdateInterval(SV.MetricUpdateStorageUsageInterval.Duration),
    92  		withMultiTable(SV.MetricMultiTable),
    93  	)
    94  	for _, opt := range opts {
    95  		opt.ApplyTo(&initOpts)
    96  	}
    97  	// init global variables
    98  	initConfigByParamaterUnit(SV)
    99  	registry = prom.NewRegistry()
   100  	if initOpts.writerFactory != nil {
   101  		moCollector = newMetricFSCollector(initOpts.writerFactory, WithFlushInterval(initOpts.exportInterval), ExportMultiTable(initOpts.multiTable))
   102  	} else {
   103  		moCollector = newMetricCollector(ieFactory, WithFlushInterval(initOpts.exportInterval))
   104  	}
   105  	moExporter = newMetricExporter(registry, moCollector, nodeUUID, role)
   106  
   107  	// register metrics and create tables
   108  	registerAllMetrics()
   109  	multiTable = initOpts.multiTable
   110  	if initOpts.needInitTable {
   111  		initTables(ctx, ieFactory, SV.BatchProcessor)
   112  	}
   113  
   114  	// start the data flow
   115  	serviceCtx := context.Background()
   116  	moCollector.Start(serviceCtx)
   117  	moExporter.Start(serviceCtx)
   118  
   119  	if getExportToProm() {
   120  		// http.HandleFunc("/query", makeDebugHandleFunc(ieFactory))
   121  		mux := http.NewServeMux()
   122  		mux.Handle("/metrics", promhttp.HandlerFor(prom.DefaultGatherer, promhttp.HandlerOpts{}))
   123  		addr := fmt.Sprintf("%s:%d", SV.Host, SV.StatusPort)
   124  		statusSvr = &statusServer{Server: &http.Server{Addr: addr, Handler: mux}}
   125  		statusSvr.Add(1)
   126  		go func() {
   127  			defer statusSvr.Done()
   128  			if err := statusSvr.ListenAndServe(); err != http.ErrServerClosed {
   129  				panic(fmt.Sprintf("status server error: %v", err))
   130  			}
   131  		}()
   132  		logutil.Infof("[Metric] metrics scrape endpoint is ready at http://%s/metrics", addr)
   133  	}
   134  
   135  	SetUpdateStorageUsageInterval(initOpts.updateInterval)
   136  	logutil.Infof("metric with ExportInterval: %v", initOpts.exportInterval)
   137  	logutil.Infof("metric with UpdateStorageUsageInterval: %v", initOpts.updateInterval)
   138  }
   139  
   140  func StopMetricSync() {
   141  	if !atomic.CompareAndSwapUint32(&inited, 1, 0) {
   142  		return
   143  	}
   144  	if moCollector != nil {
   145  		if ch, effect := moCollector.Stop(true); effect {
   146  			<-ch
   147  		}
   148  		moCollector = nil
   149  	}
   150  	if moExporter != nil {
   151  		if ch, effect := moExporter.Stop(true); effect {
   152  			<-ch
   153  		}
   154  		moExporter = nil
   155  	}
   156  	if statusSvr != nil {
   157  		_ = statusSvr.Shutdown(context.TODO())
   158  		statusSvr = nil
   159  	}
   160  	logutil.Info("Shutdown metric complete.")
   161  }
   162  
   163  func mustRegiterToProm(collector prom.Collector) {
   164  	if err := prom.Register(collector); err != nil {
   165  		// err is either registering a collector more than once or metrics have duplicate description.
   166  		// in any case, we respect the existing collectors in the prom registry
   167  		logutil.Debugf("[Metric] register to prom register: %v", err)
   168  	}
   169  }
   170  
   171  func mustRegister(collector Collector) {
   172  	registry.MustRegister(collector)
   173  	if getExportToProm() {
   174  		mustRegiterToProm(collector.CollectorToProm())
   175  	} else {
   176  		collector.CancelToProm()
   177  	}
   178  }
   179  
   180  func InitSchema(ctx context.Context, ieFactory func() ie.InternalExecutor) error {
   181  	initTables(ctx, ieFactory, motrace.FileService)
   182  	return nil
   183  }
   184  
   185  // initTables gathers all metrics and extract metadata to format create table sql
   186  func initTables(ctx context.Context, ieFactory func() ie.InternalExecutor, batchProcessMode string) {
   187  	exec := ieFactory()
   188  	exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
   189  	mustExec := func(sql string) {
   190  		if err := exec.Exec(ctx, sql, ie.NewOptsBuilder().Finish()); err != nil {
   191  			panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql))
   192  		}
   193  	}
   194  	if getForceInit() {
   195  		mustExec(sqlDropDBConst)
   196  	}
   197  	mustExec(sqlCreateDBConst)
   198  	var createCost time.Duration
   199  	defer func() {
   200  		logutil.Debugf(
   201  			"[Metric] init metrics tables: create cost %d ms",
   202  			createCost.Milliseconds())
   203  	}()
   204  	instant := time.Now()
   205  
   206  	descChan := make(chan *prom.Desc, 10)
   207  
   208  	go func() {
   209  		for _, c := range initCollectors {
   210  			c.Describe(descChan)
   211  		}
   212  		close(descChan)
   213  	}()
   214  
   215  	if !multiTable {
   216  		mustExec(SingleMetricTable.ToCreateSql(ctx, true))
   217  		for desc := range descChan {
   218  			view := getView(ctx, desc)
   219  			sql := view.ToCreateSql(ctx, true)
   220  			mustExec(sql)
   221  		}
   222  	} else {
   223  		optFactory := table.GetOptionFactory(ctx, table.ExternalTableEngine)
   224  		buf := new(bytes.Buffer)
   225  		for desc := range descChan {
   226  			sql := createTableSqlFromMetricFamily(desc, buf, optFactory)
   227  			mustExec(sql)
   228  		}
   229  	}
   230  
   231  	createCost = time.Since(instant)
   232  }
   233  
// optionsFactory returns the table.TableOptions to use for the table tbl of
// database db and the given account. createTableSqlFromMetricFamily calls it
// to obtain the engine specific parts of a create-table statement.
type optionsFactory func(db, tbl, account string) table.TableOptions
   235  
   236  // instead MetricFamily, Desc is used to create tables because we don't want collect errors come into the picture.
   237  func createTableSqlFromMetricFamily(desc *prom.Desc, buf *bytes.Buffer, optionsFactory optionsFactory) string {
   238  	buf.Reset()
   239  	extra := newDescExtra(desc)
   240  	opts := optionsFactory(MetricDBConst, extra.fqName, table.AccountAll)
   241  	buf.WriteString("create ")
   242  	buf.WriteString(opts.GetCreateOptions())
   243  	buf.WriteString(fmt.Sprintf(
   244  		"table if not exists %s.%s (`%s` datetime(6), `%s` double, `%s` varchar(36), `%s` varchar(20)",
   245  		MetricDBConst, extra.fqName, lblTimeConst, lblValueConst, lblNodeConst, lblRoleConst,
   246  	))
   247  	for _, lbl := range extra.labels {
   248  		buf.WriteString(", `")
   249  		buf.WriteString(lbl.GetName())
   250  		buf.WriteString("` varchar(20)")
   251  	}
   252  	buf.WriteRune(')')
   253  	buf.WriteString(opts.GetTableOptions(nil))
   254  	return buf.String()
   255  }
   256  
   257  func getView(ctx context.Context, desc *prom.Desc) *table.View {
   258  	extra := newDescExtra(desc)
   259  	var labelNames = make([]string, 0, len(extra.labels))
   260  	for _, lbl := range extra.labels {
   261  		labelNames = append(labelNames, lbl.GetName())
   262  	}
   263  	return GetMetricViewWithLabels(ctx, extra.fqName, labelNames)
   264  }
   265  
// descExtra holds the information newDescExtra decodes from a prom.Desc.
type descExtra struct {
	orig   *prom.Desc       // descriptor the values below were decoded from
	fqName string           // metric name parsed out of desc.String()
	labels []*dto.LabelPair // label pairs returned by prom.MakeLabelPairs
}
   271  
   272  // decode inner infomation of a prom.Desc
   273  func newDescExtra(desc *prom.Desc) *descExtra {
   274  	str := desc.String()[14:] // strip Desc{fqName: "
   275  	fqName := str[:strings.Index(str, "\"")]
   276  	str = str[strings.Index(str, "variableLabels: [")+17:] // spot varlbl list
   277  	str = str[:strings.Index(str, "]")]
   278  	varLblCnt := len(strings.Split(str, " "))
   279  	labels := prom.MakeLabelPairs(desc, make([]string, varLblCnt))
   280  	return &descExtra{orig: desc, fqName: fqName, labels: labels}
   281  }
   282  
   283  func mustValidLbls(name string, consts prom.Labels, vars []string) {
   284  	mustNotOccupied := func(lblName string) {
   285  		if _, ok := occupiedLbls[strings.ToLower(lblName)]; ok {
   286  			panic(fmt.Sprintf("%s contains a occupied label: %s", name, lblName))
   287  		}
   288  	}
   289  	for k := range consts {
   290  		mustNotOccupied(k)
   291  	}
   292  	for _, v := range vars {
   293  		mustNotOccupied(v)
   294  	}
   295  }
   296  
   297  type SubSystem struct {
   298  	Name              string
   299  	Comment           string
   300  	SupportUserAccess bool
   301  }
   302  
   303  var SubSystemSql = &SubSystem{"sql", "base on query action", true}
   304  var SubSystemServer = &SubSystem{"server", "MO Server status, observe from inside", true}
   305  var SubSystemProcess = &SubSystem{"process", "MO process status", false}
   306  var SubSystemSys = &SubSystem{"sys", "OS status", false}
   307  
   308  var allSubSystem = map[string]*SubSystem{
   309  	SubSystemSql.Name:     SubSystemSql,
   310  	SubSystemServer.Name:  SubSystemServer,
   311  	SubSystemProcess.Name: SubSystemProcess,
   312  	SubSystemSys.Name:     SubSystemSys,
   313  }
   314  
// InitOptions collects the settings InitMetric works with. Every field is
// filled in by an InitOption.
type InitOptions struct {
	// writerFactory, when non-nil, makes InitMetric build its collector with
	// newMetricFSCollector instead of newMetricCollector.
	writerFactory table.WriterFactory // see WithWriterFactory
	// needInitTable controls whether InitMetric calls initTables.
	needInitTable bool // see WithInitAction
	// multiTable is copied by InitMetric into the package-level multiTable
	// flag and passed to newMetricFSCollector.
	multiTable bool // see withMultiTable
	// exportInterval is handed to the collector as its flush interval.
	exportInterval time.Duration // see withExportInterval
	// updateInterval is the StorageUsage update interval; InitMetric passes
	// it to SetUpdateStorageUsageInterval.
	// set by withUpdateInterval
	updateInterval time.Duration
}
   327  
// InitOption is a functional option that mutates an InitOptions value.
type InitOption func(*InitOptions)

// ApplyTo runs the option against opts.
func (f InitOption) ApplyTo(opts *InitOptions) {
	f(opts)
}
   333  
   334  func WithWriterFactory(factory table.WriterFactory) InitOption {
   335  	return InitOption(func(options *InitOptions) {
   336  		options.writerFactory = factory
   337  	})
   338  }
   339  
   340  func WithInitAction(init bool) InitOption {
   341  	return InitOption(func(options *InitOptions) {
   342  		options.needInitTable = init
   343  	})
   344  }
   345  
   346  func withMultiTable(multi bool) InitOption {
   347  	return InitOption(func(options *InitOptions) {
   348  		options.multiTable = multi
   349  	})
   350  }
   351  
   352  func withExportInterval(sec int) InitOption {
   353  	return InitOption(func(options *InitOptions) {
   354  		options.exportInterval = time.Second * time.Duration(sec)
   355  	})
   356  }
   357  
   358  func withUpdateInterval(interval time.Duration) InitOption {
   359  	return InitOption(func(opts *InitOptions) {
   360  		opts.updateInterval = interval
   361  	})
   362  }
   363  
// Column definitions of the single metric table (see SingleMetricTable).
// NewMetricView reuses the collecttime, value, node and role columns for
// every view, and NewMetricViewWithLabels adds further columns of the table
// whose name matches a metric label.
var (
	metricNameColumn        = table.StringDefaultColumn(`metric_name`, `sys`, `metric name, like: sql_statement_total, server_connections, process_cpu_percent, sys_memory_used, ...`)
	metricCollectTimeColumn = table.DatetimeColumn(`collecttime`, `metric data collect time`)
	metricValueColumn       = table.ValueColumn(`value`, `metric value`)
	metricNodeColumn        = table.StringDefaultColumn(`node`, ALL_IN_ONE_MODE, `mo node uuid`)
	metricRoleColumn        = table.StringDefaultColumn(`role`, ALL_IN_ONE_MODE, `mo node role, like: CN, DN, LOG`)
	metricAccountColumn     = table.StringDefaultColumn(`account`, `sys`, `account name`)
	metricTypeColumn        = table.StringColumn(`type`, `sql type, like: insert, select, ...`)
)
   373  
// SingleMetricTable defines the table `metric` of the metric database. It is
// the origin table of every view built by NewMetricView, it is registered
// through table.RegisterTableDefine in this package's init, and
// GetSchemaForAccount clones it for a specific account.
var SingleMetricTable = &table.Table{
	Account:          table.AccountAll,
	Database:         MetricDBConst,
	Table:            `metric`,
	Columns:          []table.Column{metricNameColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, metricAccountColumn, metricTypeColumn},
	PrimaryKeyColumn: []table.Column{},
	Engine:           table.ExternalTableEngine,
	Comment:          `metric data`,
	PathBuilder:      table.NewAccountDatePathBuilder(),
	AccountColumn:    &metricAccountColumn,
	// SupportUserAccess must be true for GetSchemaForAccount to emit any view
	// based on this table.
	SupportUserAccess: true,
}
   387  
   388  func NewMetricView(tbl string, opts ...table.ViewOption) *table.View {
   389  	view := &table.View{
   390  		Database:    MetricDBConst,
   391  		Table:       tbl,
   392  		OriginTable: SingleMetricTable,
   393  		Columns:     []table.Column{metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn},
   394  		Condition:   &table.ViewSingleCondition{Column: metricNameColumn, Table: tbl},
   395  	}
   396  	for _, opt := range opts {
   397  		opt.Apply(view)
   398  	}
   399  	return view
   400  }
   401  
   402  func NewMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View {
   403  	var options []table.ViewOption
   404  	// check SubSystem
   405  	var subSystem *SubSystem = nil
   406  	for _, ss := range allSubSystem {
   407  		if strings.Index(tbl, ss.Name) == 0 {
   408  			subSystem = ss
   409  			break
   410  		}
   411  	}
   412  	if subSystem == nil {
   413  		panic(moerr.NewNotSupported(ctx, "metric unknown SubSystem: %s", tbl))
   414  	}
   415  	options = append(options, table.SupportUserAccess(subSystem.SupportUserAccess))
   416  	// construct columns
   417  	for _, label := range lbls {
   418  		for _, col := range SingleMetricTable.Columns {
   419  			if strings.EqualFold(label, col.Name) {
   420  				options = append(options, table.WithColumn(col))
   421  			}
   422  		}
   423  	}
   424  	return NewMetricView(tbl, options...)
   425  }
   426  
// gView caches the views handed out by GetMetricViewWithLabels, keyed by
// table name. content is created on first use and guarded by mu.
var gView struct {
	content map[string]*table.View
	mu      sync.Mutex
}
   431  
   432  func GetMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View {
   433  	gView.mu.Lock()
   434  	defer gView.mu.Unlock()
   435  	if len(gView.content) == 0 {
   436  		gView.content = make(map[string]*table.View)
   437  	}
   438  	view, exist := gView.content[tbl]
   439  	if !exist {
   440  		view = NewMetricViewWithLabels(ctx, tbl, lbls)
   441  		gView.content[tbl] = view
   442  	}
   443  	return view
   444  }
   445  
   446  // GetSchemaForAccount return account's table, and view's schema
   447  func GetSchemaForAccount(ctx context.Context, account string) []string {
   448  	var sqls = make([]string, 0, 1)
   449  	tbl := SingleMetricTable.Clone()
   450  	tbl.Account = account
   451  	sqls = append(sqls, tbl.ToCreateSql(ctx, true))
   452  
   453  	descChan := make(chan *prom.Desc, 10)
   454  	go func() {
   455  		for _, c := range initCollectors {
   456  			c.Describe(descChan)
   457  		}
   458  		close(descChan)
   459  	}()
   460  
   461  	for desc := range descChan {
   462  		view := getView(ctx, desc)
   463  
   464  		if view.SupportUserAccess && view.OriginTable.SupportUserAccess {
   465  			sqls = append(sqls, view.ToCreateSql(ctx, true))
   466  		}
   467  	}
   468  	return sqls
   469  }
   470  
   471  func init() {
   472  	if table.RegisterTableDefine(SingleMetricTable) != nil {
   473  		panic(moerr.NewInternalError(context.Background(), "metric table already registered"))
   474  	}
   475  }