
     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    15  package mometric
    17  import (
    18  	"context"
    19  	"fmt"
    20  	""
    21  	""
    22  	"net/http"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	ie ""
    35  	""
    36  	""
    37  	v2 ""
    38  	prom ""
    39  	""
    40  	dto ""
    41  )
// Database name, bootstrap DDL statements and the default node/role value
// used by the metric schema.
const (
	// MetricDBConst is the database that holds the metric tables and views;
	// it re-exports metric.MetricDBConst.
	MetricDBConst    = metric.MetricDBConst
	// SqlCreateDBConst creates the metric database when it does not exist yet.
	SqlCreateDBConst = "create database if not exists " + MetricDBConst
	// SqlDropDBConst drops the metric database; it is executed only when
	// metric.GetForceInit() reports true.
	SqlDropDBConst   = "drop database if exists " + MetricDBConst
	// ALL_IN_ONE_MODE is the default value of the `node` and `role` columns.
	ALL_IN_ONE_MODE  = "monolithic"
)
// CtxServiceType is the dedicated key type for values this package stores in
// a context.Context.
type CtxServiceType string

// ServiceTypeKey is the context key under which InitMetric stores the node
// role before starting the collector and exporters.
const ServiceTypeKey CtxServiceType = "ServiceTypeKey"

// LaunchMode is not referenced in this part of the file.
// NOTE(review): presumably names the launch mode in which all services run
// together — confirm against its users.
const LaunchMode = "ALL"
// statusServer bundles the HTTP server that exposes the /metrics endpoint
// with a WaitGroup tracking its serving goroutine: InitMetric calls Add(1)
// before launching the goroutine, and the goroutine calls Done when
// ListenAndServe returns.
type statusServer struct {
	*http.Server
	sync.WaitGroup
}
// Package-level state of the metric pipeline. Everything below is assigned by
// InitMetric; the collector, exporters, stats writer and status server are
// reset to nil by StopMetricSync.

// registry is the registry for metric.InitCollectors, cooperated with moExporter.
var registry *prom.Registry
var moExporter metric.MetricExporter
var moCollector MetricCollector
var statsLogWriter *StatsLogWriter
var statusSvr *statusServer

// internalRegistry is the registry for metric.InternalCollectors, cooperated with internalExporter.
var internalRegistry *prom.Registry
var internalExporter metric.MetricExporter

// enable is set to true at the end of a successful InitMetric and is reported
// by IsEnable. It is a plain bool, read and written without synchronization.
var enable bool

// inited guards against repeated initialization: InitMetric flips it 0 -> 1
// and StopMetricSync flips it 1 -> 0, both via atomic compare-and-swap.
var inited uint32
// InitMetric builds and starts the package-level metric pipeline.
//
// It returns false without doing anything when the pipeline is already
// initialized, and true once initialization has completed.
//
// Parameters:
//   - ctx: passed to initTables and to the cross-services metrics task. The
//     collector, exporters and stats writer are started with a separate
//     context derived from context.Background().
//   - ieFactory: produces the internal executor used by the collector (when no
//     writer factory option is given) and by initTables.
//   - SV: observability configuration.
//   - nodeUUID, role: identify this node to the exporters.
//   - opts: caller options. Options derived from SV are appended after them,
//     so for the intervals they set, the SV values win.
//
// The status-server goroutine panics if ListenAndServe fails with anything
// other than http.ErrServerClosed.
func InitMetric(ctx context.Context, ieFactory func() ie.InternalExecutor, SV *config.ObservabilityParameters, nodeUUID, role string, opts ...InitOption) (act bool) {
	// fix multi-init in standalone: only the caller that flips inited from 0 to 1 proceeds
	if !atomic.CompareAndSwapUint32(&inited, 0, 1) {
		return false
	}
	var initOpts InitOptions
	// config-derived options go last so that they are applied after the caller's
	opts = append(opts,
		withExportInterval(SV.MetricExportInterval),
		withUpdateInterval(SV.MetricStorageUsageUpdateInterval.Duration),
		withCheckNewInterval(SV.MetricStorageUsageCheckNewInterval.Duration),
		WithInternalGatherInterval(SV.MetricInternalGatherInterval.Duration),
	)
	for _, opt := range opts {
		opt.ApplyTo(&initOpts)
	}
	// init global variables
	initConfigByParameterUnit(SV)
	registry = prom.NewRegistry()
	// a writer factory selects the FS-backed collector; otherwise the collector uses ieFactory
	if initOpts.writerFactory != nil {
		moCollector = newMetricFSCollector(initOpts.writerFactory, WithFlushInterval(initOpts.exportInterval))
	} else {
		moCollector = newMetricCollector(ieFactory, WithFlushInterval(initOpts.exportInterval))
	}
	// both exporters feed the same collector but gather from different registries at different intervals
	moExporter = newMetricExporter(registry, moCollector, nodeUUID, role, WithGatherInterval(metric.GetGatherInterval()))
	internalRegistry = prom.NewRegistry()
	internalExporter = newMetricExporter(internalRegistry, moCollector, nodeUUID, role, WithGatherInterval(initOpts.internalGatherInterval))
	statsLogWriter = newStatsLogWriter(stats.DefaultRegistry, runtime.ProcessLevelRuntime().Logger().Named("StatsLog"), metric.GetStatsGatherInterval())

	// register metrics and create tables
	registerAllMetrics()
	if initOpts.needInitTable {
		initTables(ctx, ieFactory)
	}

	// start the data flow
	if !SV.DisableMetric {
		// the service context is rooted at context.Background(), not at ctx, and carries the role
		serviceCtx := context.WithValue(context.Background(), ServiceTypeKey, role)
		moCollector.Start(serviceCtx)
		moExporter.Start(serviceCtx)
		internalExporter.Start(serviceCtx)
		statsLogWriter.Start(serviceCtx)
		metric.SetMetricExporter(moExporter)
	}

	if metric.EnableExportToProm() {
		// http.HandleFunc("/query", makeDebugHandleFunc(ieFactory))
		mux := http.NewServeMux()
		mux.Handle("/metrics", promhttp.HandlerFor(v2.GetPrometheusGatherer(), promhttp.HandlerOpts{}))
		addr := fmt.Sprintf(":%d", SV.StatusPort)
		statusSvr = &statusServer{Server: &http.Server{Addr: addr, Handler: mux}}
		// track the serving goroutine in the embedded WaitGroup
		statusSvr.Add(1)
		go func() {
			defer statusSvr.Done()
			// ErrServerClosed is the expected result of Shutdown; any other error panics
			if err := statusSvr.ListenAndServe(); err != http.ErrServerClosed {
				panic(fmt.Sprintf("status server error: %v", err))
			}
		}()

		startCrossServicesMetricsTask(ctx)

		logutil.Debugf("[Metric] metrics scrape endpoint is ready at http://%s/metrics", addr)
	}

	// enable is set regardless of SV.DisableMetric
	enable = true
	SetUpdateStorageUsageInterval(initOpts.updateInterval)
	SetStorageUsageCheckNewInterval(initOpts.checkNewInterval)
	logutil.Debugf("metric with ExportInterval: %v", initOpts.exportInterval)
	logutil.Debugf("metric with UpdateStorageUsageInterval: %v", initOpts.updateInterval)
	return true
}
   144  // this cron task can gather some service level metrics,
   145  func startCrossServicesMetricsTask(ctx context.Context) {
   146  	go func() {
   147  		logutil.Info("cross service metrics task started")
   148  		defer logutil.Info("cross service metrics task exiting")
   150  		timer := time.NewTicker(time.Second * 5)
   151  		for {
   152  			select {
   153  			case <-ctx.Done():
   154  				return
   155  			case <-timer.C:
   156  				mpoolRelatedMetrics()
   157  			}
   158  		}
   159  	}()
   160  }
   162  func mpoolRelatedMetrics() {
   163  	v2.MemTotalCrossPoolFreeCounter.Add(float64(mpool.TotalCrossPoolFreeCounter()))
   165  	v2.MemGlobalStatsAllocatedGauge.Set(float64(mpool.GlobalStats().NumCurrBytes.Load()))
   166  	v2.MemGlobalStatsHighWaterMarkGauge.Set(float64(mpool.GlobalStats().HighWaterMark.Load()))
   167  }
// IsEnable reports whether InitMetric has completed an initialization.
// The flag is set to true at the end of InitMetric; StopMetricSync does not
// reset it.
func IsEnable() bool {
	return enable
}
   173  func StopMetricSync() {
   174  	if !atomic.CompareAndSwapUint32(&inited, 1, 0) {
   175  		return
   176  	}
   177  	if moCollector != nil {
   178  		if ch, effect := moCollector.Stop(true); effect {
   179  			<-ch
   180  		}
   181  		moCollector = nil
   182  	}
   183  	if moExporter != nil {
   184  		if ch, effect := moExporter.Stop(true); effect {
   185  			<-ch
   186  		}
   187  		moExporter = nil
   188  	}
   189  	if internalExporter != nil {
   190  		if ch, effect := internalExporter.Stop(true); effect {
   191  			<-ch
   192  		}
   193  		internalExporter = nil
   194  	}
   195  	if statsLogWriter != nil {
   196  		if ch, effect := statsLogWriter.Stop(true); effect {
   197  			<-ch
   198  		}
   199  		statsLogWriter = nil
   200  	}
   201  	if statusSvr != nil {
   202  		_ = statusSvr.Shutdown(context.TODO())
   203  		statusSvr = nil
   204  	}
   205  	logutil.Info("Shutdown metric complete.")
   206  }
   208  func mustRegiterToProm(collector prom.Collector) {
   209  	if err := v2.GetPrometheusRegistry().Register(collector); err != nil {
   210  		// err is either registering a collector more than once or metrics have duplicate description.
   211  		// in any case, we respect the existing collectors in the prom registry
   212  		logutil.Debugf("[Metric] register to prom register: %v", err)
   213  	}
   214  }
   216  func mustRegister(reg *prom.Registry, collector metric.Collector) {
   217  	reg.MustRegister(collector)
   218  	if metric.EnableExportToProm() {
   219  		mustRegiterToProm(collector.CollectorToProm())
   220  	} else {
   221  		collector.CancelToProm()
   222  	}
   223  }
   225  // register all defined collector here
   226  func registerAllMetrics() {
   227  	for _, c := range metric.InitCollectors {
   228  		mustRegister(registry, c)
   229  	}
   230  	for _, c := range metric.InternalCollectors {
   231  		mustRegister(internalRegistry, c)
   232  	}
   233  }
   235  func initConfigByParameterUnit(SV *config.ObservabilityParameters) {
   236  	metric.SetExportToProm(SV.EnableMetricToProm)
   237  	metric.SetGatherInterval(time.Second * time.Duration(SV.MetricGatherInterval))
   238  }
   240  func InitSchema(ctx context.Context, txn executor.TxnExecutor) error {
   241  	if metric.GetForceInit() {
   242  		if _, err := txn.Exec(SqlDropDBConst, executor.StatementOption{}); err != nil {
   243  			return err
   244  		}
   245  	}
   247  	if _, err := txn.Exec(SqlCreateDBConst, executor.StatementOption{}); err != nil {
   248  		return err
   249  	}
   251  	var createCost time.Duration
   252  	defer func() {
   253  		logutil.Debugf("[Metric] init metrics tables: create cost %d ms", createCost.Milliseconds())
   254  	}()
   256  	instant := time.Now()
   257  	descChan := make(chan *prom.Desc, 10)
   258  	go func() {
   259  		for _, c := range metric.InitCollectors {
   260  			c.Describe(descChan)
   261  		}
   262  		for _, c := range metric.InternalCollectors {
   263  			c.Describe(descChan)
   264  		}
   265  		close(descChan)
   266  	}()
   268  	createSql := SingleMetricTable.ToCreateSql(ctx, true)
   269  	if _, err := txn.Exec(createSql, executor.StatementOption{}); err != nil {
   270  		//panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql))
   271  		return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, createSql)
   272  	}
   274  	createSql = SqlStatementCUTable.ToCreateSql(ctx, true)
   275  	if _, err := txn.Exec(createSql, executor.StatementOption{}); err != nil {
   276  		//panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql))
   277  		return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, createSql)
   278  	}
   280  	for desc := range descChan {
   281  		view := getView(ctx, desc)
   282  		sql := view.ToCreateSql(ctx, true)
   283  		if _, err := txn.Exec(sql, executor.StatementOption{}); err != nil {
   284  			return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, sql)
   285  		}
   286  	}
   287  	createCost = time.Since(instant)
   288  	return nil
   289  }
   291  // initTables gathers all metrics and extract metadata to format create table sql
   292  func initTables(ctx context.Context, ieFactory func() ie.InternalExecutor) {
   293  	exec := ieFactory()
   294  	exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
   295  	mustExec := func(sql string) {
   296  		if err := exec.Exec(ctx, sql, ie.NewOptsBuilder().Finish()); err != nil {
   297  			panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql))
   298  		}
   299  	}
   300  	if metric.GetForceInit() {
   301  		mustExec(SqlDropDBConst)
   302  	}
   303  	mustExec(SqlCreateDBConst)
   304  	var createCost time.Duration
   305  	defer func() {
   306  		logutil.Debugf(
   307  			"[Metric] init metrics tables: create cost %d ms",
   308  			createCost.Milliseconds())
   309  	}()
   310  	instant := time.Now()
   312  	descChan := make(chan *prom.Desc, 10)
   314  	go func() {
   315  		for _, c := range metric.InitCollectors {
   316  			c.Describe(descChan)
   317  		}
   318  		for _, c := range metric.InternalCollectors {
   319  			c.Describe(descChan)
   320  		}
   321  		close(descChan)
   322  	}()
   324  	mustExec(SingleMetricTable.ToCreateSql(ctx, true))
   325  	mustExec(SqlStatementCUTable.ToCreateSql(ctx, true))
   326  	for desc := range descChan {
   327  		view := getView(ctx, desc)
   328  		sql := view.ToCreateSql(ctx, true)
   329  		mustExec(sql)
   330  	}
   332  	createCost = time.Since(instant)
   333  }
   335  func getView(ctx context.Context, desc *prom.Desc) *table.View {
   336  	extra := newDescExtra(desc)
   337  	var labelNames = make([]string, 0, len(extra.labels))
   338  	for _, lbl := range extra.labels {
   339  		labelNames = append(labelNames, lbl.GetName())
   340  	}
   341  	return GetMetricViewWithLabels(ctx, extra.fqName, labelNames)
   342  }
// descExtra carries the pieces of a prom.Desc that newDescExtra recovers from
// its string form.
type descExtra struct {
	// orig is the description the other fields were extracted from.
	orig   *prom.Desc
	// fqName is the fully-qualified metric name parsed out of orig.String().
	fqName string
	// labels is the result of prom.MakeLabelPairs for orig.
	labels []*dto.LabelPair
}
// newDescExtra decodes the inner information of a prom.Desc by parsing the
// text returned by desc.String().
//
// The parsing relies on that text starting with `Desc{fqName: "` and
// containing a `variableLabels: {...}` section. If the closing quote or the
// closing brace is not found, strings.Index returns -1 and the slice
// expression panics.
// NOTE(review): the exact String() layout depends on the Prometheus client
// version in use — confirm when upgrading.
func newDescExtra(desc *prom.Desc) *descExtra {
	str := desc.String()[14:] // strip Desc{fqName: "
	// the name runs up to the next double quote
	fqName := str[:strings.Index(str, "\"")]
	str = str[strings.Index(str, "variableLabels: {")+17:] // spot varlbl list
	str = str[:strings.Index(str, "}")]
	// Number of comma-separated entries. An empty list still yields 1, because
	// strings.Split returns a single empty string for empty input.
	varLblCnt := len(strings.Split(str, ","))
	// only the label names are used by the caller, so empty values are supplied
	labels := prom.MakeLabelPairs(desc, make([]string, varLblCnt))
	return &descExtra{orig: desc, fqName: fqName, labels: labels}
}
// InitOptions collects the settings that InitOption values apply before
// InitMetric builds the pipeline.
type InitOptions struct {
	// writerFactory, when non-nil, makes InitMetric use the FS-backed
	// collector; see WithWriterFactory.
	writerFactory table.WriterFactory // see WithWriterFactory
	// needInitTable controls whether InitMetric calls initTables.
	// Deprecated: use InitSchema instead.
	needInitTable bool // see WithInitAction
	// exportInterval is passed to the collector as its flush interval.
	exportInterval time.Duration // see withExportInterval
	// updateInterval is the StorageUsage update interval,
	// set by withUpdateInterval.
	updateInterval time.Duration
	// checkNewInterval is the interval for checking new accounts for the StorageUsage metric,
	// set by withCheckNewInterval.
	checkNewInterval time.Duration
	// internalGatherInterval is the gather interval of the internal exporter,
	// set by WithInternalGatherInterval.
	internalGatherInterval time.Duration
}
// InitOption is a functional option that mutates an InitOptions value.
type InitOption func(*InitOptions)

// ApplyTo invokes the option on opts.
func (f InitOption) ApplyTo(opts *InitOptions) {
	f(opts)
}
   384  func WithWriterFactory(factory table.WriterFactory) InitOption {
   385  	return InitOption(func(options *InitOptions) {
   386  		options.writerFactory = factory
   387  	})
   388  }
   390  // Deprecated: Use InitSchema instead.
   391  func WithInitAction(init bool) InitOption {
   392  	return InitOption(func(options *InitOptions) {
   393  		options.needInitTable = init
   394  	})
   395  }
   397  func withExportInterval(sec int) InitOption {
   398  	return InitOption(func(options *InitOptions) {
   399  		options.exportInterval = time.Second * time.Duration(sec)
   400  	})
   401  }
   403  func withUpdateInterval(interval time.Duration) InitOption {
   404  	return InitOption(func(opts *InitOptions) {
   405  		opts.updateInterval = interval
   406  	})
   407  }
   409  func withCheckNewInterval(interval time.Duration) InitOption {
   410  	return InitOption(func(opts *InitOptions) {
   411  		opts.checkNewInterval = interval
   412  	})
   413  }
   415  func WithInternalGatherInterval(interval time.Duration) InitOption {
   416  	return InitOption(func(options *InitOptions) {
   417  		options.internalGatherInterval = interval
   418  	})
   419  }
// Column definitions shared by the metric tables and views below. The text
// in backquotes after each column name is that column's comment; the
// StringDefaultColumn calls also carry a default value as second argument.
var (
	metricNameColumn        = table.StringDefaultColumn(`metric_name`, `sys`, `metric name, like: sql_statement_total, server_connections, process_cpu_percent, sys_memory_used, ...`)
	metricCollectTimeColumn = table.DatetimeColumn(`collecttime`, `metric data collect time`)
	metricValueColumn       = table.ValueColumn(`value`, `metric value`)
	metricNodeColumn        = table.StringDefaultColumn(`node`, ALL_IN_ONE_MODE, `mo node uuid`)
	metricRoleColumn        = table.StringDefaultColumn(`role`, ALL_IN_ONE_MODE, `mo node role, like: CN, DN, LOG`)
	metricAccountColumn     = table.StringDefaultColumn(`account`, `sys`, `account name`)
	metricTypeColumn        = table.StringColumn(`type`, `sql type, like: insert, select, ...`)

	// sqlSourceTypeColumn is used only by SqlStatementCUTable.
	sqlSourceTypeColumn = table.StringColumn(`sql_source_type`, `sql_source_type, val like: external_sql, cloud_nonuser_sql, cloud_user_sql, internal_sql, ...`)
)
// SingleMetricTable defines the `metric` table in the metric database. It is
// the origin table of every view built by NewMetricView and is registered
// with the table package in init.
var SingleMetricTable = &table.Table{
	Account:          table.AccountSys,
	Database:         MetricDBConst,
	Table:            `metric`,
	Columns:          []table.Column{metricNameColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, metricAccountColumn, metricTypeColumn},
	// no primary key; rows are clustered by collect time, metric name and account
	PrimaryKeyColumn: []table.Column{},
	ClusterBy:        []table.Column{metricCollectTimeColumn, metricNameColumn, metricAccountColumn},
	Engine:           table.NormalTableEngine,
	Comment:          `metric data`,
	PathBuilder:      table.NewAccountDatePathBuilder(),
	AccountColumn:    &metricAccountColumn,
	// TimestampColumn points at the collect-time column.
	TimestampColumn: &metricCollectTimeColumn,
	// SupportUserAccess is checked by GetSchemaForAccount before it emits a view on this table.
	SupportUserAccess: true,
	// SupportConstAccess
	// NOTE(review): semantics are defined in the table package — confirm there.
	SupportConstAccess: true,
}
// SqlStatementCUTable defines the table named by catalog.MO_SQL_STMT_CU in
// the metric database. It is registered with the table package in init.
var SqlStatementCUTable = &table.Table{
	Account:          table.AccountSys,
	Database:         MetricDBConst,
	Table:            catalog.MO_SQL_STMT_CU,
	Columns:          []table.Column{metricAccountColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, sqlSourceTypeColumn},
	// no primary key; rows are clustered by account and collect time
	PrimaryKeyColumn: []table.Column{},
	ClusterBy:        []table.Column{metricAccountColumn, metricCollectTimeColumn},
	Engine:           table.NormalTableEngine,
	Comment:          `sql_statement_cu metric data`,
	PathBuilder:      table.NewAccountDatePathBuilder(),
	AccountColumn:    &metricAccountColumn,
	// TimestampColumn points at the collect-time column.
	TimestampColumn: &metricCollectTimeColumn,
	// SupportUserAccess
	// NOTE(review): semantics are defined in the table package — confirm there.
	SupportUserAccess: true,
	// SupportConstAccess
	// NOTE(review): semantics are defined in the table package — confirm there.
	SupportConstAccess: true,
}
   471  // GetAllTables
   472  //
   473  // Deprecated: use table.GetAllTables() instead.
   474  func GetAllTables() []*table.Table {
   475  	return []*table.Table{SingleMetricTable, SqlStatementCUTable}
   476  }
   478  func NewMetricView(tbl string, opts ...table.ViewOption) *table.View {
   479  	view := &table.View{
   480  		Database:    MetricDBConst,
   481  		Table:       tbl,
   482  		OriginTable: SingleMetricTable,
   483  		Columns:     []table.Column{metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn},
   484  		Condition:   &table.ViewSingleCondition{Column: metricNameColumn, Table: tbl},
   485  	}
   486  	for _, opt := range opts {
   487  		opt.Apply(view)
   488  	}
   489  	return view
   490  }
   492  func NewMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View {
   493  	var options []table.ViewOption
   494  	// check SubSystem
   495  	var subSystem *metric.SubSystem = nil
   496  	for _, ss := range metric.AllSubSystem {
   497  		if strings.Index(tbl, ss.Name) == 0 {
   498  			subSystem = ss
   499  			break
   500  		}
   501  	}
   502  	if subSystem == nil {
   503  		panic(moerr.NewNotSupported(ctx, "metric unknown SubSystem: %s", tbl))
   504  	}
   505  	options = append(options, table.SupportUserAccess(subSystem.SupportUserAccess))
   506  	// construct columns
   507  	for _, label := range lbls {
   508  		for _, col := range SingleMetricTable.Columns {
   509  			if strings.EqualFold(label, col.Name) {
   510  				options = append(options, table.WithColumn(col))
   511  			}
   512  		}
   513  	}
   514  	return NewMetricView(tbl, options...)
   515  }
// gView caches the views built by GetMetricViewWithLabels.
var gView struct {
	// content maps a metric (view) name to its view; it is allocated on first use.
	content map[string]*table.View
	// mu is the mutex paired with content.
	mu      sync.Mutex
}
   522  func GetMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View {
   524  	defer
   525  	if len(gView.content) == 0 {
   526  		gView.content = make(map[string]*table.View)
   527  	}
   528  	view, exist := gView.content[tbl]
   529  	if !exist {
   530  		view = NewMetricViewWithLabels(ctx, tbl, lbls)
   531  		gView.content[tbl] = view
   532  	}
   533  	return view
   534  }
   536  // GetSchemaForAccount return account's table, and view's schema
   537  func GetSchemaForAccount(ctx context.Context, account string) []string {
   538  	var sqls = make([]string, 0, 1)
   539  	tbl := SingleMetricTable.Clone()
   540  	tbl.Account = account
   541  	sqls = append(sqls, tbl.ToCreateSql(ctx, true))
   542  	tbl = SqlStatementCUTable.Clone()
   543  	tbl.Account = account
   544  	sqls = append(sqls, tbl.ToCreateSql(ctx, true))
   546  	descChan := make(chan *prom.Desc, 10)
   547  	go func() {
   548  		for _, c := range metric.InitCollectors {
   549  			c.Describe(descChan)
   550  		}
   551  		close(descChan)
   552  	}()
   554  	for desc := range descChan {
   555  		view := getView(ctx, desc)
   557  		if view.SupportUserAccess && view.OriginTable.SupportUserAccess {
   558  			sqls = append(sqls, view.ToCreateSql(ctx, true))
   559  		}
   560  	}
   561  	return sqls
   562  }
   564  func init() {
   565  	if table.RegisterTableDefine(SingleMetricTable) != nil {
   566  		panic(moerr.NewInternalError(context.Background(), "metric table already registered"))
   567  	}
   568  	if table.RegisterTableDefine(SqlStatementCUTable) != nil {
   569  		panic(moerr.NewInternalError(context.Background(), "metric table 'sql_statement_cu' already registered"))
   570  	}
   571  }