github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/mometric/metric.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mometric 16 17 import ( 18 "context" 19 "fmt" 20 "github.com/matrixorigin/matrixone/pkg/catalog" 21 "github.com/matrixorigin/matrixone/pkg/util/executor" 22 "net/http" 23 "strings" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/matrixorigin/matrixone/pkg/common/moerr" 29 "github.com/matrixorigin/matrixone/pkg/common/mpool" 30 "github.com/matrixorigin/matrixone/pkg/common/runtime" 31 "github.com/matrixorigin/matrixone/pkg/config" 32 "github.com/matrixorigin/matrixone/pkg/logutil" 33 "github.com/matrixorigin/matrixone/pkg/util/export/table" 34 ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor" 35 "github.com/matrixorigin/matrixone/pkg/util/metric" 36 "github.com/matrixorigin/matrixone/pkg/util/metric/stats" 37 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 38 prom "github.com/prometheus/client_golang/prometheus" 39 "github.com/prometheus/client_golang/prometheus/promhttp" 40 dto "github.com/prometheus/client_model/go" 41 ) 42 43 const ( 44 MetricDBConst = metric.MetricDBConst 45 SqlCreateDBConst = "create database if not exists " + MetricDBConst 46 SqlDropDBConst = "drop database if exists " + MetricDBConst 47 ALL_IN_ONE_MODE = "monolithic" 48 ) 49 50 type CtxServiceType string 51 52 const ServiceTypeKey CtxServiceType = "ServiceTypeKey" 53 const LaunchMode = "ALL" 54 55 type statusServer struct { 56 *http.Server 57 sync.WaitGroup 58 } 59 60 var registry *prom.Registry 61 var moExporter metric.MetricExporter 62 var moCollector MetricCollector 63 var statsLogWriter *StatsLogWriter 64 var statusSvr *statusServer 65 66 // internalRegistry is the registry for metric.InternalCollectors, cooperated with internalExporter. 67 var internalRegistry *prom.Registry 68 var internalExporter metric.MetricExporter 69 70 var enable bool 71 var inited uint32 72 73 func InitMetric(ctx context.Context, ieFactory func() ie.InternalExecutor, SV *config.ObservabilityParameters, nodeUUID, role string, opts ...InitOption) (act bool) { 74 // fix multi-init in standalone 75 if !atomic.CompareAndSwapUint32(&inited, 0, 1) { 76 return false 77 } 78 var initOpts InitOptions 79 opts = append(opts, 80 withExportInterval(SV.MetricExportInterval), 81 withUpdateInterval(SV.MetricStorageUsageUpdateInterval.Duration), 82 withCheckNewInterval(SV.MetricStorageUsageCheckNewInterval.Duration), 83 WithInternalGatherInterval(SV.MetricInternalGatherInterval.Duration), 84 ) 85 for _, opt := range opts { 86 opt.ApplyTo(&initOpts) 87 } 88 // init global variables 89 initConfigByParameterUnit(SV) 90 registry = prom.NewRegistry() 91 if initOpts.writerFactory != nil { 92 moCollector = newMetricFSCollector(initOpts.writerFactory, WithFlushInterval(initOpts.exportInterval)) 93 } else { 94 moCollector = newMetricCollector(ieFactory, WithFlushInterval(initOpts.exportInterval)) 95 } 96 moExporter = newMetricExporter(registry, moCollector, nodeUUID, role, WithGatherInterval(metric.GetGatherInterval())) 97 internalRegistry = prom.NewRegistry() 98 internalExporter = newMetricExporter(internalRegistry, moCollector, nodeUUID, role, WithGatherInterval(initOpts.internalGatherInterval)) 99 statsLogWriter = newStatsLogWriter(stats.DefaultRegistry, runtime.ProcessLevelRuntime().Logger().Named("StatsLog"), metric.GetStatsGatherInterval()) 100 101 // register metrics and create tables 102 registerAllMetrics() 103 if initOpts.needInitTable { 104 initTables(ctx, ieFactory) 105 } 106 107 // start the data flow 108 if !SV.DisableMetric { 109 serviceCtx := context.WithValue(context.Background(), ServiceTypeKey, role) 110 moCollector.Start(serviceCtx) 111 moExporter.Start(serviceCtx) 112 internalExporter.Start(serviceCtx) 113 statsLogWriter.Start(serviceCtx) 114 metric.SetMetricExporter(moExporter) 115 } 116 117 if metric.EnableExportToProm() { 118 // http.HandleFunc("/query", makeDebugHandleFunc(ieFactory)) 119 mux := http.NewServeMux() 120 mux.Handle("/metrics", promhttp.HandlerFor(v2.GetPrometheusGatherer(), promhttp.HandlerOpts{})) 121 addr := fmt.Sprintf(":%d", SV.StatusPort) 122 statusSvr = &statusServer{Server: &http.Server{Addr: addr, Handler: mux}} 123 statusSvr.Add(1) 124 go func() { 125 defer statusSvr.Done() 126 if err := statusSvr.ListenAndServe(); err != http.ErrServerClosed { 127 panic(fmt.Sprintf("status server error: %v", err)) 128 } 129 }() 130 131 startCrossServicesMetricsTask(ctx) 132 133 logutil.Debugf("[Metric] metrics scrape endpoint is ready at http://%s/metrics", addr) 134 } 135 136 enable = true 137 SetUpdateStorageUsageInterval(initOpts.updateInterval) 138 SetStorageUsageCheckNewInterval(initOpts.checkNewInterval) 139 logutil.Debugf("metric with ExportInterval: %v", initOpts.exportInterval) 140 logutil.Debugf("metric with UpdateStorageUsageInterval: %v", initOpts.updateInterval) 141 return true 142 } 143 144 // this cron task can gather some service level metrics, 145 func startCrossServicesMetricsTask(ctx context.Context) { 146 go func() { 147 logutil.Info("cross service metrics task started") 148 defer logutil.Info("cross service metrics task exiting") 149 150 timer := time.NewTicker(time.Second * 5) 151 for { 152 select { 153 case <-ctx.Done(): 154 return 155 case <-timer.C: 156 mpoolRelatedMetrics() 157 } 158 } 159 }() 160 } 161 162 func mpoolRelatedMetrics() { 163 v2.MemTotalCrossPoolFreeCounter.Add(float64(mpool.TotalCrossPoolFreeCounter())) 164 165 v2.MemGlobalStatsAllocatedGauge.Set(float64(mpool.GlobalStats().NumCurrBytes.Load())) 166 v2.MemGlobalStatsHighWaterMarkGauge.Set(float64(mpool.GlobalStats().HighWaterMark.Load())) 167 } 168 169 func IsEnable() bool { 170 return enable 171 } 172 173 func StopMetricSync() { 174 if !atomic.CompareAndSwapUint32(&inited, 1, 0) { 175 return 176 } 177 if moCollector != nil { 178 if ch, effect := moCollector.Stop(true); effect { 179 <-ch 180 } 181 moCollector = nil 182 } 183 if moExporter != nil { 184 if ch, effect := moExporter.Stop(true); effect { 185 <-ch 186 } 187 moExporter = nil 188 } 189 if internalExporter != nil { 190 if ch, effect := internalExporter.Stop(true); effect { 191 <-ch 192 } 193 internalExporter = nil 194 } 195 if statsLogWriter != nil { 196 if ch, effect := statsLogWriter.Stop(true); effect { 197 <-ch 198 } 199 statsLogWriter = nil 200 } 201 if statusSvr != nil { 202 _ = statusSvr.Shutdown(context.TODO()) 203 statusSvr = nil 204 } 205 logutil.Info("Shutdown metric complete.") 206 } 207 208 func mustRegiterToProm(collector prom.Collector) { 209 if err := v2.GetPrometheusRegistry().Register(collector); err != nil { 210 // err is either registering a collector more than once or metrics have duplicate description. 211 // in any case, we respect the existing collectors in the prom registry 212 logutil.Debugf("[Metric] register to prom register: %v", err) 213 } 214 } 215 216 func mustRegister(reg *prom.Registry, collector metric.Collector) { 217 reg.MustRegister(collector) 218 if metric.EnableExportToProm() { 219 mustRegiterToProm(collector.CollectorToProm()) 220 } else { 221 collector.CancelToProm() 222 } 223 } 224 225 // register all defined collector here 226 func registerAllMetrics() { 227 for _, c := range metric.InitCollectors { 228 mustRegister(registry, c) 229 } 230 for _, c := range metric.InternalCollectors { 231 mustRegister(internalRegistry, c) 232 } 233 } 234 235 func initConfigByParameterUnit(SV *config.ObservabilityParameters) { 236 metric.SetExportToProm(SV.EnableMetricToProm) 237 metric.SetGatherInterval(time.Second * time.Duration(SV.MetricGatherInterval)) 238 } 239 240 func InitSchema(ctx context.Context, txn executor.TxnExecutor) error { 241 if metric.GetForceInit() { 242 if _, err := txn.Exec(SqlDropDBConst, executor.StatementOption{}); err != nil { 243 return err 244 } 245 } 246 247 if _, err := txn.Exec(SqlCreateDBConst, executor.StatementOption{}); err != nil { 248 return err 249 } 250 251 var createCost time.Duration 252 defer func() { 253 logutil.Debugf("[Metric] init metrics tables: create cost %d ms", createCost.Milliseconds()) 254 }() 255 256 instant := time.Now() 257 descChan := make(chan *prom.Desc, 10) 258 go func() { 259 for _, c := range metric.InitCollectors { 260 c.Describe(descChan) 261 } 262 for _, c := range metric.InternalCollectors { 263 c.Describe(descChan) 264 } 265 close(descChan) 266 }() 267 268 createSql := SingleMetricTable.ToCreateSql(ctx, true) 269 if _, err := txn.Exec(createSql, executor.StatementOption{}); err != nil { 270 //panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql)) 271 return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, createSql) 272 } 273 274 createSql = SqlStatementCUTable.ToCreateSql(ctx, true) 275 if _, err := txn.Exec(createSql, executor.StatementOption{}); err != nil { 276 //panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql)) 277 return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, createSql) 278 } 279 280 for desc := range descChan { 281 view := getView(ctx, desc) 282 sql := view.ToCreateSql(ctx, true) 283 if _, err := txn.Exec(sql, executor.StatementOption{}); err != nil { 284 return moerr.NewInternalError(ctx, "[Metric] init metric tables error: %v, sql: %s", err, sql) 285 } 286 } 287 createCost = time.Since(instant) 288 return nil 289 } 290 291 // initTables gathers all metrics and extract metadata to format create table sql 292 func initTables(ctx context.Context, ieFactory func() ie.InternalExecutor) { 293 exec := ieFactory() 294 exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish()) 295 mustExec := func(sql string) { 296 if err := exec.Exec(ctx, sql, ie.NewOptsBuilder().Finish()); err != nil { 297 panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql)) 298 } 299 } 300 if metric.GetForceInit() { 301 mustExec(SqlDropDBConst) 302 } 303 mustExec(SqlCreateDBConst) 304 var createCost time.Duration 305 defer func() { 306 logutil.Debugf( 307 "[Metric] init metrics tables: create cost %d ms", 308 createCost.Milliseconds()) 309 }() 310 instant := time.Now() 311 312 descChan := make(chan *prom.Desc, 10) 313 314 go func() { 315 for _, c := range metric.InitCollectors { 316 c.Describe(descChan) 317 } 318 for _, c := range metric.InternalCollectors { 319 c.Describe(descChan) 320 } 321 close(descChan) 322 }() 323 324 mustExec(SingleMetricTable.ToCreateSql(ctx, true)) 325 mustExec(SqlStatementCUTable.ToCreateSql(ctx, true)) 326 for desc := range descChan { 327 view := getView(ctx, desc) 328 sql := view.ToCreateSql(ctx, true) 329 mustExec(sql) 330 } 331 332 createCost = time.Since(instant) 333 } 334 335 func getView(ctx context.Context, desc *prom.Desc) *table.View { 336 extra := newDescExtra(desc) 337 var labelNames = make([]string, 0, len(extra.labels)) 338 for _, lbl := range extra.labels { 339 labelNames = append(labelNames, lbl.GetName()) 340 } 341 return GetMetricViewWithLabels(ctx, extra.fqName, labelNames) 342 } 343 344 type descExtra struct { 345 orig *prom.Desc 346 fqName string 347 labels []*dto.LabelPair 348 } 349 350 // decode inner infomation of a prom.Desc 351 func newDescExtra(desc *prom.Desc) *descExtra { 352 str := desc.String()[14:] // strip Desc{fqName: " 353 fqName := str[:strings.Index(str, "\"")] 354 str = str[strings.Index(str, "variableLabels: {")+17:] // spot varlbl list 355 str = str[:strings.Index(str, "}")] 356 varLblCnt := len(strings.Split(str, ",")) 357 labels := prom.MakeLabelPairs(desc, make([]string, varLblCnt)) 358 return &descExtra{orig: desc, fqName: fqName, labels: labels} 359 } 360 361 type InitOptions struct { 362 writerFactory table.WriterFactory // see WithWriterFactory 363 // needInitTable control to do the initTables 364 // Deprecated: use InitSchema instead. 365 needInitTable bool // see WithInitAction 366 // exportInterval 367 exportInterval time.Duration // see withExportInterval 368 // updateInterval, update StorageUsage interval 369 // set by withUpdateInterval 370 updateInterval time.Duration 371 // checkNewAccountInterval, check new account Internal to collect new account for metric StorageUsage 372 // set by withCheckNewInterval 373 checkNewInterval time.Duration 374 // internalGatherInterval, handle metric.SubSystemMO gather interval 375 internalGatherInterval time.Duration 376 } 377 378 type InitOption func(*InitOptions) 379 380 func (f InitOption) ApplyTo(opts *InitOptions) { 381 f(opts) 382 } 383 384 func WithWriterFactory(factory table.WriterFactory) InitOption { 385 return InitOption(func(options *InitOptions) { 386 options.writerFactory = factory 387 }) 388 } 389 390 // Deprecated: Use InitSchema instead. 391 func WithInitAction(init bool) InitOption { 392 return InitOption(func(options *InitOptions) { 393 options.needInitTable = init 394 }) 395 } 396 397 func withExportInterval(sec int) InitOption { 398 return InitOption(func(options *InitOptions) { 399 options.exportInterval = time.Second * time.Duration(sec) 400 }) 401 } 402 403 func withUpdateInterval(interval time.Duration) InitOption { 404 return InitOption(func(opts *InitOptions) { 405 opts.updateInterval = interval 406 }) 407 } 408 409 func withCheckNewInterval(interval time.Duration) InitOption { 410 return InitOption(func(opts *InitOptions) { 411 opts.checkNewInterval = interval 412 }) 413 } 414 415 func WithInternalGatherInterval(interval time.Duration) InitOption { 416 return InitOption(func(options *InitOptions) { 417 options.internalGatherInterval = interval 418 }) 419 } 420 421 var ( 422 metricNameColumn = table.StringDefaultColumn(`metric_name`, `sys`, `metric name, like: sql_statement_total, server_connections, process_cpu_percent, sys_memory_used, ...`) 423 metricCollectTimeColumn = table.DatetimeColumn(`collecttime`, `metric data collect time`) 424 metricValueColumn = table.ValueColumn(`value`, `metric value`) 425 metricNodeColumn = table.StringDefaultColumn(`node`, ALL_IN_ONE_MODE, `mo node uuid`) 426 metricRoleColumn = table.StringDefaultColumn(`role`, ALL_IN_ONE_MODE, `mo node role, like: CN, DN, LOG`) 427 metricAccountColumn = table.StringDefaultColumn(`account`, `sys`, `account name`) 428 metricTypeColumn = table.StringColumn(`type`, `sql type, like: insert, select, ...`) 429 430 sqlSourceTypeColumn = table.StringColumn(`sql_source_type`, `sql_source_type, val like: external_sql, cloud_nonuser_sql, cloud_user_sql, internal_sql, ...`) 431 ) 432 433 var SingleMetricTable = &table.Table{ 434 Account: table.AccountSys, 435 Database: MetricDBConst, 436 Table: `metric`, 437 Columns: []table.Column{metricNameColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, metricAccountColumn, metricTypeColumn}, 438 PrimaryKeyColumn: []table.Column{}, 439 ClusterBy: []table.Column{metricCollectTimeColumn, metricNameColumn, metricAccountColumn}, 440 Engine: table.NormalTableEngine, 441 Comment: `metric data`, 442 PathBuilder: table.NewAccountDatePathBuilder(), 443 AccountColumn: &metricAccountColumn, 444 // TimestampColumn 445 TimestampColumn: &metricCollectTimeColumn, 446 // SupportUserAccess 447 SupportUserAccess: true, 448 // SupportConstAccess 449 SupportConstAccess: true, 450 } 451 452 var SqlStatementCUTable = &table.Table{ 453 Account: table.AccountSys, 454 Database: MetricDBConst, 455 Table: catalog.MO_SQL_STMT_CU, 456 Columns: []table.Column{metricAccountColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, sqlSourceTypeColumn}, 457 PrimaryKeyColumn: []table.Column{}, 458 ClusterBy: []table.Column{metricAccountColumn, metricCollectTimeColumn}, 459 Engine: table.NormalTableEngine, 460 Comment: `sql_statement_cu metric data`, 461 PathBuilder: table.NewAccountDatePathBuilder(), 462 AccountColumn: &metricAccountColumn, 463 // TimestampColumn 464 TimestampColumn: &metricCollectTimeColumn, 465 // SupportUserAccess 466 SupportUserAccess: true, 467 // SupportConstAccess 468 SupportConstAccess: true, 469 } 470 471 // GetAllTables 472 // 473 // Deprecated: use table.GetAllTables() instead. 474 func GetAllTables() []*table.Table { 475 return []*table.Table{SingleMetricTable, SqlStatementCUTable} 476 } 477 478 func NewMetricView(tbl string, opts ...table.ViewOption) *table.View { 479 view := &table.View{ 480 Database: MetricDBConst, 481 Table: tbl, 482 OriginTable: SingleMetricTable, 483 Columns: []table.Column{metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn}, 484 Condition: &table.ViewSingleCondition{Column: metricNameColumn, Table: tbl}, 485 } 486 for _, opt := range opts { 487 opt.Apply(view) 488 } 489 return view 490 } 491 492 func NewMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View { 493 var options []table.ViewOption 494 // check SubSystem 495 var subSystem *metric.SubSystem = nil 496 for _, ss := range metric.AllSubSystem { 497 if strings.Index(tbl, ss.Name) == 0 { 498 subSystem = ss 499 break 500 } 501 } 502 if subSystem == nil { 503 panic(moerr.NewNotSupported(ctx, "metric unknown SubSystem: %s", tbl)) 504 } 505 options = append(options, table.SupportUserAccess(subSystem.SupportUserAccess)) 506 // construct columns 507 for _, label := range lbls { 508 for _, col := range SingleMetricTable.Columns { 509 if strings.EqualFold(label, col.Name) { 510 options = append(options, table.WithColumn(col)) 511 } 512 } 513 } 514 return NewMetricView(tbl, options...) 515 } 516 517 var gView struct { 518 content map[string]*table.View 519 mu sync.Mutex 520 } 521 522 func GetMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View { 523 gView.mu.Lock() 524 defer gView.mu.Unlock() 525 if len(gView.content) == 0 { 526 gView.content = make(map[string]*table.View) 527 } 528 view, exist := gView.content[tbl] 529 if !exist { 530 view = NewMetricViewWithLabels(ctx, tbl, lbls) 531 gView.content[tbl] = view 532 } 533 return view 534 } 535 536 // GetSchemaForAccount return account's table, and view's schema 537 func GetSchemaForAccount(ctx context.Context, account string) []string { 538 var sqls = make([]string, 0, 1) 539 tbl := SingleMetricTable.Clone() 540 tbl.Account = account 541 sqls = append(sqls, tbl.ToCreateSql(ctx, true)) 542 tbl = SqlStatementCUTable.Clone() 543 tbl.Account = account 544 sqls = append(sqls, tbl.ToCreateSql(ctx, true)) 545 546 descChan := make(chan *prom.Desc, 10) 547 go func() { 548 for _, c := range metric.InitCollectors { 549 c.Describe(descChan) 550 } 551 close(descChan) 552 }() 553 554 for desc := range descChan { 555 view := getView(ctx, desc) 556 557 if view.SupportUserAccess && view.OriginTable.SupportUserAccess { 558 sqls = append(sqls, view.ToCreateSql(ctx, true)) 559 } 560 } 561 return sqls 562 } 563 564 func init() { 565 if table.RegisterTableDefine(SingleMetricTable) != nil { 566 panic(moerr.NewInternalError(context.Background(), "metric table already registered")) 567 } 568 if table.RegisterTableDefine(SqlStatementCUTable) != nil { 569 panic(moerr.NewInternalError(context.Background(), "metric table 'sql_statement_cu' already registered")) 570 } 571 }