github.com/matrixorigin/matrixone@v0.7.0/pkg/util/metric/metric.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metric 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "net/http" 22 "strings" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/config" 29 "github.com/matrixorigin/matrixone/pkg/logutil" 30 "github.com/matrixorigin/matrixone/pkg/util/export/table" 31 ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor" 32 "github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace" 33 34 prom "github.com/prometheus/client_golang/prometheus" 35 "github.com/prometheus/client_golang/prometheus/promhttp" 36 dto "github.com/prometheus/client_model/go" 37 ) 38 39 const ( 40 MetricDBConst = "system_metrics" 41 sqlCreateDBConst = "create database if not exists " + MetricDBConst 42 sqlDropDBConst = "drop database if exists " + MetricDBConst 43 ALL_IN_ONE_MODE = "monolithic" 44 ) 45 46 var ( 47 lblNodeConst = "node" 48 lblRoleConst = "role" 49 lblValueConst = "value" 50 lblTimeConst = "collecttime" 51 occupiedLbls = map[string]struct{}{lblTimeConst: {}, lblValueConst: {}, lblNodeConst: {}, lblRoleConst: {}} 52 ) 53 54 type Collector interface { 55 prom.Collector 56 // cancelToProm remove the cost introduced by being compatible with prometheus 57 CancelToProm() 58 // collectorForProm returns a collector used in prometheus scrape registry 59 CollectorToProm() prom.Collector 60 } 61 62 type selfAsPromCollector struct { 63 self prom.Collector 64 } 65 66 func (s *selfAsPromCollector) init(self prom.Collector) { s.self = self } 67 func (s *selfAsPromCollector) CancelToProm() {} 68 func (s *selfAsPromCollector) CollectorToProm() prom.Collector { return s.self } 69 70 type statusServer struct { 71 *http.Server 72 sync.WaitGroup 73 } 74 75 var registry *prom.Registry 76 var moExporter MetricExporter 77 var moCollector MetricCollector 78 var statusSvr *statusServer 79 var multiTable = false // need set before newMetricFSCollector and initTables 80 81 var inited uint32 82 83 func InitMetric(ctx context.Context, ieFactory func() ie.InternalExecutor, SV *config.ObservabilityParameters, nodeUUID, role string, opts ...InitOption) { 84 // fix multi-init in standalone 85 if !atomic.CompareAndSwapUint32(&inited, 0, 1) { 86 return 87 } 88 var initOpts InitOptions 89 opts = append(opts, 90 withExportInterval(SV.MetricExportInterval), 91 withUpdateInterval(SV.MetricUpdateStorageUsageInterval.Duration), 92 withMultiTable(SV.MetricMultiTable), 93 ) 94 for _, opt := range opts { 95 opt.ApplyTo(&initOpts) 96 } 97 // init global variables 98 initConfigByParamaterUnit(SV) 99 registry = prom.NewRegistry() 100 if initOpts.writerFactory != nil { 101 moCollector = newMetricFSCollector(initOpts.writerFactory, WithFlushInterval(initOpts.exportInterval), ExportMultiTable(initOpts.multiTable)) 102 } else { 103 moCollector = newMetricCollector(ieFactory, WithFlushInterval(initOpts.exportInterval)) 104 } 105 moExporter = newMetricExporter(registry, moCollector, nodeUUID, role) 106 107 // register metrics and create tables 108 registerAllMetrics() 109 multiTable = initOpts.multiTable 110 if initOpts.needInitTable { 111 initTables(ctx, ieFactory, SV.BatchProcessor) 112 } 113 114 // start the data flow 115 serviceCtx := context.Background() 116 moCollector.Start(serviceCtx) 117 moExporter.Start(serviceCtx) 118 119 if getExportToProm() { 120 // http.HandleFunc("/query", makeDebugHandleFunc(ieFactory)) 121 mux := http.NewServeMux() 122 mux.Handle("/metrics", promhttp.HandlerFor(prom.DefaultGatherer, promhttp.HandlerOpts{})) 123 addr := fmt.Sprintf("%s:%d", SV.Host, SV.StatusPort) 124 statusSvr = &statusServer{Server: &http.Server{Addr: addr, Handler: mux}} 125 statusSvr.Add(1) 126 go func() { 127 defer statusSvr.Done() 128 if err := statusSvr.ListenAndServe(); err != http.ErrServerClosed { 129 panic(fmt.Sprintf("status server error: %v", err)) 130 } 131 }() 132 logutil.Infof("[Metric] metrics scrape endpoint is ready at http://%s/metrics", addr) 133 } 134 135 SetUpdateStorageUsageInterval(initOpts.updateInterval) 136 logutil.Infof("metric with ExportInterval: %v", initOpts.exportInterval) 137 logutil.Infof("metric with UpdateStorageUsageInterval: %v", initOpts.updateInterval) 138 } 139 140 func StopMetricSync() { 141 if !atomic.CompareAndSwapUint32(&inited, 1, 0) { 142 return 143 } 144 if moCollector != nil { 145 if ch, effect := moCollector.Stop(true); effect { 146 <-ch 147 } 148 moCollector = nil 149 } 150 if moExporter != nil { 151 if ch, effect := moExporter.Stop(true); effect { 152 <-ch 153 } 154 moExporter = nil 155 } 156 if statusSvr != nil { 157 _ = statusSvr.Shutdown(context.TODO()) 158 statusSvr = nil 159 } 160 logutil.Info("Shutdown metric complete.") 161 } 162 163 func mustRegiterToProm(collector prom.Collector) { 164 if err := prom.Register(collector); err != nil { 165 // err is either registering a collector more than once or metrics have duplicate description. 166 // in any case, we respect the existing collectors in the prom registry 167 logutil.Debugf("[Metric] register to prom register: %v", err) 168 } 169 } 170 171 func mustRegister(collector Collector) { 172 registry.MustRegister(collector) 173 if getExportToProm() { 174 mustRegiterToProm(collector.CollectorToProm()) 175 } else { 176 collector.CancelToProm() 177 } 178 } 179 180 func InitSchema(ctx context.Context, ieFactory func() ie.InternalExecutor) error { 181 initTables(ctx, ieFactory, motrace.FileService) 182 return nil 183 } 184 185 // initTables gathers all metrics and extract metadata to format create table sql 186 func initTables(ctx context.Context, ieFactory func() ie.InternalExecutor, batchProcessMode string) { 187 exec := ieFactory() 188 exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish()) 189 mustExec := func(sql string) { 190 if err := exec.Exec(ctx, sql, ie.NewOptsBuilder().Finish()); err != nil { 191 panic(fmt.Sprintf("[Metric] init metric tables error: %v, sql: %s", err, sql)) 192 } 193 } 194 if getForceInit() { 195 mustExec(sqlDropDBConst) 196 } 197 mustExec(sqlCreateDBConst) 198 var createCost time.Duration 199 defer func() { 200 logutil.Debugf( 201 "[Metric] init metrics tables: create cost %d ms", 202 createCost.Milliseconds()) 203 }() 204 instant := time.Now() 205 206 descChan := make(chan *prom.Desc, 10) 207 208 go func() { 209 for _, c := range initCollectors { 210 c.Describe(descChan) 211 } 212 close(descChan) 213 }() 214 215 if !multiTable { 216 mustExec(SingleMetricTable.ToCreateSql(ctx, true)) 217 for desc := range descChan { 218 view := getView(ctx, desc) 219 sql := view.ToCreateSql(ctx, true) 220 mustExec(sql) 221 } 222 } else { 223 optFactory := table.GetOptionFactory(ctx, table.ExternalTableEngine) 224 buf := new(bytes.Buffer) 225 for desc := range descChan { 226 sql := createTableSqlFromMetricFamily(desc, buf, optFactory) 227 mustExec(sql) 228 } 229 } 230 231 createCost = time.Since(instant) 232 } 233 234 type optionsFactory func(db, tbl, account string) table.TableOptions 235 236 // instead MetricFamily, Desc is used to create tables because we don't want collect errors come into the picture. 237 func createTableSqlFromMetricFamily(desc *prom.Desc, buf *bytes.Buffer, optionsFactory optionsFactory) string { 238 buf.Reset() 239 extra := newDescExtra(desc) 240 opts := optionsFactory(MetricDBConst, extra.fqName, table.AccountAll) 241 buf.WriteString("create ") 242 buf.WriteString(opts.GetCreateOptions()) 243 buf.WriteString(fmt.Sprintf( 244 "table if not exists %s.%s (`%s` datetime(6), `%s` double, `%s` varchar(36), `%s` varchar(20)", 245 MetricDBConst, extra.fqName, lblTimeConst, lblValueConst, lblNodeConst, lblRoleConst, 246 )) 247 for _, lbl := range extra.labels { 248 buf.WriteString(", `") 249 buf.WriteString(lbl.GetName()) 250 buf.WriteString("` varchar(20)") 251 } 252 buf.WriteRune(')') 253 buf.WriteString(opts.GetTableOptions(nil)) 254 return buf.String() 255 } 256 257 func getView(ctx context.Context, desc *prom.Desc) *table.View { 258 extra := newDescExtra(desc) 259 var labelNames = make([]string, 0, len(extra.labels)) 260 for _, lbl := range extra.labels { 261 labelNames = append(labelNames, lbl.GetName()) 262 } 263 return GetMetricViewWithLabels(ctx, extra.fqName, labelNames) 264 } 265 266 type descExtra struct { 267 orig *prom.Desc 268 fqName string 269 labels []*dto.LabelPair 270 } 271 272 // decode inner infomation of a prom.Desc 273 func newDescExtra(desc *prom.Desc) *descExtra { 274 str := desc.String()[14:] // strip Desc{fqName: " 275 fqName := str[:strings.Index(str, "\"")] 276 str = str[strings.Index(str, "variableLabels: [")+17:] // spot varlbl list 277 str = str[:strings.Index(str, "]")] 278 varLblCnt := len(strings.Split(str, " ")) 279 labels := prom.MakeLabelPairs(desc, make([]string, varLblCnt)) 280 return &descExtra{orig: desc, fqName: fqName, labels: labels} 281 } 282 283 func mustValidLbls(name string, consts prom.Labels, vars []string) { 284 mustNotOccupied := func(lblName string) { 285 if _, ok := occupiedLbls[strings.ToLower(lblName)]; ok { 286 panic(fmt.Sprintf("%s contains a occupied label: %s", name, lblName)) 287 } 288 } 289 for k := range consts { 290 mustNotOccupied(k) 291 } 292 for _, v := range vars { 293 mustNotOccupied(v) 294 } 295 } 296 297 type SubSystem struct { 298 Name string 299 Comment string 300 SupportUserAccess bool 301 } 302 303 var SubSystemSql = &SubSystem{"sql", "base on query action", true} 304 var SubSystemServer = &SubSystem{"server", "MO Server status, observe from inside", true} 305 var SubSystemProcess = &SubSystem{"process", "MO process status", false} 306 var SubSystemSys = &SubSystem{"sys", "OS status", false} 307 308 var allSubSystem = map[string]*SubSystem{ 309 SubSystemSql.Name: SubSystemSql, 310 SubSystemServer.Name: SubSystemServer, 311 SubSystemProcess.Name: SubSystemProcess, 312 SubSystemSys.Name: SubSystemSys, 313 } 314 315 type InitOptions struct { 316 writerFactory table.WriterFactory // see WithWriterFactory 317 // needInitTable control to do the initTables 318 needInitTable bool // see WithInitAction 319 // initSingleTable 320 multiTable bool // see WithMultiTable 321 // exportInterval 322 exportInterval time.Duration // see withExportInterval 323 // updateInterval, update StorageUsage interval 324 // set by withUpdateInterval 325 updateInterval time.Duration 326 } 327 328 type InitOption func(*InitOptions) 329 330 func (f InitOption) ApplyTo(opts *InitOptions) { 331 f(opts) 332 } 333 334 func WithWriterFactory(factory table.WriterFactory) InitOption { 335 return InitOption(func(options *InitOptions) { 336 options.writerFactory = factory 337 }) 338 } 339 340 func WithInitAction(init bool) InitOption { 341 return InitOption(func(options *InitOptions) { 342 options.needInitTable = init 343 }) 344 } 345 346 func withMultiTable(multi bool) InitOption { 347 return InitOption(func(options *InitOptions) { 348 options.multiTable = multi 349 }) 350 } 351 352 func withExportInterval(sec int) InitOption { 353 return InitOption(func(options *InitOptions) { 354 options.exportInterval = time.Second * time.Duration(sec) 355 }) 356 } 357 358 func withUpdateInterval(interval time.Duration) InitOption { 359 return InitOption(func(opts *InitOptions) { 360 opts.updateInterval = interval 361 }) 362 } 363 364 var ( 365 metricNameColumn = table.StringDefaultColumn(`metric_name`, `sys`, `metric name, like: sql_statement_total, server_connections, process_cpu_percent, sys_memory_used, ...`) 366 metricCollectTimeColumn = table.DatetimeColumn(`collecttime`, `metric data collect time`) 367 metricValueColumn = table.ValueColumn(`value`, `metric value`) 368 metricNodeColumn = table.StringDefaultColumn(`node`, ALL_IN_ONE_MODE, `mo node uuid`) 369 metricRoleColumn = table.StringDefaultColumn(`role`, ALL_IN_ONE_MODE, `mo node role, like: CN, DN, LOG`) 370 metricAccountColumn = table.StringDefaultColumn(`account`, `sys`, `account name`) 371 metricTypeColumn = table.StringColumn(`type`, `sql type, like: insert, select, ...`) 372 ) 373 374 var SingleMetricTable = &table.Table{ 375 Account: table.AccountAll, 376 Database: MetricDBConst, 377 Table: `metric`, 378 Columns: []table.Column{metricNameColumn, metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn, metricAccountColumn, metricTypeColumn}, 379 PrimaryKeyColumn: []table.Column{}, 380 Engine: table.ExternalTableEngine, 381 Comment: `metric data`, 382 PathBuilder: table.NewAccountDatePathBuilder(), 383 AccountColumn: &metricAccountColumn, 384 // SupportUserAccess 385 SupportUserAccess: true, 386 } 387 388 func NewMetricView(tbl string, opts ...table.ViewOption) *table.View { 389 view := &table.View{ 390 Database: MetricDBConst, 391 Table: tbl, 392 OriginTable: SingleMetricTable, 393 Columns: []table.Column{metricCollectTimeColumn, metricValueColumn, metricNodeColumn, metricRoleColumn}, 394 Condition: &table.ViewSingleCondition{Column: metricNameColumn, Table: tbl}, 395 } 396 for _, opt := range opts { 397 opt.Apply(view) 398 } 399 return view 400 } 401 402 func NewMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View { 403 var options []table.ViewOption 404 // check SubSystem 405 var subSystem *SubSystem = nil 406 for _, ss := range allSubSystem { 407 if strings.Index(tbl, ss.Name) == 0 { 408 subSystem = ss 409 break 410 } 411 } 412 if subSystem == nil { 413 panic(moerr.NewNotSupported(ctx, "metric unknown SubSystem: %s", tbl)) 414 } 415 options = append(options, table.SupportUserAccess(subSystem.SupportUserAccess)) 416 // construct columns 417 for _, label := range lbls { 418 for _, col := range SingleMetricTable.Columns { 419 if strings.EqualFold(label, col.Name) { 420 options = append(options, table.WithColumn(col)) 421 } 422 } 423 } 424 return NewMetricView(tbl, options...) 425 } 426 427 var gView struct { 428 content map[string]*table.View 429 mu sync.Mutex 430 } 431 432 func GetMetricViewWithLabels(ctx context.Context, tbl string, lbls []string) *table.View { 433 gView.mu.Lock() 434 defer gView.mu.Unlock() 435 if len(gView.content) == 0 { 436 gView.content = make(map[string]*table.View) 437 } 438 view, exist := gView.content[tbl] 439 if !exist { 440 view = NewMetricViewWithLabels(ctx, tbl, lbls) 441 gView.content[tbl] = view 442 } 443 return view 444 } 445 446 // GetSchemaForAccount return account's table, and view's schema 447 func GetSchemaForAccount(ctx context.Context, account string) []string { 448 var sqls = make([]string, 0, 1) 449 tbl := SingleMetricTable.Clone() 450 tbl.Account = account 451 sqls = append(sqls, tbl.ToCreateSql(ctx, true)) 452 453 descChan := make(chan *prom.Desc, 10) 454 go func() { 455 for _, c := range initCollectors { 456 c.Describe(descChan) 457 } 458 close(descChan) 459 }() 460 461 for desc := range descChan { 462 view := getView(ctx, desc) 463 464 if view.SupportUserAccess && view.OriginTable.SupportUserAccess { 465 sqls = append(sqls, view.ToCreateSql(ctx, true)) 466 } 467 } 468 return sqls 469 } 470 471 func init() { 472 if table.RegisterTableDefine(SingleMetricTable) != nil { 473 panic(moerr.NewInternalError(context.Background(), "metric table already registered")) 474 } 475 }