github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/mometric/metric_collector.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mometric

import (
    "bytes"
    "context"
    "fmt"
    "math"
    "runtime"
    "time"

    "github.com/matrixorigin/matrixone/pkg/common/moerr"
    "github.com/matrixorigin/matrixone/pkg/logutil"
    pb "github.com/matrixorigin/matrixone/pkg/pb/metric"
    bp "github.com/matrixorigin/matrixone/pkg/util/batchpipe"
    "github.com/matrixorigin/matrixone/pkg/util/export/table"
    ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor"
)

const CHAN_CAPACITY = 10000

type MetricCollector interface {
    SendMetrics(context.Context, []*pb.MetricFamily) error
    Start(context.Context) bool
    Stop(graceful bool) (<-chan struct{}, bool)
}

type collectorOpts struct {
    // if a MetricFamily has `metricThreshold` Metrics or more,
    // it deserves a flush operation
    metricThreshold int
    // if a RawHist MetricFamily has `sampleThreshold` Samples or more,
    // it deserves a flush operation
    sampleThreshold int
    // if a MetricFamily has not reached `metricThreshold` or `sampleThreshold`
    // within `flushInterval`, flush it anyway
    flushInterval time.Duration
    // the number of goroutines that execute the insert SQL; defaults to about 10% of runtime.NumCPU()
    sqlWorkerNum int
}

func defaultCollectorOpts() collectorOpts {
    var defaultSqlWorkerNum = int(math.Ceil(float64(runtime.NumCPU()) * 0.1))
    return collectorOpts{
        metricThreshold: 1000,
        sampleThreshold: 4096,
        flushInterval:   15 * time.Second,
        sqlWorkerNum:    defaultSqlWorkerNum,
    }
}

type collectorOpt interface {
    ApplyTo(*collectorOpts)
}

type WithMetricThreshold int

func (x WithMetricThreshold) ApplyTo(o *collectorOpts) {
    o.metricThreshold = int(x)
}

type WithSampleThreshold int

func (x WithSampleThreshold) ApplyTo(o *collectorOpts) {
    o.sampleThreshold = int(x)
}

type WithSqlWorkerNum int

func (x WithSqlWorkerNum) ApplyTo(o *collectorOpts) {
    o.sqlWorkerNum = int(x)
}

type WithFlushInterval time.Duration

func (x WithFlushInterval) ApplyTo(o *collectorOpts) {
    o.flushInterval = time.Duration(x)
}

var _ MetricCollector = (*metricCollector)(nil)

type metricCollector struct {
    *bp.BaseBatchPipe[*pb.MetricFamily, string]
    ieFactory func() ie.InternalExecutor
    opts      collectorOpts
}

func newMetricCollector(factory func() ie.InternalExecutor, opts ...collectorOpt) MetricCollector {
    initOpts := defaultCollectorOpts()
    for _, o := range opts {
        o.ApplyTo(&initOpts)
    }
    c := &metricCollector{
        ieFactory: factory,
        opts:      initOpts,
    }
    base := bp.NewBaseBatchPipe[*pb.MetricFamily, string](c, bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum))
    c.BaseBatchPipe = base
    return c
}
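// A minimal usage sketch (illustrative only, not part of this file):
// `executorFactory` is a hypothetical stand-in for whatever supplies an
// ie.InternalExecutor in the caller's setup.
//
//	c := newMetricCollector(executorFactory, WithMetricThreshold(2000), WithFlushInterval(30*time.Second))
//	c.Start(context.Background())
//	_ = c.SendMetrics(context.Background(), mfs)
//	stopped, _ := c.Stop(true)
//	<-stopped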
func (c *metricCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
    for _, mf := range mfs {
        if err := c.SendItem(ctx, mf); err != nil {
            return err
        }
    }
    return nil
}

func (c *metricCollector) NewItemBatchHandler(ctx context.Context) func(batch string) {
    exec := c.ieFactory()
    exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
    return func(batch string) {
        if err := exec.Exec(ctx, batch, ie.NewOptsBuilder().Finish()); err != nil {
            logutil.Errorf("[Metric] insert error. sql: %s; err: %v", batch, err)
        }
    }
}

func (c *metricCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, string] {
    return &mfset{
        Reminder:        bp.NewConstantClock(c.opts.flushInterval),
        metricThreshold: c.opts.metricThreshold,
        sampleThreshold: c.opts.sampleThreshold,
    }
}

type mfset struct {
    bp.Reminder
    mfs             []*pb.MetricFamily
    typ             pb.MetricType
    rows            int // how many buffered rows
    metricThreshold int // how many rows should be flushed as a batch
    sampleThreshold int // RawHist samples are treated differently because they are generated at a higher rate
}

func (s *mfset) Add(mf *pb.MetricFamily) {
    if s.typ != mf.GetType() {
        s.typ = mf.GetType()
    }
    switch s.typ {
    case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
        s.rows += len(mf.Metric)
    case pb.MetricType_RAWHIST:
        for _, m := range mf.Metric {
            s.rows += len(m.RawHist.Samples)
        }
    }
    s.mfs = append(s.mfs, mf)
}

func (s *mfset) ShouldFlush() bool {
    switch s.typ {
    case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
        return s.rows > s.metricThreshold
    case pb.MetricType_RAWHIST:
        return s.rows > s.sampleThreshold
    default:
        return false
    }
}

func (s *mfset) Reset() {
    s.mfs = s.mfs[:0]
    s.typ = pb.MetricType_COUNTER // 0
    s.rows = 0
    s.RemindReset()
}

func (s *mfset) IsEmpty() bool {
    return len(s.mfs) == 0
}
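// For reference, the statement assembled by GetBatch below has roughly the
// following shape (values are illustrative; the database comes from
// MetricDBConst and the table from the family name):
//
//	insert into <db>.<metric_name> values
//	("2022-01-01 00:00:00.000000", 1.000000,"<node>","<role>","<label value>"),(...)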
// GetBatch extracts an insert SQL statement from a set of MetricFamily.
// The bytes.Buffer is used to mitigate memory allocation.
func (s *mfset) GetBatch(ctx context.Context, buf *bytes.Buffer) string {
    buf.Reset()
    buf.WriteString(fmt.Sprintf("insert into %s.%s values ", MetricDBConst, s.mfs[0].GetName()))
    lblsBuf := new(bytes.Buffer)
    writeValues := func(t string, v float64, lbls string) {
        buf.WriteString("(")
        buf.WriteString(fmt.Sprintf("%q, %f", t, v))
        buf.WriteString(lbls)
        buf.WriteString("),")
    }
    for _, mf := range s.mfs {
        for _, metric := range mf.Metric {
            // reserved labels
            lblsBuf.WriteString(fmt.Sprintf(",%q,%q", mf.GetNode(), mf.GetRole()))
            // custom labels
            for _, lbl := range metric.Label {
                lblsBuf.WriteString(",\"")
                lblsBuf.WriteString(lbl.GetValue())
                lblsBuf.WriteRune('"')
            }
            lbls := lblsBuf.String()
            lblsBuf.Reset()

            switch mf.GetType() {
            case pb.MetricType_COUNTER:
                time := localTimeStr(metric.GetCollecttime())
                writeValues(time, metric.Counter.GetValue(), lbls)
            case pb.MetricType_GAUGE:
                time := localTimeStr(metric.GetCollecttime())
                writeValues(time, metric.Gauge.GetValue(), lbls)
            case pb.MetricType_RAWHIST:
                for _, sample := range metric.RawHist.Samples {
                    time := localTimeStr(sample.GetDatetime())
                    writeValues(time, sample.GetValue(), lbls)
                }
            default:
                panic(fmt.Sprintf("unsupported metric type %v", mf.GetType()))
            }
        }
    }
    sql := buf.String()
    // the set has at least one row, so the trailing comma can be removed safely
    sql = sql[:len(sql)-1]
    return sql
}

var _ MetricCollector = (*metricFSCollector)(nil)

type metricFSCollector struct {
    *bp.BaseBatchPipe[*pb.MetricFamily, table.ExportRequests]
    writerFactory table.WriterFactory
    opts          collectorOpts
}

func (c *metricFSCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
    for _, mf := range mfs {
        if err := c.SendItem(ctx, mf); err != nil {
            return err
        }
    }
    return nil
}

func newMetricFSCollector(writerFactory table.WriterFactory, opts ...collectorOpt) MetricCollector {
    initOpts := defaultCollectorOpts()
    for _, o := range opts {
        o.ApplyTo(&initOpts)
    }
    c := &metricFSCollector{
        writerFactory: writerFactory,
        opts:          initOpts,
    }
    pipeOpts := []bp.BaseBatchPipeOpt{
        bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum),
        bp.PipeWithBufferWorkerNum(1), // only one table
        bp.PipeWithItemNameFormatter(func(bp.HasName) string {
            return SingleMetricTable.GetName()
        }),
    }
    base := bp.NewBaseBatchPipe[*pb.MetricFamily, table.ExportRequests](c, pipeOpts...)
    c.BaseBatchPipe = base
    return c
}
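// A minimal usage sketch for the file-service based collector (illustrative only;
// `writerFactory` is assumed to be a table.WriterFactory obtained from the
// caller's export setup):
//
//	fc := newMetricFSCollector(writerFactory, WithFlushInterval(30*time.Second))
//	fc.Start(context.Background())
//	_ = fc.SendMetrics(context.Background(), mfs)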
func (c *metricFSCollector) NewItemBatchHandler(ctx context.Context) func(batch table.ExportRequests) {
    return func(batches table.ExportRequests) {
        for _, batch := range batches {
            if _, err := batch.Handle(); err != nil {
                logutil.Errorf("[Metric] failed to write, err: %v", err)
            }
        }
    }
}

func (c *metricFSCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, table.ExportRequests] {
    return &mfsetETL{
        mfset: mfset{
            Reminder:        bp.NewConstantClock(c.opts.flushInterval),
            metricThreshold: c.opts.metricThreshold,
            sampleThreshold: c.opts.sampleThreshold,
        },
        collector: c,
    }
}

type mfsetETL struct {
    mfset
    collector *metricFSCollector
}

// GetBatch implements table.Table.GetBatch.
// It writes metrics into two tables: the `metric` table and the `sql_statement_cu` table.
func (s *mfsetETL) GetBatch(ctx context.Context, buf *bytes.Buffer) table.ExportRequests {
    buf.Reset()

    ts := time.Now()
    buffer := make(map[string]table.RowWriter, 2)
    writeValues := func(row *table.Row) error {
        w, exist := buffer[row.Table.GetName()]
        if !exist {
            w = s.collector.writerFactory.GetRowWriter(ctx, row.GetAccount(), row.Table, ts)
            buffer[row.Table.GetName()] = w
        }
        if err := w.WriteRow(row); err != nil {
            return err
        }
        return nil
    }
    rows := make(map[string]*table.Row, 2)
    defer func() {
        for _, r := range rows {
            r.Free()
        }
    }()
    getRow := func(metricName string) *table.Row {
        tbl := SingleMetricTable
        if metricName == SqlStatementCUTable.GetName() {
            tbl = SqlStatementCUTable
        }
        row, exist := rows[tbl.GetName()]
        if !exist {
            row = tbl.GetRow(ctx)
            rows[tbl.GetName()] = row
        }
        return row
    }

    for _, mf := range s.mfs {
        for _, metric := range mf.Metric {
            row := getRow(mf.GetName())
            row.Reset()
            // the `metric` table NEEDS the `metric_name` column,
            // while the `sql_statement_cu` table has NO `metric_name` column
            if row.Table.GetName() == SingleMetricTable.GetName() {
                row.SetColumnVal(metricNameColumn, table.StringField(mf.GetName()))
            }
            // reserved labels
            row.SetColumnVal(metricNodeColumn, table.StringField(mf.GetNode()))
            row.SetColumnVal(metricRoleColumn, table.StringField(mf.GetRole()))
            // custom labels
            for _, lbl := range metric.Label {
                row.SetVal(lbl.GetName(), table.StringField(lbl.GetValue()))
            }

            switch mf.GetType() {
            case pb.MetricType_COUNTER:
                time := localTime(metric.GetCollecttime())
                row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
                row.SetColumnVal(metricValueColumn, table.Float64Field(metric.Counter.GetValue()))
                _ = writeValues(row)
            case pb.MetricType_GAUGE:
                time := localTime(metric.GetCollecttime())
                row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
                row.SetColumnVal(metricValueColumn, table.Float64Field(metric.Gauge.GetValue()))
                _ = writeValues(row)
            case pb.MetricType_RAWHIST:
                for _, sample := range metric.RawHist.Samples {
                    time := localTime(sample.GetDatetime())
                    row.SetColumnVal(metricCollectTimeColumn, table.TimeField(time))
                    row.SetColumnVal(metricValueColumn, table.Float64Field(sample.GetValue()))
                    _ = writeValues(row)
                }
            default:
                panic(moerr.NewInternalError(ctx, "unsupported metric type %v", mf.GetType()))
            }
        }
    }

    reqs := make([]table.WriteRequest, 0, len(buffer))
    for _, w := range buffer {
        reqs = append(reqs, table.NewRowRequest(w))
    }

    return reqs
}

func localTime(value int64) time.Time {
    return time.UnixMicro(value).In(time.Local)
}

func localTimeStr(value int64) string {
    return time.UnixMicro(value).In(time.Local).Format("2006-01-02 15:04:05.000000")
}
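// Note: both helpers above interpret the stored collect time as Unix
// microseconds and render it in the local timezone. As a sketch, in a UTC
// environment:
//
//	localTimeStr(1700000000000000) // "2023-11-14 22:13:20.000000"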