github.com/matrixorigin/matrixone@v0.7.0/pkg/util/metric/metric_collector.go

// Copyright 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package metric

import (
	"bytes"
	"context"
	"fmt"
	"runtime"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	pb "github.com/matrixorigin/matrixone/pkg/pb/metric"
	bp "github.com/matrixorigin/matrixone/pkg/util/batchpipe"
	"github.com/matrixorigin/matrixone/pkg/util/export/table"
	ie "github.com/matrixorigin/matrixone/pkg/util/internalExecutor"
)

const CHAN_CAPACITY = 10000

type MetricCollector interface {
	SendMetrics(context.Context, []*pb.MetricFamily) error
	Start(context.Context) bool
	Stop(graceful bool) (<-chan struct{}, bool)
}

type collectorOpts struct {
	// if a MetricFamily has `metricThreshold` Metrics or more,
	// it deserves a flush operation
	metricThreshold int
	// if a RawHist MetricFamily has `sampleThreshold` Samples or more,
	// it deserves a flush operation
	sampleThreshold int
	// if a MetricFamily could not be flushed by reaching `metricThreshold` or `sampleThreshold`
	// within `flushInterval`, flush it anyway
	flushInterval time.Duration
	// the number of goroutines executing the insert SQL, default is runtime.NumCPU()
	sqlWorkerNum int
	// multiTable: export each metric family to its own table instead of the single merged metric table
	multiTable bool
}

func defaultCollectorOpts() collectorOpts {
	return collectorOpts{
		metricThreshold: 1000,
		sampleThreshold: 4096,
		flushInterval:   15 * time.Second,
		sqlWorkerNum:    runtime.NumCPU(),
	}
}

type collectorOpt interface {
	ApplyTo(*collectorOpts)
}

type WithMetricThreshold int

func (x WithMetricThreshold) ApplyTo(o *collectorOpts) {
	o.metricThreshold = int(x)
}

type WithSampleThreshold int

func (x WithSampleThreshold) ApplyTo(o *collectorOpts) {
	o.sampleThreshold = int(x)
}

type WithSqlWorkerNum int

func (x WithSqlWorkerNum) ApplyTo(o *collectorOpts) {
	o.sqlWorkerNum = int(x)
}

type WithFlushInterval time.Duration

func (x WithFlushInterval) ApplyTo(o *collectorOpts) {
	o.flushInterval = time.Duration(x)
}

type ExportMultiTable bool

func (x ExportMultiTable) ApplyTo(o *collectorOpts) {
	o.multiTable = bool(x)
}

var _ MetricCollector = (*metricCollector)(nil)

type metricCollector struct {
	*bp.BaseBatchPipe[*pb.MetricFamily, string]
	ieFactory func() ie.InternalExecutor
	opts      collectorOpts
}

func newMetricCollector(factory func() ie.InternalExecutor, opts ...collectorOpt) MetricCollector {
	initOpts := defaultCollectorOpts()
	for _, o := range opts {
		o.ApplyTo(&initOpts)
	}
	c := &metricCollector{
		ieFactory: factory,
		opts:      initOpts,
	}
	base := bp.NewBaseBatchPipe[*pb.MetricFamily, string](c, bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum))
	c.BaseBatchPipe = base
	return c
}
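// newBufferedCollector is a minimal usage sketch, not part of the original file:
// it shows how the functional options above combine when constructing a collector.
// The concrete ie.InternalExecutor factory is assumed to be supplied by the caller.
func newBufferedCollector(factory func() ie.InternalExecutor) MetricCollector {
	return newMetricCollector(
		factory,
		WithFlushInterval(30*time.Second), // flush buffered families at least every 30s
		WithMetricThreshold(500),          // counters/gauges: flush once 500 rows are buffered
		WithSampleThreshold(2048),         // rawhist: flush once 2048 samples are buffered
		WithSqlWorkerNum(4),               // 4 goroutines execute the generated INSERT statements
	)
}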
func (c *metricCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
	for _, mf := range mfs {
		if err := c.SendItem(ctx, mf); err != nil {
			return err
		}
	}
	return nil
}

func (c *metricCollector) NewItemBatchHandler(ctx context.Context) func(batch string) {
	exec := c.ieFactory()
	exec.ApplySessionOverride(ie.NewOptsBuilder().Database(MetricDBConst).Internal(true).Finish())
	return func(batch string) {
		if err := exec.Exec(ctx, batch, ie.NewOptsBuilder().Finish()); err != nil {
			logutil.Errorf("[Metric] insert error. sql: %s; err: %v", batch, err)
		}
	}
}

func (c *metricCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, string] {
	return &mfset{
		Reminder:        bp.NewConstantClock(c.opts.flushInterval),
		metricThreshold: c.opts.metricThreshold,
		sampleThreshold: c.opts.sampleThreshold,
	}
}

type mfset struct {
	bp.Reminder
	mfs             []*pb.MetricFamily
	typ             pb.MetricType
	rows            int // how many buffered rows
	metricThreshold int // how many rows should be flushed as a batch
	sampleThreshold int // treat rawhist samples differently because they have a higher generation rate
}

func (s *mfset) Add(mf *pb.MetricFamily) {
	if s.typ != mf.GetType() {
		s.typ = mf.GetType()
	}
	switch s.typ {
	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
		s.rows += len(mf.Metric)
	case pb.MetricType_RAWHIST:
		for _, m := range mf.Metric {
			s.rows += len(m.RawHist.Samples)
		}
	}
	s.mfs = append(s.mfs, mf)
}

func (s *mfset) ShouldFlush() bool {
	switch s.typ {
	case pb.MetricType_COUNTER, pb.MetricType_GAUGE:
		return s.rows > s.metricThreshold
	case pb.MetricType_RAWHIST:
		return s.rows > s.sampleThreshold
	default:
		return false
	}
}

func (s *mfset) Reset() {
	s.mfs = s.mfs[:0]
	s.typ = pb.MetricType_COUNTER // 0
	s.rows = 0
	s.RemindReset()
}

func (s *mfset) IsEmpty() bool {
	return len(s.mfs) == 0
}

// GetBatch extracts an insert SQL statement from the buffered set of MetricFamily.
// The bytes.Buffer is reused to mitigate memory allocation.
func (s *mfset) GetBatch(ctx context.Context, buf *bytes.Buffer) string {
	buf.Reset()
	buf.WriteString(fmt.Sprintf("insert into %s.%s values ", MetricDBConst, s.mfs[0].GetName()))
	lblsBuf := new(bytes.Buffer)
	writeValues := func(t string, v float64, lbls string) {
		buf.WriteString("(")
		buf.WriteString(fmt.Sprintf("%q, %f", t, v))
		buf.WriteString(lbls)
		buf.WriteString("),")
	}
	for _, mf := range s.mfs {
		for _, metric := range mf.Metric {
			// reserved labels
			lblsBuf.WriteString(fmt.Sprintf(",%q,%q", mf.GetNode(), mf.GetRole()))
			// custom labels
			for _, lbl := range metric.Label {
				lblsBuf.WriteString(",\"")
				lblsBuf.WriteString(lbl.GetValue())
				lblsBuf.WriteRune('"')
			}
			lbls := lblsBuf.String()
			lblsBuf.Reset()

			switch mf.GetType() {
			case pb.MetricType_COUNTER:
				time := localTimeStr(metric.GetCollecttime())
				writeValues(time, metric.Counter.GetValue(), lbls)
			case pb.MetricType_GAUGE:
				time := localTimeStr(metric.GetCollecttime())
				writeValues(time, metric.Gauge.GetValue(), lbls)
			case pb.MetricType_RAWHIST:
				for _, sample := range metric.RawHist.Samples {
					time := localTimeStr(sample.GetDatetime())
					writeValues(time, sample.GetValue(), lbls)
				}
			default:
				panic(fmt.Sprintf("unsupported metric type %v", mf.GetType()))
			}
		}
	}
	sql := buf.String()
	// the set has at least one row, so we can remove the tail comma safely
	sql = sql[:len(sql)-1]
	return sql
}
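// For reference, the statement built by GetBatch above has roughly the following
// shape, assuming MetricDBConst names the metrics database (e.g. system_metrics)
// and sql_statement_total is a family with one counter row carrying a single label:
//
//	insert into system_metrics.sql_statement_total values
//	("2022-12-01 12:00:00.000000", 1.000000,"<node uuid>","<role>","<label value>")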
var _ MetricCollector = (*metricFSCollector)(nil)

type metricFSCollector struct {
	*bp.BaseBatchPipe[*pb.MetricFamily, table.ExportRequests]
	writerFactory table.WriterFactory
	opts          collectorOpts
}

func (c *metricFSCollector) SendMetrics(ctx context.Context, mfs []*pb.MetricFamily) error {
	for _, mf := range mfs {
		if err := c.SendItem(ctx, mf); err != nil {
			return err
		}
	}
	return nil
}

func newMetricFSCollector(writerFactory table.WriterFactory, opts ...collectorOpt) MetricCollector {
	initOpts := defaultCollectorOpts()
	for _, o := range opts {
		o.ApplyTo(&initOpts)
	}
	c := &metricFSCollector{
		writerFactory: writerFactory,
		opts:          initOpts,
	}
	pipeOpts := []bp.BaseBatchPipeOpt{bp.PipeWithBatchWorkerNum(c.opts.sqlWorkerNum)}
	if !initOpts.multiTable {
		pipeOpts = append(pipeOpts,
			bp.PipeWithBufferWorkerNum(1),
			bp.PipeWithItemNameFormatter(func(bp.HasName) string {
				return SingleMetricTable.GetName()
			}))
	}
	base := bp.NewBaseBatchPipe[*pb.MetricFamily, table.ExportRequests](c, pipeOpts...)
	c.BaseBatchPipe = base
	return c
}
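// Note on buffering: with multiTable left at its default (false), the item name
// formatter above routes every MetricFamily into one buffer named after
// SingleMetricTable, drained by a single buffer worker; with multiTable enabled,
// each family keeps its own buffer and export target under the pipe's default
// name-based routing.
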
func (c *metricFSCollector) NewItemBatchHandler(ctx context.Context) func(batch table.ExportRequests) {
	return func(batches table.ExportRequests) {
		for _, batch := range batches {
			if _, err := batch.Handle(); err != nil {
				logutil.Errorf("[Metric] failed to write, err: %v", err)
			}
		}
	}
}

func (c *metricFSCollector) NewItemBuffer(_ string) bp.ItemBuffer[*pb.MetricFamily, table.ExportRequests] {
	return &mfsetETL{
		mfset: mfset{
			Reminder:        bp.NewConstantClock(c.opts.flushInterval),
			metricThreshold: c.opts.metricThreshold,
			sampleThreshold: c.opts.sampleThreshold,
		},
		collector: c,
	}
}

type mfsetETL struct {
	mfset
	collector *metricFSCollector
}

func (s *mfsetETL) GetBatch(ctx context.Context, buf *bytes.Buffer) table.ExportRequests {
	buf.Reset()

	ts := time.Now()
	buffer := make(map[string]table.RowWriter, 2)
	writeValues := func(row *table.Row) error {
		w, exist := buffer[row.GetAccount()]
		if !exist {
			w = s.collector.writerFactory(ctx, row.GetAccount(), SingleMetricTable, ts)
			buffer[row.GetAccount()] = w
		}
		if err := w.WriteRow(row); err != nil {
			return err
		}
		return nil
	}

	row := SingleMetricTable.GetRow(ctx)
	defer row.Free()
	for _, mf := range s.mfs {
		for _, metric := range mf.Metric {
			// reserved labels
			row.Reset()
			row.SetColumnVal(metricNameColumn, mf.GetName())
			row.SetColumnVal(metricNodeColumn, mf.GetNode())
			row.SetColumnVal(metricRoleColumn, mf.GetRole())
			// custom labels
			for _, lbl := range metric.Label {
				row.SetVal(lbl.GetName(), lbl.GetValue())
			}

			switch mf.GetType() {
			case pb.MetricType_COUNTER:
				time := localTime(metric.GetCollecttime())
				row.SetColumnVal(metricCollectTimeColumn, time)
				row.SetColumnVal(metricValueColumn, metric.Counter.GetValue())
				_ = writeValues(row)
			case pb.MetricType_GAUGE:
				time := localTime(metric.GetCollecttime())
				row.SetColumnVal(metricCollectTimeColumn, time)
				row.SetColumnVal(metricValueColumn, metric.Gauge.GetValue())
				_ = writeValues(row)
			case pb.MetricType_RAWHIST:
				for _, sample := range metric.RawHist.Samples {
					time := localTime(sample.GetDatetime())
					row.SetColumnVal(metricCollectTimeColumn, time)
					row.SetColumnVal(metricValueColumn, sample.GetValue())
					_ = writeValues(row)
				}
			default:
				panic(moerr.NewInternalError(ctx, "unsupported metric type %v", mf.GetType()))
			}
		}
	}

	reqs := make([]table.WriteRequest, 0, len(buffer))
	for _, w := range buffer {
		reqs = append(reqs, table.NewRowRequest(w))
	}

	return reqs
}

func localTime(value int64) time.Time {
	return time.UnixMicro(value).In(time.Local)
}

func localTimeStr(value int64) string {
	return time.UnixMicro(value).In(time.Local).Format("2006-01-02 15:04:05.000000")
}
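// runCollector is a minimal lifecycle sketch, not part of the original file: the
// collector and the metric families are assumed to be supplied by the caller. It
// shows the Start / SendMetrics / Stop sequence of the MetricCollector interface.
func runCollector(ctx context.Context, c MetricCollector, mfs []*pb.MetricFamily) error {
	c.Start(ctx) // spin up the buffer and batch workers
	if err := c.SendMetrics(ctx, mfs); err != nil {
		return err
	}
	// graceful stop: let buffered families be flushed before shutdown;
	// the returned channel is assumed to be closed once the pipe has drained.
	stopped, _ := c.Stop(true)
	<-stopped
	return nil
}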