github.com/matrixorigin/matrixone@v1.2.0/pkg/util/trace/impl/motrace/mo_trace.go (about) 1 // Copyright The OpenTelemetry Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Portions of this file are additionally subject to the following 16 // copyright. 17 // 18 // Copyright (C) 2022 Matrix Origin. 19 // 20 // Modified the behavior and the interface of the step. 21 22 package motrace 23 24 import ( 25 "context" 26 "encoding/hex" 27 "errors" 28 "sync" 29 "time" 30 "unsafe" 31 32 "github.com/matrixorigin/matrixone/pkg/common/moerr" 33 "github.com/matrixorigin/matrixone/pkg/logutil" 34 "github.com/matrixorigin/matrixone/pkg/util/export/table" 35 "github.com/matrixorigin/matrixone/pkg/util/profile" 36 "github.com/matrixorigin/matrixone/pkg/util/trace" 37 38 "go.uber.org/zap" 39 "go.uber.org/zap/zapcore" 40 ) 41 42 //nolint:revive // revive complains about stutter of `trace.TraceFlags`. 43 44 var _ trace.Tracer = &MOTracer{} 45 46 // MOTracer is the creator of Spans. 47 type MOTracer struct { 48 trace.TracerConfig 49 provider *MOTracerProvider 50 } 51 52 // Start starts a Span and returns it along with a context containing it. 53 // 54 // The Span is created with the provided name and as a child of any existing 55 // span context found in passed context. The created Span will be 56 // configured appropriately by any SpanOption passed. 57 // 58 // Only timeout Span can be recorded, hold in SpanConfig.LongTimeThreshold. 59 // There are 4 diff ways to setting threshold value: 60 // 1. using default val, which was hold by MOTracerProvider.longSpanTime. 61 // 2. using `WithLongTimeThreshold()` SpanOption, that will override the default val. 62 // 3. passing the Deadline context, then it will check the Deadline at Span ended instead of checking Threshold. 63 // 4. using `WithHungThreshold()` SpanOption, that will override the passed context with context.WithTimeout(ctx, hungThreshold) 64 // and create a new work goroutine to check Deadline event. 65 // 66 // When Span pass timeout threshold or got the deadline event, not only the Span will be recorded, 67 // but also trigger profile dump specify by `WithProfileGoroutine()`, `WithProfileHeap()`, `WithProfileThreadCreate()`, 68 // `WithProfileAllocs()`, `WithProfileBlock()`, `WithProfileMutex()`, `WithProfileCpuSecs()`, `WithProfileTraceSecs()` SpanOption. 69 func (t *MOTracer) Start(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { 70 if !t.IsEnable(opts...) { 71 return ctx, trace.NoopSpan{} 72 } 73 74 span := newMOSpan() 75 76 span.tracer = t 77 span.ctx = ctx 78 span.init(name, opts...) 79 80 parent := trace.SpanFromContext(ctx) 81 psc := parent.SpanContext() 82 83 if span.NewRoot || psc.IsEmpty() { 84 span.TraceID, span.SpanID = t.provider.idGenerator.NewIDs() 85 span.Parent = trace.NoopSpan{} 86 } else { 87 span.TraceID, span.SpanID = psc.TraceID, t.provider.idGenerator.NewSpanID() 88 span.Parent = parent 89 } 90 91 // handle HungThreshold 92 if threshold := span.SpanConfig.HungThreshold(); threshold > 0 { 93 s := newMOHungSpan(span) 94 return trace.ContextWithSpan(ctx, s), s 95 } 96 97 return trace.ContextWithSpan(ctx, span), span 98 } 99 100 func (t *MOTracer) Debug(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) { 101 if !t.provider.debugMode { 102 return ctx, trace.NoopSpan{} 103 } 104 return t.Start(ctx, name, opts...) 105 } 106 107 func (t *MOTracer) IsEnable(opts ...trace.SpanStartOption) bool { 108 var cfg trace.SpanConfig 109 for idx := range opts { 110 opts[idx].ApplySpanStart(&cfg) 111 } 112 113 enable := t.provider.IsEnable() 114 115 // check if is this span kind controlled by mo_ctl. 116 if has, state, _ := trace.IsMOCtledSpan(cfg.Kind); has { 117 return enable && state 118 } 119 120 return enable 121 } 122 123 var _ trace.Span = (*MOHungSpan)(nil) 124 125 type MOHungSpan struct { 126 *MOSpan 127 // quitCtx is used to stop the work goroutine loop(). 128 // Because of quitCtx and deadlineCtx are based on init span.ctx, both can be canceled outside span. 129 // So, quitCtx can help to detect this situation, like loop(). 130 quitCtx context.Context 131 // quitCancel cancel quitCtx in End() 132 quitCancel context.CancelFunc 133 trigger *time.Timer 134 stop func() 135 stopped bool 136 // mux control doProfile exec order, called in loop and End 137 mux sync.Mutex 138 } 139 140 func newMOHungSpan(span *MOSpan) *MOHungSpan { 141 ctx, cancel := context.WithTimeout(span.ctx, span.SpanConfig.HungThreshold()) 142 span.ctx = ctx 143 s := &MOHungSpan{ 144 MOSpan: span, 145 quitCtx: ctx, 146 quitCancel: cancel, 147 } 148 s.trigger = time.AfterFunc(s.HungThreshold(), func() { 149 s.mux.Lock() 150 defer s.mux.Unlock() 151 if e := s.quitCtx.Err(); errors.Is(e, context.Canceled) || s.stopped { 152 return 153 } 154 s.doProfile() 155 logutil.Warn("span trigger hung threshold", 156 trace.SpanField(s.SpanContext()), 157 zap.String("span_name", s.Name), 158 zap.Duration("threshold", s.HungThreshold())) 159 }) 160 s.stop = func() { 161 s.trigger.Stop() 162 s.quitCancel() 163 s.stopped = true 164 } 165 return s 166 } 167 168 func (s *MOHungSpan) End(options ...trace.SpanEndOption) { 169 s.mux.Lock() 170 defer s.mux.Unlock() 171 s.stop() 172 s.MOSpan.End(options...) 173 } 174 175 var _ trace.Span = (*MOSpan)(nil) 176 177 // MOSpan implement export.IBuffer2SqlItem and export.CsvFields 178 type MOSpan struct { 179 trace.SpanConfig 180 Name string `json:"name"` 181 StartTime time.Time `json:"start_time"` 182 EndTime time.Time `jons:"end_time"` 183 // Duration 184 Duration time.Duration `json:"duration"` 185 // ExtraFields 186 ExtraFields []zap.Field `json:"extra"` 187 188 tracer *MOTracer 189 ctx context.Context 190 needRecord bool 191 192 doneProfile bool 193 onEnd []func() 194 } 195 196 var spanPool = &sync.Pool{New: func() any { 197 return &MOSpan{ 198 onEnd: make([]func(), 0, 2), // speedup first append op 199 } 200 }} 201 202 func newMOSpan() *MOSpan { 203 return spanPool.Get().(*MOSpan) 204 } 205 206 func (s *MOSpan) init(name string, opts ...trace.SpanStartOption) { 207 s.Name = name 208 s.StartTime = time.Now() 209 s.LongTimeThreshold = s.tracer.provider.longSpanTime 210 for _, opt := range opts { 211 opt.ApplySpanStart(&s.SpanConfig) 212 } 213 } 214 215 func (s *MOSpan) Size() int64 { 216 return int64(unsafe.Sizeof(*s)) + int64(len(s.Name)) 217 } 218 219 func (s *MOSpan) Free() { 220 s.SpanConfig.Reset() 221 s.Parent = nil 222 s.Name = "" 223 s.tracer = nil 224 s.ctx = nil 225 s.needRecord = false 226 s.ExtraFields = nil 227 s.StartTime = table.ZeroTime 228 s.EndTime = table.ZeroTime 229 s.doneProfile = false 230 s.onEnd = s.onEnd[:0] 231 spanPool.Put(s) 232 } 233 234 func (s *MOSpan) GetName() string { 235 return spanView.OriginTable.GetName() 236 } 237 238 func (s *MOSpan) GetTable() *table.Table { return spanView.OriginTable } 239 240 func (s *MOSpan) FillRow(ctx context.Context, row *table.Row) { 241 row.Reset() 242 row.SetColumnVal(rawItemCol, table.StringField(spanView.Table)) 243 row.SetColumnVal(spanIDCol, table.StringField(hex.EncodeToString(s.SpanID[:]))) 244 row.SetColumnVal(traceIDCol, table.UuidField(s.TraceID[:])) 245 row.SetColumnVal(spanKindCol, table.StringField(s.Kind.String())) 246 psc := s.Parent.SpanContext() 247 if psc.SpanID != trace.NilSpanID { 248 row.SetColumnVal(parentSpanIDCol, table.StringField(hex.EncodeToString(psc.SpanID[:]))) 249 } 250 row.SetColumnVal(nodeUUIDCol, table.StringField(GetNodeResource().NodeUuid)) 251 row.SetColumnVal(nodeTypeCol, table.StringField(GetNodeResource().NodeType)) 252 row.SetColumnVal(spanNameCol, table.StringField(s.Name)) 253 row.SetColumnVal(startTimeCol, table.TimeField(s.StartTime)) 254 row.SetColumnVal(endTimeCol, table.TimeField(s.EndTime)) 255 row.SetColumnVal(timestampCol, table.TimeField(s.EndTime)) 256 row.SetColumnVal(durationCol, table.Uint64Field(uint64(s.Duration))) 257 row.SetColumnVal(resourceCol, table.StringField(s.tracer.provider.resource.String())) 258 259 // fill extra fields 260 if len(s.ExtraFields) > 0 { 261 encoder := getEncoder() 262 buf, err := encoder.EncodeEntry(zapcore.Entry{}, s.ExtraFields) 263 if buf != nil { 264 defer buf.Free() 265 } 266 if err != nil { 267 moerr.ConvertGoError(ctx, err) 268 } else { 269 row.SetColumnVal(extraCol, table.StringField(buf.String())) 270 } 271 } 272 } 273 274 // End completes the Span. Span will be recorded if meets the following condition: 275 // 1. If set Deadline in ctx, which specified at the MOTracer.Start, just check if encounters the Deadline. 276 // 2. If NOT set Deadline, then check condition: MOSpan.Duration >= MOSpan.GetLongTimeThreshold(). 277 func (s *MOSpan) End(options ...trace.SpanEndOption) { 278 var err error 279 s.EndTime = time.Now() 280 for _, fn := range s.onEnd { 281 fn() 282 } 283 284 s.needRecord, err = s.NeedRecord() 285 286 if !s.needRecord { 287 freeMOSpan(s) 288 return 289 } 290 // apply End option 291 for _, opt := range options { 292 opt.ApplySpanEnd(&s.SpanConfig) 293 } 294 295 s.AddExtraFields(s.SpanConfig.Extra...) 296 297 // do profile 298 if s.NeedProfile() { 299 s.doProfile() 300 } 301 // record error info 302 if err != nil { 303 s.ExtraFields = append(s.ExtraFields, zap.Error(err)) 304 } 305 // do Collect 306 for _, sp := range s.tracer.provider.spanProcessors { 307 sp.OnEnd(s) 308 } 309 } 310 311 func (s *MOSpan) doProfileRuntime(ctx context.Context, name string, debug int) { 312 now := time.Now() 313 factory := s.tracer.provider.writerFactory 314 filepath := profile.GetProfileName(name, s.SpanID.String(), now) 315 w := factory.GetWriter(ctx, filepath) 316 err := profile.ProfileRuntime(name, w, debug) 317 if err == nil { 318 err = w.Close() 319 } 320 if err != nil { 321 s.AddExtraFields(zap.String(name, err.Error())) 322 } else { 323 s.AddExtraFields(zap.String(name, filepath)) 324 } 325 } 326 327 func (s *MOSpan) doProfileSystemStatus(ctx context.Context) { 328 if s.SpanConfig.ProfileSystemStatusFn == nil { 329 return 330 } 331 data, err := s.SpanConfig.ProfileSystemStatusFn() 332 if err != nil { 333 s.AddExtraFields(zap.String(profile.STATUS, err.Error())) 334 return 335 } 336 if data == nil { 337 return 338 } 339 340 factory := s.tracer.provider.writerFactory 341 filepath := profile.GetSystemStatusFilePath(s.SpanID.String(), time.Now()) 342 logutil.Infof("system status dumped to file %s", filepath) 343 w := factory.GetWriter(ctx, filepath) 344 _, err = w.Write(data) 345 if err == nil { 346 err = w.Close() 347 } 348 if err != nil { 349 s.AddExtraFields(zap.String(profile.STATUS, err.Error())) 350 } else { 351 s.AddExtraFields(zap.String(profile.STATUS, filepath)) 352 } 353 } 354 355 func (s *MOSpan) NeedRecord() (bool, error) { 356 // if the span kind falls in mo_ctl controlled spans, we 357 // hope it ignores the long time threshold set by the tracer and deadline restrictions. 358 // but the threshold set by mo ctl need to be considered 359 if has, state, threshold := trace.IsMOCtledSpan(s.Kind); has { 360 return state && (s.Duration >= threshold), nil 361 } 362 // the default logic that before mo_ctl controlled spans have been introduced 363 deadline, hasDeadline := s.ctx.Deadline() 364 s.Duration = s.EndTime.Sub(s.StartTime) 365 if hasDeadline { 366 if s.EndTime.After(deadline) { 367 return true, s.ctx.Err() 368 } 369 } else { 370 return s.Duration >= s.LongTimeThreshold, nil 371 } 372 return false, nil 373 } 374 375 // doProfile is sync op. 376 func (s *MOSpan) doProfile() { 377 if s.doneProfile { 378 return 379 } 380 factory := s.tracer.provider.writerFactory 381 ctx := DefaultContext() 382 // do profile goroutine txt 383 if s.ProfileGoroutine() { 384 s.doProfileRuntime(ctx, profile.GOROUTINE, 2) 385 } 386 // do profile heap pprof 387 if s.ProfileHeap() { 388 s.doProfileRuntime(ctx, profile.HEAP, 0) 389 } 390 // do profile allocs 391 if s.ProfileAllocs() { 392 s.doProfileRuntime(ctx, profile.ALLOCS, 0) 393 } 394 // do profile threadcreate 395 if s.ProfileThreadCreate() { 396 s.doProfileRuntime(ctx, profile.THREADCREATE, 0) 397 } 398 // do profile block 399 if s.ProfileBlock() { 400 s.doProfileRuntime(ctx, profile.BLOCK, 0) 401 } 402 // do profile mutex 403 if s.ProfileMutex() { 404 s.doProfileRuntime(ctx, profile.MUTEX, 0) 405 } 406 // profile cpu should be the last one op, caused by it will sustain few seconds 407 if s.ProfileCpuSecs() > 0 { 408 filepath := profile.GetProfileName(profile.CPU, s.SpanID.String(), s.EndTime) 409 w := factory.GetWriter(ctx, filepath) 410 err := profile.ProfileCPU(w, s.ProfileCpuSecs()) 411 if err == nil { 412 err = w.Close() 413 } 414 if err != nil { 415 s.AddExtraFields(zap.String(profile.CPU, err.Error())) 416 } else { 417 s.AddExtraFields(zap.String(profile.CPU, filepath)) 418 } 419 } 420 // profile trace is a sync-op, it will sustain few seconds 421 if s.ProfileTraceSecs() > 0 { 422 filepath := profile.GetProfileName(profile.TRACE, s.SpanID.String(), s.EndTime) 423 w := factory.GetWriter(ctx, filepath) 424 err := profile.ProfileTrace(w, s.ProfileTraceSecs()) 425 if err == nil { 426 err = w.Close() 427 } 428 if err != nil { 429 s.AddExtraFields(zap.String(profile.TRACE, err.Error())) 430 } else { 431 s.AddExtraFields(zap.String(profile.TRACE, filepath)) 432 } 433 } 434 // profile system status. 435 if s.ProfileSystemStatus() { 436 s.doProfileSystemStatus(ctx) 437 } 438 s.doneProfile = true 439 } 440 441 var freeMOSpan = func(s *MOSpan) { 442 s.Free() 443 } 444 445 func (s *MOSpan) AddExtraFields(fields ...zap.Field) { 446 s.ExtraFields = append(s.ExtraFields, fields...) 447 } 448 449 func (s *MOSpan) SpanContext() trace.SpanContext { 450 return s.SpanConfig.SpanContext 451 } 452 453 func (s *MOSpan) ParentSpanContext() trace.SpanContext { 454 return s.SpanConfig.Parent.SpanContext() 455 } 456 457 const timestampFormatter = "2006-01-02 15:04:05.000000" 458 459 func Time2DatetimeString(t time.Time) string { 460 return t.Format(timestampFormatter) 461 } 462 463 var jsonEncoder zapcore.Encoder 464 var jsonEncoderInit sync.Once 465 466 func getEncoder() zapcore.Encoder { 467 jsonEncoderInit.Do(func() { 468 jsonEncoder = zapcore.NewJSONEncoder(zapcore.EncoderConfig{ 469 TimeKey: "", 470 LevelKey: "", 471 NameKey: "", 472 CallerKey: "", 473 FunctionKey: zapcore.OmitKey, 474 MessageKey: "", 475 StacktraceKey: "", 476 SkipLineEnding: true, 477 }) 478 }) 479 return jsonEncoder.Clone() 480 }