github.com/matrixorigin/matrixone@v1.2.0/pkg/util/trace/impl/motrace/mo_trace.go (about)

     1  // Copyright The OpenTelemetry Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Portions of this file are additionally subject to the following
    16  // copyright.
    17  //
    18  // Copyright (C) 2022 Matrix Origin.
    19  //
    20  // Modified the behavior and the interface of the step.
    21  
    22  package motrace
    23  
    24  import (
    25  	"context"
    26  	"encoding/hex"
    27  	"errors"
    28  	"sync"
    29  	"time"
    30  	"unsafe"
    31  
    32  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    33  	"github.com/matrixorigin/matrixone/pkg/logutil"
    34  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    35  	"github.com/matrixorigin/matrixone/pkg/util/profile"
    36  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    37  
    38  	"go.uber.org/zap"
    39  	"go.uber.org/zap/zapcore"
    40  )
    41  
    42  //nolint:revive // revive complains about stutter of `trace.TraceFlags`.
    43  
    44  var _ trace.Tracer = &MOTracer{}
    45  
    46  // MOTracer is the creator of Spans.
    47  type MOTracer struct {
    48  	trace.TracerConfig
    49  	provider *MOTracerProvider
    50  }
    51  
    52  // Start starts a Span and returns it along with a context containing it.
    53  //
    54  // The Span is created with the provided name and as a child of any existing
    55  // span context found in passed context. The created Span will be
    56  // configured appropriately by any SpanOption passed.
    57  //
    58  // Only timeout Span can be recorded, hold in SpanConfig.LongTimeThreshold.
    59  // There are 4 diff ways to setting threshold value:
    60  // 1. using default val, which was hold by MOTracerProvider.longSpanTime.
    61  // 2. using `WithLongTimeThreshold()` SpanOption, that will override the default val.
    62  // 3. passing the Deadline context, then it will check the Deadline at Span ended instead of checking Threshold.
    63  // 4. using `WithHungThreshold()` SpanOption, that will override the passed context with context.WithTimeout(ctx, hungThreshold)
    64  // and create a new work goroutine to check Deadline event.
    65  //
    66  // When Span pass timeout threshold or got the deadline event, not only the Span will be recorded,
    67  // but also trigger profile dump specify by `WithProfileGoroutine()`, `WithProfileHeap()`, `WithProfileThreadCreate()`,
    68  // `WithProfileAllocs()`, `WithProfileBlock()`, `WithProfileMutex()`, `WithProfileCpuSecs()`, `WithProfileTraceSecs()` SpanOption.
    69  func (t *MOTracer) Start(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
    70  	if !t.IsEnable(opts...) {
    71  		return ctx, trace.NoopSpan{}
    72  	}
    73  
    74  	span := newMOSpan()
    75  
    76  	span.tracer = t
    77  	span.ctx = ctx
    78  	span.init(name, opts...)
    79  
    80  	parent := trace.SpanFromContext(ctx)
    81  	psc := parent.SpanContext()
    82  
    83  	if span.NewRoot || psc.IsEmpty() {
    84  		span.TraceID, span.SpanID = t.provider.idGenerator.NewIDs()
    85  		span.Parent = trace.NoopSpan{}
    86  	} else {
    87  		span.TraceID, span.SpanID = psc.TraceID, t.provider.idGenerator.NewSpanID()
    88  		span.Parent = parent
    89  	}
    90  
    91  	// handle HungThreshold
    92  	if threshold := span.SpanConfig.HungThreshold(); threshold > 0 {
    93  		s := newMOHungSpan(span)
    94  		return trace.ContextWithSpan(ctx, s), s
    95  	}
    96  
    97  	return trace.ContextWithSpan(ctx, span), span
    98  }
    99  
   100  func (t *MOTracer) Debug(ctx context.Context, name string, opts ...trace.SpanStartOption) (context.Context, trace.Span) {
   101  	if !t.provider.debugMode {
   102  		return ctx, trace.NoopSpan{}
   103  	}
   104  	return t.Start(ctx, name, opts...)
   105  }
   106  
   107  func (t *MOTracer) IsEnable(opts ...trace.SpanStartOption) bool {
   108  	var cfg trace.SpanConfig
   109  	for idx := range opts {
   110  		opts[idx].ApplySpanStart(&cfg)
   111  	}
   112  
   113  	enable := t.provider.IsEnable()
   114  
   115  	// check if is this span kind controlled by mo_ctl.
   116  	if has, state, _ := trace.IsMOCtledSpan(cfg.Kind); has {
   117  		return enable && state
   118  	}
   119  
   120  	return enable
   121  }
   122  
   123  var _ trace.Span = (*MOHungSpan)(nil)
   124  
   125  type MOHungSpan struct {
   126  	*MOSpan
   127  	// quitCtx is used to stop the work goroutine loop().
   128  	// Because of quitCtx and deadlineCtx are based on init span.ctx, both can be canceled outside span.
   129  	// So, quitCtx can help to detect this situation, like loop().
   130  	quitCtx context.Context
   131  	// quitCancel cancel quitCtx in End()
   132  	quitCancel context.CancelFunc
   133  	trigger    *time.Timer
   134  	stop       func()
   135  	stopped    bool
   136  	// mux control doProfile exec order, called in loop and End
   137  	mux sync.Mutex
   138  }
   139  
   140  func newMOHungSpan(span *MOSpan) *MOHungSpan {
   141  	ctx, cancel := context.WithTimeout(span.ctx, span.SpanConfig.HungThreshold())
   142  	span.ctx = ctx
   143  	s := &MOHungSpan{
   144  		MOSpan:     span,
   145  		quitCtx:    ctx,
   146  		quitCancel: cancel,
   147  	}
   148  	s.trigger = time.AfterFunc(s.HungThreshold(), func() {
   149  		s.mux.Lock()
   150  		defer s.mux.Unlock()
   151  		if e := s.quitCtx.Err(); errors.Is(e, context.Canceled) || s.stopped {
   152  			return
   153  		}
   154  		s.doProfile()
   155  		logutil.Warn("span trigger hung threshold",
   156  			trace.SpanField(s.SpanContext()),
   157  			zap.String("span_name", s.Name),
   158  			zap.Duration("threshold", s.HungThreshold()))
   159  	})
   160  	s.stop = func() {
   161  		s.trigger.Stop()
   162  		s.quitCancel()
   163  		s.stopped = true
   164  	}
   165  	return s
   166  }
   167  
   168  func (s *MOHungSpan) End(options ...trace.SpanEndOption) {
   169  	s.mux.Lock()
   170  	defer s.mux.Unlock()
   171  	s.stop()
   172  	s.MOSpan.End(options...)
   173  }
   174  
   175  var _ trace.Span = (*MOSpan)(nil)
   176  
   177  // MOSpan implement export.IBuffer2SqlItem and export.CsvFields
   178  type MOSpan struct {
   179  	trace.SpanConfig
   180  	Name      string    `json:"name"`
   181  	StartTime time.Time `json:"start_time"`
   182  	EndTime   time.Time `jons:"end_time"`
   183  	// Duration
   184  	Duration time.Duration `json:"duration"`
   185  	// ExtraFields
   186  	ExtraFields []zap.Field `json:"extra"`
   187  
   188  	tracer     *MOTracer
   189  	ctx        context.Context
   190  	needRecord bool
   191  
   192  	doneProfile bool
   193  	onEnd       []func()
   194  }
   195  
   196  var spanPool = &sync.Pool{New: func() any {
   197  	return &MOSpan{
   198  		onEnd: make([]func(), 0, 2), // speedup first append op
   199  	}
   200  }}
   201  
   202  func newMOSpan() *MOSpan {
   203  	return spanPool.Get().(*MOSpan)
   204  }
   205  
   206  func (s *MOSpan) init(name string, opts ...trace.SpanStartOption) {
   207  	s.Name = name
   208  	s.StartTime = time.Now()
   209  	s.LongTimeThreshold = s.tracer.provider.longSpanTime
   210  	for _, opt := range opts {
   211  		opt.ApplySpanStart(&s.SpanConfig)
   212  	}
   213  }
   214  
   215  func (s *MOSpan) Size() int64 {
   216  	return int64(unsafe.Sizeof(*s)) + int64(len(s.Name))
   217  }
   218  
   219  func (s *MOSpan) Free() {
   220  	s.SpanConfig.Reset()
   221  	s.Parent = nil
   222  	s.Name = ""
   223  	s.tracer = nil
   224  	s.ctx = nil
   225  	s.needRecord = false
   226  	s.ExtraFields = nil
   227  	s.StartTime = table.ZeroTime
   228  	s.EndTime = table.ZeroTime
   229  	s.doneProfile = false
   230  	s.onEnd = s.onEnd[:0]
   231  	spanPool.Put(s)
   232  }
   233  
   234  func (s *MOSpan) GetName() string {
   235  	return spanView.OriginTable.GetName()
   236  }
   237  
   238  func (s *MOSpan) GetTable() *table.Table { return spanView.OriginTable }
   239  
   240  func (s *MOSpan) FillRow(ctx context.Context, row *table.Row) {
   241  	row.Reset()
   242  	row.SetColumnVal(rawItemCol, table.StringField(spanView.Table))
   243  	row.SetColumnVal(spanIDCol, table.StringField(hex.EncodeToString(s.SpanID[:])))
   244  	row.SetColumnVal(traceIDCol, table.UuidField(s.TraceID[:]))
   245  	row.SetColumnVal(spanKindCol, table.StringField(s.Kind.String()))
   246  	psc := s.Parent.SpanContext()
   247  	if psc.SpanID != trace.NilSpanID {
   248  		row.SetColumnVal(parentSpanIDCol, table.StringField(hex.EncodeToString(psc.SpanID[:])))
   249  	}
   250  	row.SetColumnVal(nodeUUIDCol, table.StringField(GetNodeResource().NodeUuid))
   251  	row.SetColumnVal(nodeTypeCol, table.StringField(GetNodeResource().NodeType))
   252  	row.SetColumnVal(spanNameCol, table.StringField(s.Name))
   253  	row.SetColumnVal(startTimeCol, table.TimeField(s.StartTime))
   254  	row.SetColumnVal(endTimeCol, table.TimeField(s.EndTime))
   255  	row.SetColumnVal(timestampCol, table.TimeField(s.EndTime))
   256  	row.SetColumnVal(durationCol, table.Uint64Field(uint64(s.Duration)))
   257  	row.SetColumnVal(resourceCol, table.StringField(s.tracer.provider.resource.String()))
   258  
   259  	// fill extra fields
   260  	if len(s.ExtraFields) > 0 {
   261  		encoder := getEncoder()
   262  		buf, err := encoder.EncodeEntry(zapcore.Entry{}, s.ExtraFields)
   263  		if buf != nil {
   264  			defer buf.Free()
   265  		}
   266  		if err != nil {
   267  			moerr.ConvertGoError(ctx, err)
   268  		} else {
   269  			row.SetColumnVal(extraCol, table.StringField(buf.String()))
   270  		}
   271  	}
   272  }
   273  
   274  // End completes the Span. Span will be recorded if meets the following condition:
   275  // 1. If set Deadline in ctx, which specified at the MOTracer.Start, just check if encounters the Deadline.
   276  // 2. If NOT set Deadline, then check condition: MOSpan.Duration >= MOSpan.GetLongTimeThreshold().
   277  func (s *MOSpan) End(options ...trace.SpanEndOption) {
   278  	var err error
   279  	s.EndTime = time.Now()
   280  	for _, fn := range s.onEnd {
   281  		fn()
   282  	}
   283  
   284  	s.needRecord, err = s.NeedRecord()
   285  
   286  	if !s.needRecord {
   287  		freeMOSpan(s)
   288  		return
   289  	}
   290  	// apply End option
   291  	for _, opt := range options {
   292  		opt.ApplySpanEnd(&s.SpanConfig)
   293  	}
   294  
   295  	s.AddExtraFields(s.SpanConfig.Extra...)
   296  
   297  	// do profile
   298  	if s.NeedProfile() {
   299  		s.doProfile()
   300  	}
   301  	// record error info
   302  	if err != nil {
   303  		s.ExtraFields = append(s.ExtraFields, zap.Error(err))
   304  	}
   305  	// do Collect
   306  	for _, sp := range s.tracer.provider.spanProcessors {
   307  		sp.OnEnd(s)
   308  	}
   309  }
   310  
   311  func (s *MOSpan) doProfileRuntime(ctx context.Context, name string, debug int) {
   312  	now := time.Now()
   313  	factory := s.tracer.provider.writerFactory
   314  	filepath := profile.GetProfileName(name, s.SpanID.String(), now)
   315  	w := factory.GetWriter(ctx, filepath)
   316  	err := profile.ProfileRuntime(name, w, debug)
   317  	if err == nil {
   318  		err = w.Close()
   319  	}
   320  	if err != nil {
   321  		s.AddExtraFields(zap.String(name, err.Error()))
   322  	} else {
   323  		s.AddExtraFields(zap.String(name, filepath))
   324  	}
   325  }
   326  
   327  func (s *MOSpan) doProfileSystemStatus(ctx context.Context) {
   328  	if s.SpanConfig.ProfileSystemStatusFn == nil {
   329  		return
   330  	}
   331  	data, err := s.SpanConfig.ProfileSystemStatusFn()
   332  	if err != nil {
   333  		s.AddExtraFields(zap.String(profile.STATUS, err.Error()))
   334  		return
   335  	}
   336  	if data == nil {
   337  		return
   338  	}
   339  
   340  	factory := s.tracer.provider.writerFactory
   341  	filepath := profile.GetSystemStatusFilePath(s.SpanID.String(), time.Now())
   342  	logutil.Infof("system status dumped to file %s", filepath)
   343  	w := factory.GetWriter(ctx, filepath)
   344  	_, err = w.Write(data)
   345  	if err == nil {
   346  		err = w.Close()
   347  	}
   348  	if err != nil {
   349  		s.AddExtraFields(zap.String(profile.STATUS, err.Error()))
   350  	} else {
   351  		s.AddExtraFields(zap.String(profile.STATUS, filepath))
   352  	}
   353  }
   354  
   355  func (s *MOSpan) NeedRecord() (bool, error) {
   356  	// if the span kind falls in mo_ctl controlled spans, we
   357  	// hope it ignores the long time threshold set by the tracer and deadline restrictions.
   358  	// but the threshold set by mo ctl need to be considered
   359  	if has, state, threshold := trace.IsMOCtledSpan(s.Kind); has {
   360  		return state && (s.Duration >= threshold), nil
   361  	}
   362  	// the default logic that before mo_ctl controlled spans have been introduced
   363  	deadline, hasDeadline := s.ctx.Deadline()
   364  	s.Duration = s.EndTime.Sub(s.StartTime)
   365  	if hasDeadline {
   366  		if s.EndTime.After(deadline) {
   367  			return true, s.ctx.Err()
   368  		}
   369  	} else {
   370  		return s.Duration >= s.LongTimeThreshold, nil
   371  	}
   372  	return false, nil
   373  }
   374  
   375  // doProfile is sync op.
   376  func (s *MOSpan) doProfile() {
   377  	if s.doneProfile {
   378  		return
   379  	}
   380  	factory := s.tracer.provider.writerFactory
   381  	ctx := DefaultContext()
   382  	// do profile goroutine txt
   383  	if s.ProfileGoroutine() {
   384  		s.doProfileRuntime(ctx, profile.GOROUTINE, 2)
   385  	}
   386  	// do profile heap pprof
   387  	if s.ProfileHeap() {
   388  		s.doProfileRuntime(ctx, profile.HEAP, 0)
   389  	}
   390  	// do profile allocs
   391  	if s.ProfileAllocs() {
   392  		s.doProfileRuntime(ctx, profile.ALLOCS, 0)
   393  	}
   394  	// do profile threadcreate
   395  	if s.ProfileThreadCreate() {
   396  		s.doProfileRuntime(ctx, profile.THREADCREATE, 0)
   397  	}
   398  	// do profile block
   399  	if s.ProfileBlock() {
   400  		s.doProfileRuntime(ctx, profile.BLOCK, 0)
   401  	}
   402  	// do profile mutex
   403  	if s.ProfileMutex() {
   404  		s.doProfileRuntime(ctx, profile.MUTEX, 0)
   405  	}
   406  	// profile cpu should be the last one op, caused by it will sustain few seconds
   407  	if s.ProfileCpuSecs() > 0 {
   408  		filepath := profile.GetProfileName(profile.CPU, s.SpanID.String(), s.EndTime)
   409  		w := factory.GetWriter(ctx, filepath)
   410  		err := profile.ProfileCPU(w, s.ProfileCpuSecs())
   411  		if err == nil {
   412  			err = w.Close()
   413  		}
   414  		if err != nil {
   415  			s.AddExtraFields(zap.String(profile.CPU, err.Error()))
   416  		} else {
   417  			s.AddExtraFields(zap.String(profile.CPU, filepath))
   418  		}
   419  	}
   420  	// profile trace is a sync-op, it will sustain few seconds
   421  	if s.ProfileTraceSecs() > 0 {
   422  		filepath := profile.GetProfileName(profile.TRACE, s.SpanID.String(), s.EndTime)
   423  		w := factory.GetWriter(ctx, filepath)
   424  		err := profile.ProfileTrace(w, s.ProfileTraceSecs())
   425  		if err == nil {
   426  			err = w.Close()
   427  		}
   428  		if err != nil {
   429  			s.AddExtraFields(zap.String(profile.TRACE, err.Error()))
   430  		} else {
   431  			s.AddExtraFields(zap.String(profile.TRACE, filepath))
   432  		}
   433  	}
   434  	// profile system status.
   435  	if s.ProfileSystemStatus() {
   436  		s.doProfileSystemStatus(ctx)
   437  	}
   438  	s.doneProfile = true
   439  }
   440  
   441  var freeMOSpan = func(s *MOSpan) {
   442  	s.Free()
   443  }
   444  
   445  func (s *MOSpan) AddExtraFields(fields ...zap.Field) {
   446  	s.ExtraFields = append(s.ExtraFields, fields...)
   447  }
   448  
   449  func (s *MOSpan) SpanContext() trace.SpanContext {
   450  	return s.SpanConfig.SpanContext
   451  }
   452  
   453  func (s *MOSpan) ParentSpanContext() trace.SpanContext {
   454  	return s.SpanConfig.Parent.SpanContext()
   455  }
   456  
   457  const timestampFormatter = "2006-01-02 15:04:05.000000"
   458  
   459  func Time2DatetimeString(t time.Time) string {
   460  	return t.Format(timestampFormatter)
   461  }
   462  
   463  var jsonEncoder zapcore.Encoder
   464  var jsonEncoderInit sync.Once
   465  
   466  func getEncoder() zapcore.Encoder {
   467  	jsonEncoderInit.Do(func() {
   468  		jsonEncoder = zapcore.NewJSONEncoder(zapcore.EncoderConfig{
   469  			TimeKey:        "",
   470  			LevelKey:       "",
   471  			NameKey:        "",
   472  			CallerKey:      "",
   473  			FunctionKey:    zapcore.OmitKey,
   474  			MessageKey:     "",
   475  			StacktraceKey:  "",
   476  			SkipLineEnding: true,
   477  		})
   478  	})
   479  	return jsonEncoder.Clone()
   480  }