github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/stats.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/colexec/execpb"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    22  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    23  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    24  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    25  )
    26  
    27  // VectorizedStatsCollector collects VectorizedStats on Operators.
    28  //
    29  // If two Operators are connected (i.e. one is an input to another), the
    30  // corresponding VectorizedStatsCollectors are also "connected" by sharing a
    31  // StopWatch.
    32  type VectorizedStatsCollector struct {
    33  	colexecbase.Operator
    34  	NonExplainable
    35  	execpb.VectorizedStats
    36  	idTagKey string
    37  
    38  	// inputWatch is a single stop watch that is shared with all the input
    39  	// Operators. If the Operator doesn't have any inputs (like colBatchScan),
    40  	// it is not shared with anyone. It is used by the wrapped Operator to
    41  	// measure its stall or execution time.
    42  	inputWatch *timeutil.StopWatch
    43  	// outputWatch is a stop watch that is shared with the Operator that the
    44  	// wrapped Operator is feeding into. It must be started right before
    45  	// returning a batch when Nexted. It is used by the "output" Operator.
    46  	outputWatch *timeutil.StopWatch
    47  
    48  	memMonitors  []*mon.BytesMonitor
    49  	diskMonitors []*mon.BytesMonitor
    50  }
    51  
    52  var _ colexecbase.Operator = &VectorizedStatsCollector{}
    53  
    54  // NewVectorizedStatsCollector creates a new VectorizedStatsCollector which
    55  // wraps 'op' that corresponds to a component with either ProcessorID or
    56  // StreamID 'id' (with 'idTagKey' distinguishing between the two). 'isStall'
    57  // indicates whether stall or execution time is being measured. 'inputWatch'
    58  // must be non-nil.
    59  func NewVectorizedStatsCollector(
    60  	op colexecbase.Operator,
    61  	id int32,
    62  	idTagKey string,
    63  	isStall bool,
    64  	inputWatch *timeutil.StopWatch,
    65  	memMonitors []*mon.BytesMonitor,
    66  	diskMonitors []*mon.BytesMonitor,
    67  ) *VectorizedStatsCollector {
    68  	if inputWatch == nil {
    69  		colexecerror.InternalError("input watch for VectorizedStatsCollector is nil")
    70  	}
    71  	return &VectorizedStatsCollector{
    72  		Operator:        op,
    73  		VectorizedStats: execpb.VectorizedStats{ID: id, Stall: isStall},
    74  		idTagKey:        idTagKey,
    75  		inputWatch:      inputWatch,
    76  		memMonitors:     memMonitors,
    77  		diskMonitors:    diskMonitors,
    78  	}
    79  }
    80  
    81  // SetOutputWatch sets vsc.outputWatch to outputWatch. It is used to "connect"
    82  // this VectorizedStatsCollector to the next one in the chain.
    83  func (vsc *VectorizedStatsCollector) SetOutputWatch(outputWatch *timeutil.StopWatch) {
    84  	vsc.outputWatch = outputWatch
    85  }
    86  
    87  // Next is part of Operator interface.
    88  func (vsc *VectorizedStatsCollector) Next(ctx context.Context) coldata.Batch {
    89  	if vsc.outputWatch != nil {
    90  		// vsc.outputWatch is non-nil which means that this Operator is outputting
    91  		// the batches into another one. In order to avoid double counting the time
    92  		// actually spent in the current "input" Operator, we're stopping the stop
    93  		// watch of the other "output" Operator before doing any computations here.
    94  		vsc.outputWatch.Stop()
    95  	}
    96  
    97  	var batch coldata.Batch
    98  	if vsc.VectorizedStats.Stall {
    99  		// We're measuring stall time, so there are no inputs into the wrapped
   100  		// Operator, and we need to start the stop watch ourselves.
   101  		vsc.inputWatch.Start()
   102  	}
   103  	batch = vsc.Operator.Next(ctx)
   104  	if batch.Length() > 0 {
   105  		vsc.NumBatches++
   106  		vsc.NumTuples += int64(batch.Length())
   107  	}
   108  	vsc.inputWatch.Stop()
   109  	if vsc.outputWatch != nil {
   110  		// vsc.outputWatch is non-nil which means that this Operator is outputting
   111  		// the batches into another one. To allow for measuring the execution time
   112  		// of that other Operator, we're starting the stop watch right before
   113  		// returning batch.
   114  		vsc.outputWatch.Start()
   115  	}
   116  	return batch
   117  }
   118  
   119  // finalizeStats records the time measured by the stop watch into the stats as
   120  // well as the memory and disk usage.
   121  func (vsc *VectorizedStatsCollector) finalizeStats() {
   122  	vsc.Time = vsc.inputWatch.Elapsed()
   123  	for _, memMon := range vsc.memMonitors {
   124  		vsc.MaxAllocatedMem += memMon.MaximumBytes()
   125  	}
   126  	for _, diskMon := range vsc.diskMonitors {
   127  		vsc.MaxAllocatedDisk += diskMon.MaximumBytes()
   128  	}
   129  }
   130  
   131  // OutputStats outputs the vectorized stats collected by vsc into ctx.
   132  func (vsc *VectorizedStatsCollector) OutputStats(
   133  	ctx context.Context, flowID string, deterministicStats bool,
   134  ) {
   135  	if vsc.ID < 0 {
   136  		// Ignore this stats collector since it is not associated with any
   137  		// component.
   138  		return
   139  	}
   140  	// We're creating a new span for every component setting the appropriate
   141  	// tag so that it is displayed correctly on the flow diagram.
   142  	// TODO(yuzefovich): these spans are created and finished right away which
   143  	// is not the way they are supposed to be used, so this should be fixed.
   144  	_, span := tracing.ChildSpan(ctx, fmt.Sprintf("%T", vsc.Operator))
   145  	span.SetTag(execinfrapb.FlowIDTagKey, flowID)
   146  	span.SetTag(vsc.idTagKey, vsc.ID)
   147  	vsc.finalizeStats()
   148  	if deterministicStats {
   149  		vsc.VectorizedStats.Time = 0
   150  		vsc.MaxAllocatedMem = 0
   151  		vsc.MaxAllocatedDisk = 0
   152  		vsc.NumBatches = 0
   153  	}
   154  	tracing.SetSpanStats(span, &vsc.VectorizedStats)
   155  	span.Finish()
   156  }