github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sample_aggregator.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/axiomhq/hyperloglog"
    18  	"github.com/cockroachdb/cockroach/pkg/jobs"
    19  	"github.com/cockroachdb/cockroach/pkg/kv"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/stats"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    28  	"github.com/cockroachdb/cockroach/pkg/util"
    29  	"github.com/cockroachdb/cockroach/pkg/util/log"
    30  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    31  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    32  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    33  	"github.com/cockroachdb/errors"
    34  	"github.com/opentracing/opentracing-go"
    35  )
    36  
    37  // A sample aggregator processor aggregates results from multiple sampler
    38  // processors. See SampleAggregatorSpec for more details.
    39  type sampleAggregator struct {
    40  	execinfra.ProcessorBase
    41  
    42  	spec    *execinfrapb.SampleAggregatorSpec
    43  	input   execinfra.RowSource
    44  	inTypes []*types.T
    45  	sr      stats.SampleReservoir
    46  
    47  	// memAcc accounts for memory accumulated throughout the life of the
    48  	// sampleAggregator.
    49  	memAcc mon.BoundAccount
    50  
    51  	// tempMemAcc is used to account for memory that is allocated temporarily
    52  	// and released before the sampleAggregator is finished.
    53  	tempMemAcc mon.BoundAccount
    54  
    55  	tableID     sqlbase.ID
    56  	sampledCols []sqlbase.ColumnID
    57  	sketches    []sketchInfo
    58  
    59  	// Input column indices for special columns.
    60  	rankCol      int
    61  	sketchIdxCol int
    62  	numRowsCol   int
    63  	numNullsCol  int
    64  	sketchCol    int
    65  }
    66  
    67  var _ execinfra.Processor = &sampleAggregator{}
    68  
    69  const sampleAggregatorProcName = "sample aggregator"
    70  
    71  // SampleAggregatorProgressInterval is the frequency at which the
    72  // SampleAggregator processor will report progress. It is mutable for testing.
    73  var SampleAggregatorProgressInterval = 5 * time.Second
    74  
    75  func newSampleAggregator(
    76  	flowCtx *execinfra.FlowCtx,
    77  	processorID int32,
    78  	spec *execinfrapb.SampleAggregatorSpec,
    79  	input execinfra.RowSource,
    80  	post *execinfrapb.PostProcessSpec,
    81  	output execinfra.RowReceiver,
    82  ) (*sampleAggregator, error) {
    83  	for _, s := range spec.Sketches {
    84  		if len(s.Columns) == 0 {
    85  			return nil, errors.Errorf("no columns")
    86  		}
    87  		if _, ok := supportedSketchTypes[s.SketchType]; !ok {
    88  			return nil, errors.Errorf("unsupported sketch type %s", s.SketchType)
    89  		}
    90  		if s.GenerateHistogram && s.HistogramMaxBuckets == 0 {
    91  			return nil, errors.Errorf("histogram max buckets not specified")
    92  		}
    93  		if s.GenerateHistogram && len(s.Columns) != 1 {
    94  			return nil, errors.Errorf("histograms require one column")
    95  		}
    96  	}
    97  
    98  	ctx := flowCtx.EvalCtx.Ctx()
    99  	// Limit the memory use by creating a child monitor with a hard limit.
   100  	// The processor will disable histogram collection if this limit is not
   101  	// enough.
   102  	memMonitor := execinfra.NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, "sample-aggregator-mem")
   103  	rankCol := len(input.OutputTypes()) - 5
   104  	s := &sampleAggregator{
   105  		spec:         spec,
   106  		input:        input,
   107  		inTypes:      input.OutputTypes(),
   108  		memAcc:       memMonitor.MakeBoundAccount(),
   109  		tempMemAcc:   memMonitor.MakeBoundAccount(),
   110  		tableID:      spec.TableID,
   111  		sampledCols:  spec.SampledColumnIDs,
   112  		sketches:     make([]sketchInfo, len(spec.Sketches)),
   113  		rankCol:      rankCol,
   114  		sketchIdxCol: rankCol + 1,
   115  		numRowsCol:   rankCol + 2,
   116  		numNullsCol:  rankCol + 3,
   117  		sketchCol:    rankCol + 4,
   118  	}
   119  
   120  	var sampleCols util.FastIntSet
   121  	for i := range spec.Sketches {
   122  		s.sketches[i] = sketchInfo{
   123  			spec:     spec.Sketches[i],
   124  			sketch:   hyperloglog.New14(),
   125  			numNulls: 0,
   126  			numRows:  0,
   127  		}
   128  		if spec.Sketches[i].GenerateHistogram {
   129  			sampleCols.Add(int(spec.Sketches[i].Columns[0]))
   130  		}
   131  	}
   132  
   133  	s.sr.Init(int(spec.SampleSize), input.OutputTypes()[:rankCol], &s.memAcc, sampleCols)
   134  
   135  	if err := s.Init(
   136  		nil, post, input.OutputTypes(), flowCtx, processorID, output, memMonitor,
   137  		execinfra.ProcStateOpts{
   138  			TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata {
   139  				s.close()
   140  				return nil
   141  			},
   142  		},
   143  	); err != nil {
   144  		return nil, err
   145  	}
   146  	return s, nil
   147  }
   148  
   149  func (s *sampleAggregator) pushTrailingMeta(ctx context.Context) {
   150  	execinfra.SendTraceData(ctx, s.Out.Output())
   151  }
   152  
   153  // Run is part of the Processor interface.
   154  func (s *sampleAggregator) Run(ctx context.Context) {
   155  	s.input.Start(ctx)
   156  	s.StartInternal(ctx, sampleAggregatorProcName)
   157  
   158  	earlyExit, err := s.mainLoop(s.Ctx)
   159  	if err != nil {
   160  		execinfra.DrainAndClose(s.Ctx, s.Out.Output(), err, s.pushTrailingMeta, s.input)
   161  	} else if !earlyExit {
   162  		s.pushTrailingMeta(s.Ctx)
   163  		s.input.ConsumerClosed()
   164  		s.Out.Close()
   165  	}
   166  	s.MoveToDraining(nil /* err */)
   167  }
   168  
   169  func (s *sampleAggregator) close() {
   170  	if s.InternalClose() {
   171  		s.memAcc.Close(s.Ctx)
   172  		s.tempMemAcc.Close(s.Ctx)
   173  		s.MemMonitor.Stop(s.Ctx)
   174  	}
   175  }
   176  
   177  func (s *sampleAggregator) mainLoop(ctx context.Context) (earlyExit bool, err error) {
   178  	var job *jobs.Job
   179  	jobID := s.spec.JobID
   180  	// Some tests run this code without a job, so check if the jobID is 0.
   181  	if jobID != 0 {
   182  		job, err = s.FlowCtx.Cfg.JobRegistry.LoadJob(ctx, s.spec.JobID)
   183  		if err != nil {
   184  			return false, err
   185  		}
   186  	}
   187  
   188  	lastReportedFractionCompleted := float32(-1)
   189  	// Report progress (0 to 1).
   190  	progFn := func(fractionCompleted float32) error {
   191  		if jobID == 0 {
   192  			return nil
   193  		}
   194  		// If it changed by less than 1%, just check for cancellation (which is more
   195  		// efficient).
   196  		if fractionCompleted < 1.0 && fractionCompleted < lastReportedFractionCompleted+0.01 {
   197  			return job.CheckStatus(ctx)
   198  		}
   199  		lastReportedFractionCompleted = fractionCompleted
   200  		return job.FractionProgressed(ctx, jobs.FractionUpdater(fractionCompleted))
   201  	}
   202  
   203  	var rowsProcessed uint64
   204  	progressUpdates := util.Every(SampleAggregatorProgressInterval)
   205  	var da sqlbase.DatumAlloc
   206  	var tmpSketch hyperloglog.Sketch
   207  	for {
   208  		row, meta := s.input.Next()
   209  		if meta != nil {
   210  			if meta.SamplerProgress != nil {
   211  				rowsProcessed += meta.SamplerProgress.RowsProcessed
   212  				if progressUpdates.ShouldProcess(timeutil.Now()) {
   213  					// Periodically report fraction progressed and check that the job has
   214  					// not been paused or canceled.
   215  					var fractionCompleted float32
   216  					if s.spec.RowsExpected > 0 {
   217  						fractionCompleted = float32(float64(rowsProcessed) / float64(s.spec.RowsExpected))
   218  						const maxProgress = 0.99
   219  						if fractionCompleted > maxProgress {
   220  							// Since the total number of rows expected is just an estimate,
   221  							// don't report more than 99% completion until the very end.
   222  							fractionCompleted = maxProgress
   223  						}
   224  					}
   225  
   226  					if err := progFn(fractionCompleted); err != nil {
   227  						return false, err
   228  					}
   229  				}
   230  				if meta.SamplerProgress.HistogramDisabled {
   231  					// One of the sampler processors probably ran out of memory while
   232  					// collecting histogram samples. Disable sample collection so we
   233  					// don't create a biased histogram.
   234  					s.sr.Disable()
   235  				}
   236  			} else if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
   237  				// No cleanup required; emitHelper() took care of it.
   238  				return true, nil
   239  			}
   240  			continue
   241  		}
   242  		if row == nil {
   243  			break
   244  		}
   245  
   246  		// The row is either:
   247  		//  - a sampled row, which has NULLs on all columns from sketchIdxCol
   248  		//    onward, or
   249  		//  - a sketch row, which has all NULLs on all columns before sketchIdxCol.
   250  		if row[s.sketchIdxCol].IsNull() {
   251  			// This must be a sampled row.
   252  			rank, err := row[s.rankCol].GetInt()
   253  			if err != nil {
   254  				return false, errors.NewAssertionErrorWithWrappedErrf(err, "decoding rank column")
   255  			}
   256  			// Retain the rows with the top ranks.
   257  			if err := s.sr.SampleRow(ctx, s.EvalCtx, row[:s.rankCol], uint64(rank)); err != nil {
   258  				if code := pgerror.GetPGCode(err); code != pgcode.OutOfMemory {
   259  					return false, err
   260  				}
   261  				// We hit an out of memory error. Clear the sample reservoir and
   262  				// disable histogram sample collection.
   263  				s.sr.Disable()
   264  				log.Info(ctx, "disabling histogram collection due to excessive memory utilization")
   265  			}
   266  			continue
   267  		}
   268  		// This is a sketch row.
   269  		sketchIdx, err := row[s.sketchIdxCol].GetInt()
   270  		if err != nil {
   271  			return false, err
   272  		}
   273  		if sketchIdx < 0 || sketchIdx > int64(len(s.sketches)) {
   274  			return false, errors.Errorf("invalid sketch index %d", sketchIdx)
   275  		}
   276  
   277  		numRows, err := row[s.numRowsCol].GetInt()
   278  		if err != nil {
   279  			return false, err
   280  		}
   281  		s.sketches[sketchIdx].numRows += numRows
   282  
   283  		numNulls, err := row[s.numNullsCol].GetInt()
   284  		if err != nil {
   285  			return false, err
   286  		}
   287  		s.sketches[sketchIdx].numNulls += numNulls
   288  
   289  		// Decode the sketch.
   290  		if err := row[s.sketchCol].EnsureDecoded(s.inTypes[s.sketchCol], &da); err != nil {
   291  			return false, err
   292  		}
   293  		d := row[s.sketchCol].Datum
   294  		if d == tree.DNull {
   295  			return false, errors.AssertionFailedf("NULL sketch data")
   296  		}
   297  		if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
   298  			return false, err
   299  		}
   300  		if err := s.sketches[sketchIdx].sketch.Merge(&tmpSketch); err != nil {
   301  			return false, errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
   302  		}
   303  	}
   304  	// Report progress one last time so we don't write results if the job was
   305  	// canceled.
   306  	if err = progFn(1.0); err != nil {
   307  		return false, err
   308  	}
   309  	return false, s.writeResults(ctx)
   310  }
   311  
   312  // writeResults inserts the new statistics into system.table_statistics.
   313  func (s *sampleAggregator) writeResults(ctx context.Context) error {
   314  	// Turn off tracing so these writes don't affect the results of EXPLAIN
   315  	// ANALYZE.
   316  	if span := opentracing.SpanFromContext(ctx); span != nil && tracing.IsRecording(span) {
   317  		// TODO(rytaft): this also hides writes in this function from SQL session
   318  		// traces.
   319  		ctx = opentracing.ContextWithSpan(ctx, nil)
   320  	}
   321  
   322  	// TODO(andrei): This method would benefit from a session interface on the
   323  	// internal executor instead of doing this weird thing where it uses the
   324  	// internal executor to execute one statement at a time inside a db.Txn()
   325  	// closure.
   326  	if err := s.FlowCtx.Cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
   327  		for _, si := range s.sketches {
   328  			distinctCount := int64(si.sketch.Estimate())
   329  			var histogram *stats.HistogramData
   330  			if si.spec.GenerateHistogram && len(s.sr.Get()) != 0 {
   331  				colIdx := int(si.spec.Columns[0])
   332  				typ := s.inTypes[colIdx]
   333  
   334  				h, err := s.generateHistogram(
   335  					ctx,
   336  					s.EvalCtx,
   337  					s.sr.Get(),
   338  					colIdx,
   339  					typ,
   340  					si.numRows-si.numNulls,
   341  					distinctCount,
   342  					int(si.spec.HistogramMaxBuckets),
   343  				)
   344  				if err != nil {
   345  					return err
   346  				}
   347  				histogram = &h
   348  			}
   349  
   350  			columnIDs := make([]sqlbase.ColumnID, len(si.spec.Columns))
   351  			for i, c := range si.spec.Columns {
   352  				columnIDs[i] = s.sampledCols[c]
   353  			}
   354  
   355  			// Delete old stats that have been superseded.
   356  			if err := stats.DeleteOldStatsForColumns(
   357  				ctx,
   358  				s.FlowCtx.Cfg.Executor,
   359  				txn,
   360  				s.tableID,
   361  				columnIDs,
   362  			); err != nil {
   363  				return err
   364  			}
   365  
   366  			// Insert the new stat.
   367  			if err := stats.InsertNewStat(
   368  				ctx,
   369  				s.FlowCtx.Cfg.Executor,
   370  				txn,
   371  				s.tableID,
   372  				si.spec.StatName,
   373  				columnIDs,
   374  				si.numRows,
   375  				distinctCount,
   376  				si.numNulls,
   377  				histogram,
   378  			); err != nil {
   379  				return err
   380  			}
   381  
   382  			// Release any memory temporarily used for this statistic.
   383  			s.tempMemAcc.Clear(ctx)
   384  		}
   385  
   386  		return nil
   387  	}); err != nil {
   388  		return err
   389  	}
   390  
   391  	if g, ok := s.FlowCtx.Cfg.Gossip.Optional(47925); ok {
   392  		// Gossip invalidation of the stat caches for this table.
   393  		return stats.GossipTableStatAdded(g, s.tableID)
   394  	}
   395  	return nil
   396  }
   397  
   398  // generateHistogram returns a histogram (on a given column) from a set of
   399  // samples.
   400  // numRows is the total number of rows from which values were sampled
   401  // (excluding rows that have NULL values on the histogram column).
   402  func (s *sampleAggregator) generateHistogram(
   403  	ctx context.Context,
   404  	evalCtx *tree.EvalContext,
   405  	samples []stats.SampledRow,
   406  	colIdx int,
   407  	colType *types.T,
   408  	numRows int64,
   409  	distinctCount int64,
   410  	maxBuckets int,
   411  ) (stats.HistogramData, error) {
   412  	// Account for the memory we'll use copying the samples into values.
   413  	if err := s.tempMemAcc.Grow(ctx, sizeOfDatum*int64(len(samples))); err != nil {
   414  		return stats.HistogramData{}, err
   415  	}
   416  	values := make(tree.Datums, 0, len(samples))
   417  
   418  	var da sqlbase.DatumAlloc
   419  	for _, sample := range samples {
   420  		ed := &sample.Row[colIdx]
   421  		// Ignore NULLs (they are counted separately).
   422  		if !ed.IsNull() {
   423  			beforeSize := ed.Datum.Size()
   424  			if err := ed.EnsureDecoded(colType, &da); err != nil {
   425  				return stats.HistogramData{}, err
   426  			}
   427  			afterSize := ed.Datum.Size()
   428  
   429  			// Perform memory accounting. This memory is not added to the temporary
   430  			// account since it won't be released until the sampleAggregator is
   431  			// destroyed.
   432  			if afterSize > beforeSize {
   433  				if err := s.memAcc.Grow(ctx, int64(afterSize-beforeSize)); err != nil {
   434  					return stats.HistogramData{}, err
   435  				}
   436  			}
   437  
   438  			values = append(values, ed.Datum)
   439  		}
   440  	}
   441  	return stats.EquiDepthHistogram(evalCtx, values, numRows, distinctCount, maxBuckets)
   442  }