github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sampler.go

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"encoding/binary"
    16  	"time"
    17  
    18  	"github.com/axiomhq/hyperloglog"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/stats"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    27  	"github.com/cockroachdb/cockroach/pkg/util"
    28  	"github.com/cockroachdb/cockroach/pkg/util/log"
    29  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    30  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    31  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    32  	"github.com/cockroachdb/errors"
    33  )
    34  
    35  // sketchInfo contains the specification and run-time state for each sketch.
    36  type sketchInfo struct {
    37  	spec     execinfrapb.SketchSpec
    38  	sketch   *hyperloglog.Sketch
    39  	numNulls int64
    40  	numRows  int64
    41  }
    42  
    43  // A sampler processor returns a random sample of rows, as well as "global"
    44  // statistics (including cardinality estimation sketch data). See SamplerSpec
    45  // for more details.
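         //
         // The output rows have the same columns as the input, followed by five
         // extra columns (assembled in newSamplerProcessor): a rank column that is
         // set on the sampled rows, and sketch index, row count, null count, and
         // sketch data columns that are set on the per-sketch summary rows.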
    46  type samplerProcessor struct {
    47  	execinfra.ProcessorBase
    48  
    49  	flowCtx         *execinfra.FlowCtx
    50  	input           execinfra.RowSource
    51  	memAcc          mon.BoundAccount
    52  	sr              stats.SampleReservoir
    53  	sketches        []sketchInfo
    54  	outTypes        []*types.T
    55  	maxFractionIdle float64
    56  
    57  	// Output column indices for special columns.
    58  	rankCol      int
    59  	sketchIdxCol int
    60  	numRowsCol   int
    61  	numNullsCol  int
    62  	sketchCol    int
    63  }
    64  
    65  var _ execinfra.Processor = &samplerProcessor{}
    66  
    67  const samplerProcName = "sampler"
    68  
    69  // SamplerProgressInterval corresponds to the number of input rows after which
    70  // the sampler will report progress by pushing a metadata record.  It is mutable
    71  // for testing.
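         // Each interval is also the point at which the sampler throttles itself
         // based on CPU usage and where TestingSamplerSleep is applied; see mainLoop.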
    72  var SamplerProgressInterval = 10000
    73  
    74  var supportedSketchTypes = map[execinfrapb.SketchType]struct{}{
    75  	// The code currently hardcodes the use of this single type of sketch
    76  	// (which avoids the extra complexity until we actually have multiple types).
    77  	execinfrapb.SketchType_HLL_PLUS_PLUS_V1: {},
    78  }
    79  
    80  // maxIdleSleepTime is the maximum amount of time we sleep for throttling
    81  // (we sleep once every SamplerProgressInterval rows).
    82  const maxIdleSleepTime = 10 * time.Second
    83  
    84  // At 25% average CPU usage we start throttling automatic stats.
    85  const cpuUsageMinThrottle = 0.25
    86  
    87  // At 75% average CPU usage we reach maximum throttling of automatic stats.
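         // In between, the idle fraction scales linearly: e.g. with
         // maxFractionIdle = 0.9 and a measured usage of 0.5, mainLoop computes
         // 0.9 * (0.5 - 0.25) / (0.75 - 0.25) = 0.45 as the target idle fraction.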
    88  const cpuUsageMaxThrottle = 0.75
    89  
    90  func newSamplerProcessor(
    91  	flowCtx *execinfra.FlowCtx,
    92  	processorID int32,
    93  	spec *execinfrapb.SamplerSpec,
    94  	input execinfra.RowSource,
    95  	post *execinfrapb.PostProcessSpec,
    96  	output execinfra.RowReceiver,
    97  ) (*samplerProcessor, error) {
    98  	for _, s := range spec.Sketches {
    99  		if _, ok := supportedSketchTypes[s.SketchType]; !ok {
   100  			return nil, errors.Errorf("unsupported sketch type %s", s.SketchType)
   101  		}
   102  	}
   103  
   104  	ctx := flowCtx.EvalCtx.Ctx()
   105  	// Limit the memory use by creating a child monitor with a hard limit.
   106  	// The processor will disable histogram collection if this limit is not
   107  	// enough.
   108  	memMonitor := execinfra.NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, "sampler-mem")
   109  	s := &samplerProcessor{
   110  		flowCtx:         flowCtx,
   111  		input:           input,
   112  		memAcc:          memMonitor.MakeBoundAccount(),
   113  		sketches:        make([]sketchInfo, len(spec.Sketches)),
   114  		maxFractionIdle: spec.MaxFractionIdle,
   115  	}
   116  
   117  	var sampleCols util.FastIntSet
   118  	for i := range spec.Sketches {
   119  		s.sketches[i] = sketchInfo{
   120  			spec:     spec.Sketches[i],
   121  			sketch:   hyperloglog.New14(),
   122  			numNulls: 0,
   123  			numRows:  0,
   124  		}
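         		// Histograms are only generated on the first column of a sketch;
         		// collect those columns so the sample reservoir knows which values
         		// the samples need to keep.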
   125  		if spec.Sketches[i].GenerateHistogram {
   126  			sampleCols.Add(int(spec.Sketches[i].Columns[0]))
   127  		}
   128  	}
   129  
   130  	s.sr.Init(int(spec.SampleSize), input.OutputTypes(), &s.memAcc, sampleCols)
   131  
   132  	inTypes := input.OutputTypes()
   133  	outTypes := make([]*types.T, 0, len(inTypes)+5)
   134  
   135  	// First columns are the same as the input.
   136  	outTypes = append(outTypes, inTypes...)
   137  
   138  	// An INT column for the rank of each row.
   139  	s.rankCol = len(outTypes)
   140  	outTypes = append(outTypes, types.Int)
   141  
   142  	// An INT column indicating the sketch index.
   143  	s.sketchIdxCol = len(outTypes)
   144  	outTypes = append(outTypes, types.Int)
   145  
   146  	// An INT column indicating the number of rows processed.
   147  	s.numRowsCol = len(outTypes)
   148  	outTypes = append(outTypes, types.Int)
   149  
   150  	// An INT column indicating the number of rows that have a NULL in all sketch
   151  	// columns.
   152  	s.numNullsCol = len(outTypes)
   153  	outTypes = append(outTypes, types.Int)
   154  
   155  	// A BYTES column with the sketch data.
   156  	s.sketchCol = len(outTypes)
   157  	outTypes = append(outTypes, types.Bytes)
   158  	s.outTypes = outTypes
   159  
   160  	if err := s.Init(
   161  		nil, post, outTypes, flowCtx, processorID, output, memMonitor,
   162  		execinfra.ProcStateOpts{
   163  			TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata {
   164  				s.close()
   165  				return nil
   166  			},
   167  		},
   168  	); err != nil {
   169  		return nil, err
   170  	}
   171  	return s, nil
   172  }
   173  
   174  func (s *samplerProcessor) pushTrailingMeta(ctx context.Context) {
   175  	execinfra.SendTraceData(ctx, s.Out.Output())
   176  }
   177  
   178  // Run is part of the Processor interface.
   179  func (s *samplerProcessor) Run(ctx context.Context) {
   180  	s.input.Start(ctx)
   181  	s.StartInternal(ctx, samplerProcName)
   182  
   183  	earlyExit, err := s.mainLoop(s.Ctx)
   184  	if err != nil {
   185  		execinfra.DrainAndClose(s.Ctx, s.Out.Output(), err, s.pushTrailingMeta, s.input)
   186  	} else if !earlyExit {
   187  		s.pushTrailingMeta(s.Ctx)
   188  		s.input.ConsumerClosed()
   189  		s.Out.Close()
   190  	}
   191  	s.MoveToDraining(nil /* err */)
   192  }
   193  
    194  // TestingSamplerSleep introduces a sleep inside the sampler every
    195  // <SamplerProgressInterval> rows. It is used to simulate a heavily
    196  // throttled run for testing.
   197  var TestingSamplerSleep time.Duration
   198  
   199  func (s *samplerProcessor) mainLoop(ctx context.Context) (earlyExit bool, err error) {
   200  	rng, _ := randutil.NewPseudoRand()
   201  	var da sqlbase.DatumAlloc
   202  	var buf []byte
   203  	rowCount := 0
   204  	lastWakeupTime := timeutil.Now()
   205  	for {
   206  		row, meta := s.input.Next()
   207  		if meta != nil {
   208  			if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
   209  				// No cleanup required; emitHelper() took care of it.
   210  				return true, nil
   211  			}
   212  			continue
   213  		}
   214  		if row == nil {
   215  			break
   216  		}
   217  
   218  		rowCount++
   219  		if rowCount%SamplerProgressInterval == 0 {
   220  			// Send a metadata record to check that the consumer is still alive and
    221  			// report the number of rows processed since the last update.
   222  			meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{
   223  				RowsProcessed: uint64(SamplerProgressInterval),
   224  			}}
   225  			if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
   226  				return true, nil
   227  			}
   228  
   229  			if s.maxFractionIdle > 0 {
   230  				// Look at CRDB's average CPU usage in the last 10 seconds:
   231  				//  - if it is lower than cpuUsageMinThrottle, we do not throttle;
   232  				//  - if it is higher than cpuUsageMaxThrottle, we throttle all the way;
   233  				//  - in-between, we scale the idle time proportionally.
   234  				usage := s.flowCtx.Cfg.RuntimeStats.GetCPUCombinedPercentNorm()
   235  
   236  				if usage > cpuUsageMinThrottle {
   237  					fractionIdle := s.maxFractionIdle
   238  					if usage < cpuUsageMaxThrottle {
   239  						fractionIdle *= (usage - cpuUsageMinThrottle) /
   240  							(cpuUsageMaxThrottle - cpuUsageMinThrottle)
   241  					}
   242  					if log.V(1) {
   243  						log.Infof(
   244  							ctx, "throttling to fraction idle %.2f (based on usage %.2f)", fractionIdle, usage,
   245  						)
   246  					}
   247  
   248  					elapsed := timeutil.Now().Sub(lastWakeupTime)
   249  					// Throttle the processor according to fractionIdle.
   250  					// Wait time is calculated as follows:
   251  					//
   252  					//       fraction_idle = t_wait / (t_run + t_wait)
   253  					//  ==>  t_wait = t_run * fraction_idle / (1 - fraction_idle)
   254  					//
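         					// For example, if 1s of work elapsed since the last wakeup and
         					// fractionIdle is 0.5, we wait 1s * 0.5 / (1 - 0.5) = 1s, i.e. the
         					// processor is idle roughly half of the time. The wait is capped
         					// at maxIdleSleepTime below.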
   255  					wait := time.Duration(float64(elapsed) * fractionIdle / (1 - fractionIdle))
   256  					if wait > maxIdleSleepTime {
   257  						wait = maxIdleSleepTime
   258  					}
   259  					timer := time.NewTimer(wait)
   260  					defer timer.Stop()
   261  					select {
   262  					case <-timer.C:
   263  						break
   264  					case <-s.flowCtx.Stopper().ShouldStop():
   265  						break
   266  					}
   267  				}
   268  				lastWakeupTime = timeutil.Now()
   269  			}
   270  
   271  			if TestingSamplerSleep != 0 {
   272  				time.Sleep(TestingSamplerSleep)
   273  			}
   274  		}
   275  
   276  		var intbuf [8]byte
   277  		for i := range s.sketches {
   278  			s.sketches[i].numRows++
   279  
   280  			var col uint32
   281  			var useFastPath bool
   282  			if len(s.sketches[i].spec.Columns) == 1 {
   283  				col = s.sketches[i].spec.Columns[0]
   284  				isNull := row[col].IsNull()
   285  				useFastPath = s.outTypes[col].Family() == types.IntFamily && !isNull
   286  			}
   287  
   288  			if useFastPath {
   289  				// Fast path for integers.
   290  				// TODO(radu): make this more general.
   291  				val, err := row[col].GetInt()
   292  				if err != nil {
   293  					return false, err
   294  				}
   295  
    296  				// Note: this encoding is not identical to the one in the general path
   297  				// below, but it achieves the same thing (we want equal integers to
   298  				// encode to equal []bytes). The only caveat is that all samplers must
   299  				// use the same encodings, so changes will require a new SketchType to
   300  				// avoid problems during upgrade.
   301  				//
   302  				// We could use a more efficient hash function and use InsertHash, but
   303  				// it must be a very good hash function (HLL expects the hash values to
   304  				// be uniformly distributed in the 2^64 range). Experiments (on tpcc
   305  				// order_line) with simplistic functions yielded bad results.
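         				//
         				// For example, an INT value of 42 encodes to the 8 little-endian
         				// bytes 2a 00 00 00 00 00 00 00; every row with 42 in this column
         				// produces the same bytes, so the sketch counts the value only
         				// once toward its distinct-count estimate.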
   306  				binary.LittleEndian.PutUint64(intbuf[:], uint64(val))
   307  				s.sketches[i].sketch.Insert(intbuf[:])
   308  			} else {
   309  				isNull := true
   310  				buf = buf[:0]
   311  				for _, col := range s.sketches[i].spec.Columns {
   312  					buf, err = row[col].Fingerprint(s.outTypes[col], &da, buf)
   313  					isNull = isNull && row[col].IsNull()
   314  					if err != nil {
   315  						return false, err
   316  					}
   317  				}
   318  				if isNull {
   319  					s.sketches[i].numNulls++
   320  				}
   321  				s.sketches[i].sketch.Insert(buf)
   322  			}
   323  		}
   324  
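         		// Offer the row to the reservoir under a uniformly random rank; the
         		// reservoir retains the rows with the smallest ranks seen so far,
         		// which yields a uniform random sample of the input.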
   325  		// Use Int63 so we don't have headaches converting to DInt.
   326  		rank := uint64(rng.Int63())
   327  		if err := s.sr.SampleRow(ctx, s.EvalCtx, row, rank); err != nil {
   328  			if code := pgerror.GetPGCode(err); code != pgcode.OutOfMemory {
   329  				return false, err
   330  			}
   331  			// We hit an out of memory error. Clear the sample reservoir and
   332  			// disable histogram sample collection.
   333  			s.sr.Disable()
   334  			log.Info(ctx, "disabling histogram collection due to excessive memory utilization")
   335  
   336  			// Send a metadata record so the sample aggregator will also disable
   337  			// histogram collection.
   338  			meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{
   339  				HistogramDisabled: true,
   340  			}}
   341  			if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
   342  				return true, nil
   343  			}
   344  		}
   345  	}
   346  
   347  	outRow := make(sqlbase.EncDatumRow, len(s.outTypes))
   348  	for i := range outRow {
   349  		outRow[i] = sqlbase.DatumToEncDatum(s.outTypes[i], tree.DNull)
   350  	}
   351  	// Emit the sampled rows.
   352  	for _, sample := range s.sr.Get() {
   353  		copy(outRow, sample.Row)
   354  		outRow[s.rankCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(sample.Rank))}
   355  		if !emitHelper(ctx, &s.Out, outRow, nil /* meta */, s.pushTrailingMeta, s.input) {
   356  			return true, nil
   357  		}
   358  	}
   359  	// Release the memory for the sampled rows.
   360  	s.sr = stats.SampleReservoir{}
   361  
   362  	// Emit the sketch rows.
   363  	for i := range outRow {
   364  		outRow[i] = sqlbase.DatumToEncDatum(s.outTypes[i], tree.DNull)
   365  	}
   366  
   367  	for i, si := range s.sketches {
   368  		outRow[s.sketchIdxCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(i))}
   369  		outRow[s.numRowsCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numRows))}
   370  		outRow[s.numNullsCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numNulls))}
   371  		data, err := si.sketch.MarshalBinary()
   372  		if err != nil {
   373  			return false, err
   374  		}
   375  		outRow[s.sketchCol] = sqlbase.EncDatum{Datum: tree.NewDBytes(tree.DBytes(data))}
   376  		if !emitHelper(ctx, &s.Out, outRow, nil /* meta */, s.pushTrailingMeta, s.input) {
   377  			return true, nil
   378  		}
   379  	}
   380  
   381  	// Send one last progress update to the consumer.
   382  	meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{
   383  		RowsProcessed: uint64(rowCount % SamplerProgressInterval),
   384  	}}
   385  	if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
   386  		return true, nil
   387  	}
   388  
   389  	return false, nil
   390  }
   391  
   392  func (s *samplerProcessor) close() {
   393  	if s.InternalClose() {
   394  		s.memAcc.Close(s.Ctx)
   395  		s.MemMonitor.Stop(s.Ctx)
   396  	}
   397  }