github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sample_aggregator.go

// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"context"
	"time"

	"github.com/axiomhq/hyperloglog"
	"github.com/cockroachdb/cockroach/pkg/jobs"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/stats"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
	"github.com/opentracing/opentracing-go"
)

// A sample aggregator processor aggregates results from multiple sampler
// processors. See SampleAggregatorSpec for more details.
type sampleAggregator struct {
	execinfra.ProcessorBase

	spec    *execinfrapb.SampleAggregatorSpec
	input   execinfra.RowSource
	inTypes []*types.T
	sr      stats.SampleReservoir

	// memAcc accounts for memory accumulated throughout the life of the
	// sampleAggregator.
	memAcc mon.BoundAccount

	// tempMemAcc is used to account for memory that is allocated temporarily
	// and released before the sampleAggregator is finished.
	tempMemAcc mon.BoundAccount

	tableID     sqlbase.ID
	sampledCols []sqlbase.ColumnID
	sketches    []sketchInfo

	// Input column indices for special columns.
	rankCol      int
	sketchIdxCol int
	numRowsCol   int
	numNullsCol  int
	sketchCol    int
}
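
// The five special columns are appended after the sampled columns, so the
// input schema implied by the index assignments in newSampleAggregator
// looks like:
//
//	[ sampled columns ... | rank | sketchIdx | numRows | numNulls | sketch ]
//
// which is why rankCol is derived as len(input.OutputTypes()) - 5. A
// sampled row has NULLs in the last four columns (sketchIdx through
// sketch); a sketch row has NULLs in every column before sketchIdxCol,
// including rank. mainLoop dispatches on whether sketchIdxCol is NULL.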

var _ execinfra.Processor = &sampleAggregator{}

const sampleAggregatorProcName = "sample aggregator"

// SampleAggregatorProgressInterval is the frequency at which the
// SampleAggregator processor will report progress. It is mutable for testing.
var SampleAggregatorProgressInterval = 5 * time.Second

func newSampleAggregator(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec *execinfrapb.SampleAggregatorSpec,
	input execinfra.RowSource,
	post *execinfrapb.PostProcessSpec,
	output execinfra.RowReceiver,
) (*sampleAggregator, error) {
	for _, s := range spec.Sketches {
		if len(s.Columns) == 0 {
			return nil, errors.Errorf("no columns")
		}
		if _, ok := supportedSketchTypes[s.SketchType]; !ok {
			return nil, errors.Errorf("unsupported sketch type %s", s.SketchType)
		}
		if s.GenerateHistogram && s.HistogramMaxBuckets == 0 {
			return nil, errors.Errorf("histogram max buckets not specified")
		}
		if s.GenerateHistogram && len(s.Columns) != 1 {
			return nil, errors.Errorf("histograms require one column")
		}
	}

	ctx := flowCtx.EvalCtx.Ctx()
	// Limit the memory use by creating a child monitor with a hard limit.
	// The processor will disable histogram collection if this limit is not
	// enough.
	memMonitor := execinfra.NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, "sample-aggregator-mem")
	rankCol := len(input.OutputTypes()) - 5
	s := &sampleAggregator{
		spec:         spec,
		input:        input,
		inTypes:      input.OutputTypes(),
		memAcc:       memMonitor.MakeBoundAccount(),
		tempMemAcc:   memMonitor.MakeBoundAccount(),
		tableID:      spec.TableID,
		sampledCols:  spec.SampledColumnIDs,
		sketches:     make([]sketchInfo, len(spec.Sketches)),
		rankCol:      rankCol,
		sketchIdxCol: rankCol + 1,
		numRowsCol:   rankCol + 2,
		numNullsCol:  rankCol + 3,
		sketchCol:    rankCol + 4,
	}

	var sampleCols util.FastIntSet
	for i := range spec.Sketches {
		s.sketches[i] = sketchInfo{
			spec:     spec.Sketches[i],
			sketch:   hyperloglog.New14(),
			numNulls: 0,
			numRows:  0,
		}
		if spec.Sketches[i].GenerateHistogram {
			sampleCols.Add(int(spec.Sketches[i].Columns[0]))
		}
	}

	s.sr.Init(int(spec.SampleSize), input.OutputTypes()[:rankCol], &s.memAcc, sampleCols)

	if err := s.Init(
		nil, post, input.OutputTypes(), flowCtx, processorID, output, memMonitor,
		execinfra.ProcStateOpts{
			TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata {
				s.close()
				return nil
			},
		},
	); err != nil {
		return nil, err
	}
	return s, nil
}

func (s *sampleAggregator) pushTrailingMeta(ctx context.Context) {
	execinfra.SendTraceData(ctx, s.Out.Output())
}

// Run is part of the Processor interface.
func (s *sampleAggregator) Run(ctx context.Context) {
	s.input.Start(ctx)
	s.StartInternal(ctx, sampleAggregatorProcName)

	earlyExit, err := s.mainLoop(s.Ctx)
	if err != nil {
		execinfra.DrainAndClose(s.Ctx, s.Out.Output(), err, s.pushTrailingMeta, s.input)
	} else if !earlyExit {
		s.pushTrailingMeta(s.Ctx)
		s.input.ConsumerClosed()
		s.Out.Close()
	}
	s.MoveToDraining(nil /* err */)
}

func (s *sampleAggregator) close() {
	if s.InternalClose() {
		s.memAcc.Close(s.Ctx)
		s.tempMemAcc.Close(s.Ctx)
		s.MemMonitor.Stop(s.Ctx)
	}
}
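
// mainLoop consumes the input until it is exhausted, feeding sampled rows
// into the sample reservoir and merging sketch rows into the per-sketch
// totals, then writes the results to system.table_statistics. It returns
// earlyExit=true if the consumer closed the output early (in which case
// emitHelper has already taken care of cleanup).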
func (s *sampleAggregator) mainLoop(ctx context.Context) (earlyExit bool, err error) {
	var job *jobs.Job
	jobID := s.spec.JobID
	// Some tests run this code without a job, so check if the jobID is 0.
	if jobID != 0 {
		job, err = s.FlowCtx.Cfg.JobRegistry.LoadJob(ctx, s.spec.JobID)
		if err != nil {
			return false, err
		}
	}

	lastReportedFractionCompleted := float32(-1)
	// Report progress (0 to 1).
	progFn := func(fractionCompleted float32) error {
		if jobID == 0 {
			return nil
		}
		// If it changed by less than 1%, just check for cancellation (which is
		// more efficient).
		if fractionCompleted < 1.0 && fractionCompleted < lastReportedFractionCompleted+0.01 {
			return job.CheckStatus(ctx)
		}
		lastReportedFractionCompleted = fractionCompleted
		return job.FractionProgressed(ctx, jobs.FractionUpdater(fractionCompleted))
	}

	var rowsProcessed uint64
	progressUpdates := util.Every(SampleAggregatorProgressInterval)
	var da sqlbase.DatumAlloc
	var tmpSketch hyperloglog.Sketch
	for {
		row, meta := s.input.Next()
		if meta != nil {
			if meta.SamplerProgress != nil {
				rowsProcessed += meta.SamplerProgress.RowsProcessed
				if progressUpdates.ShouldProcess(timeutil.Now()) {
					// Periodically report the fraction progressed and check that the
					// job has not been paused or canceled.
					var fractionCompleted float32
					if s.spec.RowsExpected > 0 {
						fractionCompleted = float32(float64(rowsProcessed) / float64(s.spec.RowsExpected))
						const maxProgress = 0.99
						if fractionCompleted > maxProgress {
							// Since the total number of rows expected is just an estimate,
							// don't report more than 99% completion until the very end.
							fractionCompleted = maxProgress
						}
					}

					if err := progFn(fractionCompleted); err != nil {
						return false, err
					}
				}
				if meta.SamplerProgress.HistogramDisabled {
					// One of the sampler processors probably ran out of memory while
					// collecting histogram samples. Disable sample collection so we
					// don't create a biased histogram.
					s.sr.Disable()
				}
			} else if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) {
				// No cleanup required; emitHelper() took care of it.
				return true, nil
			}
			continue
		}
		if row == nil {
			break
		}

		// The row is either:
		//  - a sampled row, which has NULLs on all columns from sketchIdxCol
		//    onward, or
		//  - a sketch row, which has NULLs on all columns before sketchIdxCol.
		if row[s.sketchIdxCol].IsNull() {
			// This must be a sampled row.
			rank, err := row[s.rankCol].GetInt()
			if err != nil {
				return false, errors.NewAssertionErrorWithWrappedErrf(err, "decoding rank column")
			}
			// Retain the rows with the top ranks.
			if err := s.sr.SampleRow(ctx, s.EvalCtx, row[:s.rankCol], uint64(rank)); err != nil {
				if code := pgerror.GetPGCode(err); code != pgcode.OutOfMemory {
					return false, err
				}
				// We hit an out of memory error. Clear the sample reservoir and
				// disable histogram sample collection.
				s.sr.Disable()
				log.Info(ctx, "disabling histogram collection due to excessive memory utilization")
			}
			continue
		}
		// This is a sketch row.
		sketchIdx, err := row[s.sketchIdxCol].GetInt()
		if err != nil {
			return false, err
		}
		if sketchIdx < 0 || sketchIdx >= int64(len(s.sketches)) {
			return false, errors.Errorf("invalid sketch index %d", sketchIdx)
		}

		numRows, err := row[s.numRowsCol].GetInt()
		if err != nil {
			return false, err
		}
		s.sketches[sketchIdx].numRows += numRows

		numNulls, err := row[s.numNullsCol].GetInt()
		if err != nil {
			return false, err
		}
		s.sketches[sketchIdx].numNulls += numNulls

		// Decode the sketch.
		if err := row[s.sketchCol].EnsureDecoded(s.inTypes[s.sketchCol], &da); err != nil {
			return false, err
		}
		d := row[s.sketchCol].Datum
		if d == tree.DNull {
			return false, errors.AssertionFailedf("NULL sketch data")
		}
		if err := tmpSketch.UnmarshalBinary([]byte(*d.(*tree.DBytes))); err != nil {
			return false, err
		}
		if err := s.sketches[sketchIdx].sketch.Merge(&tmpSketch); err != nil {
			return false, errors.NewAssertionErrorWithWrappedErrf(err, "merging sketch data")
		}
	}
	// Report progress one last time so we don't write results if the job was
	// canceled.
	if err = progFn(1.0); err != nil {
		return false, err
	}
	return false, s.writeResults(ctx)
}
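
// A minimal illustration of the merge semantics above, using only the
// axiomhq/hyperloglog calls already imported (the values are made up):
//
//	a, b := hyperloglog.New14(), hyperloglog.New14()
//	a.Insert([]byte("x"))
//	b.Insert([]byte("x"))
//	b.Insert([]byte("y"))
//	_ = a.Merge(b)   // bucket-wise max, so the shared "x" is not double counted
//	_ = a.Estimate() // ~2 distinct values, not 3
//
// This is why numRows and numNulls are added directly while distinct counts
// must go through the sketch merge: the same value may appear in the inputs
// of several sampler processors.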

// writeResults inserts the new statistics into system.table_statistics.
func (s *sampleAggregator) writeResults(ctx context.Context) error {
	// Turn off tracing so these writes don't affect the results of EXPLAIN
	// ANALYZE.
	if span := opentracing.SpanFromContext(ctx); span != nil && tracing.IsRecording(span) {
		// TODO(rytaft): this also hides writes in this function from SQL session
		// traces.
		ctx = opentracing.ContextWithSpan(ctx, nil)
	}

	// TODO(andrei): This method would benefit from a session interface on the
	// internal executor instead of doing this weird thing where it uses the
	// internal executor to execute one statement at a time inside a db.Txn()
	// closure.
	if err := s.FlowCtx.Cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		for _, si := range s.sketches {
			distinctCount := int64(si.sketch.Estimate())
			var histogram *stats.HistogramData
			if si.spec.GenerateHistogram && len(s.sr.Get()) != 0 {
				colIdx := int(si.spec.Columns[0])
				typ := s.inTypes[colIdx]

				h, err := s.generateHistogram(
					ctx,
					s.EvalCtx,
					s.sr.Get(),
					colIdx,
					typ,
					si.numRows-si.numNulls,
					distinctCount,
					int(si.spec.HistogramMaxBuckets),
				)
				if err != nil {
					return err
				}
				histogram = &h
			}

			columnIDs := make([]sqlbase.ColumnID, len(si.spec.Columns))
			for i, c := range si.spec.Columns {
				columnIDs[i] = s.sampledCols[c]
			}

			// Delete old stats that have been superseded.
			if err := stats.DeleteOldStatsForColumns(
				ctx,
				s.FlowCtx.Cfg.Executor,
				txn,
				s.tableID,
				columnIDs,
			); err != nil {
				return err
			}

			// Insert the new stat.
			if err := stats.InsertNewStat(
				ctx,
				s.FlowCtx.Cfg.Executor,
				txn,
				s.tableID,
				si.spec.StatName,
				columnIDs,
				si.numRows,
				distinctCount,
				si.numNulls,
				histogram,
			); err != nil {
				return err
			}

			// Release any memory temporarily used for this statistic.
			s.tempMemAcc.Clear(ctx)
		}

		return nil
	}); err != nil {
		return err
	}

	if g, ok := s.FlowCtx.Cfg.Gossip.Optional(47925); ok {
		// Gossip invalidation of the stat caches for this table.
		return stats.GossipTableStatAdded(g, s.tableID)
	}
	return nil
}
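
// writeResults runs the stat deletions and insertions above inside a single
// kv.Txn, so readers of system.table_statistics never observe an
// intermediate state where the old statistics have been deleted but the new
// ones are not yet visible.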

// generateHistogram returns a histogram (on a given column) from a set of
// samples.
// numRows is the total number of rows from which values were sampled
// (excluding rows that have NULL values on the histogram column).
func (s *sampleAggregator) generateHistogram(
	ctx context.Context,
	evalCtx *tree.EvalContext,
	samples []stats.SampledRow,
	colIdx int,
	colType *types.T,
	numRows int64,
	distinctCount int64,
	maxBuckets int,
) (stats.HistogramData, error) {
	// Account for the memory we'll use copying the samples into values.
	if err := s.tempMemAcc.Grow(ctx, sizeOfDatum*int64(len(samples))); err != nil {
		return stats.HistogramData{}, err
	}
	values := make(tree.Datums, 0, len(samples))

	var da sqlbase.DatumAlloc
	for _, sample := range samples {
		ed := &sample.Row[colIdx]
		// Ignore NULLs (they are counted separately).
		if !ed.IsNull() {
			beforeSize := ed.Datum.Size()
			if err := ed.EnsureDecoded(colType, &da); err != nil {
				return stats.HistogramData{}, err
			}
			afterSize := ed.Datum.Size()

			// Perform memory accounting. This memory is not added to the temporary
			// account since it won't be released until the sampleAggregator is
			// destroyed.
			if afterSize > beforeSize {
				if err := s.memAcc.Grow(ctx, int64(afterSize-beforeSize)); err != nil {
					return stats.HistogramData{}, err
				}
			}

			values = append(values, ed.Datum)
		}
	}
	return stats.EquiDepthHistogram(evalCtx, values, numRows, distinctCount, maxBuckets)
}
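
// The two-account split above mirrors the fields on sampleAggregator:
// tempMemAcc covers the values slice, which is released (via Clear in
// writeResults) once each statistic is written, while the growth from
// decoding datums is charged to memAcc because the decoded datums live in
// the sample reservoir until the processor is destroyed.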