github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sampler.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "encoding/binary" 16 "time" 17 18 "github.com/axiomhq/hyperloglog" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 21 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 22 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 23 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 24 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 25 "github.com/cockroachdb/cockroach/pkg/sql/stats" 26 "github.com/cockroachdb/cockroach/pkg/sql/types" 27 "github.com/cockroachdb/cockroach/pkg/util" 28 "github.com/cockroachdb/cockroach/pkg/util/log" 29 "github.com/cockroachdb/cockroach/pkg/util/mon" 30 "github.com/cockroachdb/cockroach/pkg/util/randutil" 31 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 32 "github.com/cockroachdb/errors" 33 ) 34 35 // sketchInfo contains the specification and run-time state for each sketch. 36 type sketchInfo struct { 37 spec execinfrapb.SketchSpec 38 sketch *hyperloglog.Sketch 39 numNulls int64 40 numRows int64 41 } 42 43 // A sampler processor returns a random sample of rows, as well as "global" 44 // statistics (including cardinality estimation sketch data). See SamplerSpec 45 // for more details. 46 type samplerProcessor struct { 47 execinfra.ProcessorBase 48 49 flowCtx *execinfra.FlowCtx 50 input execinfra.RowSource 51 memAcc mon.BoundAccount 52 sr stats.SampleReservoir 53 sketches []sketchInfo 54 outTypes []*types.T 55 maxFractionIdle float64 56 57 // Output column indices for special columns. 58 rankCol int 59 sketchIdxCol int 60 numRowsCol int 61 numNullsCol int 62 sketchCol int 63 } 64 65 var _ execinfra.Processor = &samplerProcessor{} 66 67 const samplerProcName = "sampler" 68 69 // SamplerProgressInterval corresponds to the number of input rows after which 70 // the sampler will report progress by pushing a metadata record. It is mutable 71 // for testing. 72 var SamplerProgressInterval = 10000 73 74 var supportedSketchTypes = map[execinfrapb.SketchType]struct{}{ 75 // The code currently hardcodes the use of this single type of sketch 76 // (which avoids the extra complexity until we actually have multiple types). 77 execinfrapb.SketchType_HLL_PLUS_PLUS_V1: {}, 78 } 79 80 // maxIdleSleepTime is the maximum amount of time we sleep for throttling 81 // (we sleep once every SamplerProgressInterval rows). 82 const maxIdleSleepTime = 10 * time.Second 83 84 // At 25% average CPU usage we start throttling automatic stats. 85 const cpuUsageMinThrottle = 0.25 86 87 // At 75% average CPU usage we reach maximum throttling of automatic stats. 88 const cpuUsageMaxThrottle = 0.75 89 90 func newSamplerProcessor( 91 flowCtx *execinfra.FlowCtx, 92 processorID int32, 93 spec *execinfrapb.SamplerSpec, 94 input execinfra.RowSource, 95 post *execinfrapb.PostProcessSpec, 96 output execinfra.RowReceiver, 97 ) (*samplerProcessor, error) { 98 for _, s := range spec.Sketches { 99 if _, ok := supportedSketchTypes[s.SketchType]; !ok { 100 return nil, errors.Errorf("unsupported sketch type %s", s.SketchType) 101 } 102 } 103 104 ctx := flowCtx.EvalCtx.Ctx() 105 // Limit the memory use by creating a child monitor with a hard limit. 106 // The processor will disable histogram collection if this limit is not 107 // enough. 108 memMonitor := execinfra.NewLimitedMonitor(ctx, flowCtx.EvalCtx.Mon, flowCtx.Cfg, "sampler-mem") 109 s := &samplerProcessor{ 110 flowCtx: flowCtx, 111 input: input, 112 memAcc: memMonitor.MakeBoundAccount(), 113 sketches: make([]sketchInfo, len(spec.Sketches)), 114 maxFractionIdle: spec.MaxFractionIdle, 115 } 116 117 var sampleCols util.FastIntSet 118 for i := range spec.Sketches { 119 s.sketches[i] = sketchInfo{ 120 spec: spec.Sketches[i], 121 sketch: hyperloglog.New14(), 122 numNulls: 0, 123 numRows: 0, 124 } 125 if spec.Sketches[i].GenerateHistogram { 126 sampleCols.Add(int(spec.Sketches[i].Columns[0])) 127 } 128 } 129 130 s.sr.Init(int(spec.SampleSize), input.OutputTypes(), &s.memAcc, sampleCols) 131 132 inTypes := input.OutputTypes() 133 outTypes := make([]*types.T, 0, len(inTypes)+5) 134 135 // First columns are the same as the input. 136 outTypes = append(outTypes, inTypes...) 137 138 // An INT column for the rank of each row. 139 s.rankCol = len(outTypes) 140 outTypes = append(outTypes, types.Int) 141 142 // An INT column indicating the sketch index. 143 s.sketchIdxCol = len(outTypes) 144 outTypes = append(outTypes, types.Int) 145 146 // An INT column indicating the number of rows processed. 147 s.numRowsCol = len(outTypes) 148 outTypes = append(outTypes, types.Int) 149 150 // An INT column indicating the number of rows that have a NULL in all sketch 151 // columns. 152 s.numNullsCol = len(outTypes) 153 outTypes = append(outTypes, types.Int) 154 155 // A BYTES column with the sketch data. 156 s.sketchCol = len(outTypes) 157 outTypes = append(outTypes, types.Bytes) 158 s.outTypes = outTypes 159 160 if err := s.Init( 161 nil, post, outTypes, flowCtx, processorID, output, memMonitor, 162 execinfra.ProcStateOpts{ 163 TrailingMetaCallback: func(context.Context) []execinfrapb.ProducerMetadata { 164 s.close() 165 return nil 166 }, 167 }, 168 ); err != nil { 169 return nil, err 170 } 171 return s, nil 172 } 173 174 func (s *samplerProcessor) pushTrailingMeta(ctx context.Context) { 175 execinfra.SendTraceData(ctx, s.Out.Output()) 176 } 177 178 // Run is part of the Processor interface. 179 func (s *samplerProcessor) Run(ctx context.Context) { 180 s.input.Start(ctx) 181 s.StartInternal(ctx, samplerProcName) 182 183 earlyExit, err := s.mainLoop(s.Ctx) 184 if err != nil { 185 execinfra.DrainAndClose(s.Ctx, s.Out.Output(), err, s.pushTrailingMeta, s.input) 186 } else if !earlyExit { 187 s.pushTrailingMeta(s.Ctx) 188 s.input.ConsumerClosed() 189 s.Out.Close() 190 } 191 s.MoveToDraining(nil /* err */) 192 } 193 194 // TestingSamplerSleep introduces a sleep inside the sampler, every 195 // <samplerProgressInterval>. Used to simulate a heavily throttled 196 // run for testing. 197 var TestingSamplerSleep time.Duration 198 199 func (s *samplerProcessor) mainLoop(ctx context.Context) (earlyExit bool, err error) { 200 rng, _ := randutil.NewPseudoRand() 201 var da sqlbase.DatumAlloc 202 var buf []byte 203 rowCount := 0 204 lastWakeupTime := timeutil.Now() 205 for { 206 row, meta := s.input.Next() 207 if meta != nil { 208 if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) { 209 // No cleanup required; emitHelper() took care of it. 210 return true, nil 211 } 212 continue 213 } 214 if row == nil { 215 break 216 } 217 218 rowCount++ 219 if rowCount%SamplerProgressInterval == 0 { 220 // Send a metadata record to check that the consumer is still alive and 221 // report number of rows processed since the last update. 222 meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{ 223 RowsProcessed: uint64(SamplerProgressInterval), 224 }} 225 if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) { 226 return true, nil 227 } 228 229 if s.maxFractionIdle > 0 { 230 // Look at CRDB's average CPU usage in the last 10 seconds: 231 // - if it is lower than cpuUsageMinThrottle, we do not throttle; 232 // - if it is higher than cpuUsageMaxThrottle, we throttle all the way; 233 // - in-between, we scale the idle time proportionally. 234 usage := s.flowCtx.Cfg.RuntimeStats.GetCPUCombinedPercentNorm() 235 236 if usage > cpuUsageMinThrottle { 237 fractionIdle := s.maxFractionIdle 238 if usage < cpuUsageMaxThrottle { 239 fractionIdle *= (usage - cpuUsageMinThrottle) / 240 (cpuUsageMaxThrottle - cpuUsageMinThrottle) 241 } 242 if log.V(1) { 243 log.Infof( 244 ctx, "throttling to fraction idle %.2f (based on usage %.2f)", fractionIdle, usage, 245 ) 246 } 247 248 elapsed := timeutil.Now().Sub(lastWakeupTime) 249 // Throttle the processor according to fractionIdle. 250 // Wait time is calculated as follows: 251 // 252 // fraction_idle = t_wait / (t_run + t_wait) 253 // ==> t_wait = t_run * fraction_idle / (1 - fraction_idle) 254 // 255 wait := time.Duration(float64(elapsed) * fractionIdle / (1 - fractionIdle)) 256 if wait > maxIdleSleepTime { 257 wait = maxIdleSleepTime 258 } 259 timer := time.NewTimer(wait) 260 defer timer.Stop() 261 select { 262 case <-timer.C: 263 break 264 case <-s.flowCtx.Stopper().ShouldStop(): 265 break 266 } 267 } 268 lastWakeupTime = timeutil.Now() 269 } 270 271 if TestingSamplerSleep != 0 { 272 time.Sleep(TestingSamplerSleep) 273 } 274 } 275 276 var intbuf [8]byte 277 for i := range s.sketches { 278 s.sketches[i].numRows++ 279 280 var col uint32 281 var useFastPath bool 282 if len(s.sketches[i].spec.Columns) == 1 { 283 col = s.sketches[i].spec.Columns[0] 284 isNull := row[col].IsNull() 285 useFastPath = s.outTypes[col].Family() == types.IntFamily && !isNull 286 } 287 288 if useFastPath { 289 // Fast path for integers. 290 // TODO(radu): make this more general. 291 val, err := row[col].GetInt() 292 if err != nil { 293 return false, err 294 } 295 296 // Note: this encoding is not identical with the one in the general path 297 // below, but it achieves the same thing (we want equal integers to 298 // encode to equal []bytes). The only caveat is that all samplers must 299 // use the same encodings, so changes will require a new SketchType to 300 // avoid problems during upgrade. 301 // 302 // We could use a more efficient hash function and use InsertHash, but 303 // it must be a very good hash function (HLL expects the hash values to 304 // be uniformly distributed in the 2^64 range). Experiments (on tpcc 305 // order_line) with simplistic functions yielded bad results. 306 binary.LittleEndian.PutUint64(intbuf[:], uint64(val)) 307 s.sketches[i].sketch.Insert(intbuf[:]) 308 } else { 309 isNull := true 310 buf = buf[:0] 311 for _, col := range s.sketches[i].spec.Columns { 312 buf, err = row[col].Fingerprint(s.outTypes[col], &da, buf) 313 isNull = isNull && row[col].IsNull() 314 if err != nil { 315 return false, err 316 } 317 } 318 if isNull { 319 s.sketches[i].numNulls++ 320 } 321 s.sketches[i].sketch.Insert(buf) 322 } 323 } 324 325 // Use Int63 so we don't have headaches converting to DInt. 326 rank := uint64(rng.Int63()) 327 if err := s.sr.SampleRow(ctx, s.EvalCtx, row, rank); err != nil { 328 if code := pgerror.GetPGCode(err); code != pgcode.OutOfMemory { 329 return false, err 330 } 331 // We hit an out of memory error. Clear the sample reservoir and 332 // disable histogram sample collection. 333 s.sr.Disable() 334 log.Info(ctx, "disabling histogram collection due to excessive memory utilization") 335 336 // Send a metadata record so the sample aggregator will also disable 337 // histogram collection. 338 meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{ 339 HistogramDisabled: true, 340 }} 341 if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) { 342 return true, nil 343 } 344 } 345 } 346 347 outRow := make(sqlbase.EncDatumRow, len(s.outTypes)) 348 for i := range outRow { 349 outRow[i] = sqlbase.DatumToEncDatum(s.outTypes[i], tree.DNull) 350 } 351 // Emit the sampled rows. 352 for _, sample := range s.sr.Get() { 353 copy(outRow, sample.Row) 354 outRow[s.rankCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(sample.Rank))} 355 if !emitHelper(ctx, &s.Out, outRow, nil /* meta */, s.pushTrailingMeta, s.input) { 356 return true, nil 357 } 358 } 359 // Release the memory for the sampled rows. 360 s.sr = stats.SampleReservoir{} 361 362 // Emit the sketch rows. 363 for i := range outRow { 364 outRow[i] = sqlbase.DatumToEncDatum(s.outTypes[i], tree.DNull) 365 } 366 367 for i, si := range s.sketches { 368 outRow[s.sketchIdxCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(i))} 369 outRow[s.numRowsCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numRows))} 370 outRow[s.numNullsCol] = sqlbase.EncDatum{Datum: tree.NewDInt(tree.DInt(si.numNulls))} 371 data, err := si.sketch.MarshalBinary() 372 if err != nil { 373 return false, err 374 } 375 outRow[s.sketchCol] = sqlbase.EncDatum{Datum: tree.NewDBytes(tree.DBytes(data))} 376 if !emitHelper(ctx, &s.Out, outRow, nil /* meta */, s.pushTrailingMeta, s.input) { 377 return true, nil 378 } 379 } 380 381 // Send one last progress update to the consumer. 382 meta := &execinfrapb.ProducerMetadata{SamplerProgress: &execinfrapb.RemoteProducerMetadata_SamplerProgress{ 383 RowsProcessed: uint64(rowCount % SamplerProgressInterval), 384 }} 385 if !emitHelper(ctx, &s.Out, nil /* row */, meta, s.pushTrailingMeta, s.input) { 386 return true, nil 387 } 388 389 return false, nil 390 } 391 392 func (s *samplerProcessor) close() { 393 if s.InternalClose() { 394 s.memAcc.Close(s.Ctx) 395 s.MemMonitor.Stop(s.Ctx) 396 } 397 }