github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql_plan_stats.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "time" 16 17 "github.com/cockroachdb/cockroach/pkg/jobs" 18 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 19 "github.com/cockroachdb/cockroach/pkg/kv" 20 "github.com/cockroachdb/cockroach/pkg/settings" 21 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 22 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 23 "github.com/cockroachdb/cockroach/pkg/sql/span" 24 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 25 "github.com/cockroachdb/cockroach/pkg/sql/stats" 26 "github.com/cockroachdb/cockroach/pkg/sql/types" 27 "github.com/cockroachdb/cockroach/pkg/util" 28 "github.com/cockroachdb/cockroach/pkg/util/hlc" 29 "github.com/cockroachdb/errors" 30 "github.com/cockroachdb/logtags" 31 ) 32 33 type requestedStat struct { 34 columns []sqlbase.ColumnID 35 histogram bool 36 histogramMaxBuckets int 37 name string 38 } 39 40 const histogramSamples = 10000 41 const histogramBuckets = 200 42 43 // maxTimestampAge is the maximum allowed age of a scan timestamp during table 44 // stats collection, used when creating statistics AS OF SYSTEM TIME. The 45 // timestamp is advanced during long operations as needed. See TableReaderSpec. 46 // 47 // The lowest TTL we recommend is 10 minutes. This value must be be lower than 48 // that. 49 var maxTimestampAge = settings.RegisterDurationSetting( 50 "sql.stats.max_timestamp_age", 51 "maximum age of timestamp during table statistics collection", 52 5*time.Minute, 53 ) 54 55 func (dsp *DistSQLPlanner) createStatsPlan( 56 planCtx *PlanningCtx, 57 desc *sqlbase.ImmutableTableDescriptor, 58 reqStats []requestedStat, 59 job *jobs.Job, 60 ) (*PhysicalPlan, error) { 61 if len(reqStats) == 0 { 62 return nil, errors.New("no stats requested") 63 } 64 65 details := job.Details().(jobspb.CreateStatsDetails) 66 67 // Calculate the set of columns we need to scan. 68 var colCfg scanColumnsConfig 69 var tableColSet util.FastIntSet 70 for _, s := range reqStats { 71 for _, c := range s.columns { 72 if !tableColSet.Contains(int(c)) { 73 tableColSet.Add(int(c)) 74 colCfg.wantedColumns = append(colCfg.wantedColumns, tree.ColumnID(c)) 75 } 76 } 77 } 78 79 // Create the table readers; for this we initialize a dummy scanNode. 80 scan := scanNode{desc: desc} 81 err := scan.initDescDefaults(colCfg) 82 if err != nil { 83 return nil, err 84 } 85 sb := span.MakeBuilder(planCtx.planner.ExecCfg().Codec, desc.TableDesc(), scan.index) 86 scan.spans, err = sb.UnconstrainedSpans() 87 if err != nil { 88 return nil, err 89 } 90 scan.isFull = true 91 92 p, err := dsp.createTableReaders(planCtx, &scan) 93 if err != nil { 94 return nil, err 95 } 96 97 if details.AsOf != nil { 98 // If the read is historical, set the max timestamp age. 99 val := maxTimestampAge.Get(&dsp.st.SV) 100 for i := range p.Processors { 101 spec := p.Processors[i].Spec.Core.TableReader 102 spec.MaxTimestampAgeNanos = uint64(val) 103 } 104 } 105 106 sketchSpecs := make([]execinfrapb.SketchSpec, len(reqStats)) 107 sampledColumnIDs := make([]sqlbase.ColumnID, len(scan.cols)) 108 for i, s := range reqStats { 109 spec := execinfrapb.SketchSpec{ 110 SketchType: execinfrapb.SketchType_HLL_PLUS_PLUS_V1, 111 GenerateHistogram: s.histogram, 112 HistogramMaxBuckets: uint32(s.histogramMaxBuckets), 113 Columns: make([]uint32, len(s.columns)), 114 StatName: s.name, 115 } 116 for i, colID := range s.columns { 117 colIdx, ok := scan.colIdxMap[colID] 118 if !ok { 119 panic("necessary column not scanned") 120 } 121 streamColIdx := p.PlanToStreamColMap[colIdx] 122 spec.Columns[i] = uint32(streamColIdx) 123 sampledColumnIDs[streamColIdx] = colID 124 } 125 126 sketchSpecs[i] = spec 127 } 128 129 // Set up the samplers. 130 sampler := &execinfrapb.SamplerSpec{Sketches: sketchSpecs} 131 for _, s := range reqStats { 132 sampler.MaxFractionIdle = details.MaxFractionIdle 133 if s.histogram { 134 sampler.SampleSize = histogramSamples 135 } 136 } 137 138 // The sampler outputs the original columns plus a rank column and four sketch columns. 139 outTypes := make([]*types.T, 0, len(p.ResultTypes)+5) 140 outTypes = append(outTypes, p.ResultTypes...) 141 // An INT column for the rank of each row. 142 outTypes = append(outTypes, types.Int) 143 // An INT column indicating the sketch index. 144 outTypes = append(outTypes, types.Int) 145 // An INT column indicating the number of rows processed. 146 outTypes = append(outTypes, types.Int) 147 // An INT column indicating the number of rows that have a NULL in any sketch 148 // column. 149 outTypes = append(outTypes, types.Int) 150 // A BYTES column with the sketch data. 151 outTypes = append(outTypes, types.Bytes) 152 153 p.AddNoGroupingStage( 154 execinfrapb.ProcessorCoreUnion{Sampler: sampler}, 155 execinfrapb.PostProcessSpec{}, 156 outTypes, 157 execinfrapb.Ordering{}, 158 ) 159 160 // Estimate the expected number of rows based on existing stats in the cache. 161 tableStats, err := planCtx.planner.execCfg.TableStatsCache.GetTableStats(planCtx.ctx, desc.ID) 162 if err != nil { 163 return nil, err 164 } 165 166 var rowsExpected uint64 167 if len(tableStats) > 0 { 168 overhead := stats.AutomaticStatisticsFractionStaleRows.Get(&dsp.st.SV) 169 // Convert to a signed integer first to make the linter happy. 170 rowsExpected = uint64(int64( 171 // The total expected number of rows is the same number that was measured 172 // most recently, plus some overhead for possible insertions. 173 float64(tableStats[0].RowCount) * (1 + overhead), 174 )) 175 } 176 177 var jobID int64 178 if job.ID() != nil { 179 jobID = *job.ID() 180 } 181 182 // Set up the final SampleAggregator stage. 183 agg := &execinfrapb.SampleAggregatorSpec{ 184 Sketches: sketchSpecs, 185 SampleSize: sampler.SampleSize, 186 SampledColumnIDs: sampledColumnIDs, 187 TableID: desc.ID, 188 JobID: jobID, 189 RowsExpected: rowsExpected, 190 } 191 // Plan the SampleAggregator on the gateway, unless we have a single Sampler. 192 node := dsp.nodeDesc.NodeID 193 if len(p.ResultRouters) == 1 { 194 node = p.Processors[p.ResultRouters[0]].Node 195 } 196 p.AddSingleGroupStage( 197 node, 198 execinfrapb.ProcessorCoreUnion{SampleAggregator: agg}, 199 execinfrapb.PostProcessSpec{}, 200 []*types.T{}, 201 ) 202 203 return p, nil 204 } 205 206 func (dsp *DistSQLPlanner) createPlanForCreateStats( 207 planCtx *PlanningCtx, job *jobs.Job, 208 ) (*PhysicalPlan, error) { 209 details := job.Details().(jobspb.CreateStatsDetails) 210 reqStats := make([]requestedStat, len(details.ColumnStats)) 211 histogramCollectionEnabled := stats.HistogramClusterMode.Get(&dsp.st.SV) 212 for i := 0; i < len(reqStats); i++ { 213 histogram := details.ColumnStats[i].HasHistogram && histogramCollectionEnabled 214 reqStats[i] = requestedStat{ 215 columns: details.ColumnStats[i].ColumnIDs, 216 histogram: histogram, 217 histogramMaxBuckets: histogramBuckets, 218 name: details.Name, 219 } 220 } 221 222 tableDesc := sqlbase.NewImmutableTableDescriptor(details.Table) 223 return dsp.createStatsPlan(planCtx, tableDesc, reqStats, job) 224 } 225 226 func (dsp *DistSQLPlanner) planAndRunCreateStats( 227 ctx context.Context, 228 evalCtx *extendedEvalContext, 229 planCtx *PlanningCtx, 230 txn *kv.Txn, 231 job *jobs.Job, 232 resultRows *RowResultWriter, 233 ) error { 234 ctx = logtags.AddTag(ctx, "create-stats-distsql", nil) 235 236 physPlan, err := dsp.createPlanForCreateStats(planCtx, job) 237 if err != nil { 238 return err 239 } 240 241 dsp.FinalizePlan(planCtx, physPlan) 242 243 recv := MakeDistSQLReceiver( 244 ctx, 245 resultRows, 246 tree.DDL, 247 evalCtx.ExecCfg.RangeDescriptorCache, 248 evalCtx.ExecCfg.LeaseHolderCache, 249 txn, 250 func(ts hlc.Timestamp) { 251 evalCtx.ExecCfg.Clock.Update(ts) 252 }, 253 evalCtx.Tracing, 254 ) 255 defer recv.Release() 256 257 dsp.Run(planCtx, txn, physPlan, recv, evalCtx, nil /* finishedSetupFn */)() 258 return resultRows.Err() 259 }