// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
//
// Processor definitions for distributed SQL APIs. See
// docs/RFCS/distributed_sql.md.
// All the concepts here are "physical plan" concepts.

syntax = "proto2";
// Beware! This package name must not be changed, even though it doesn't match
// the Go package name, because it defines the Protobuf message names which
// can't be changed without breaking backward compatibility.
package cockroach.sql.distsqlrun;
option go_package = "execinfrapb";

import "sql/sqlbase/structured.proto";
import "gogoproto/gogo.proto";

// SketchType identifies the binary format of a cardinality estimation sketch.
enum SketchType {
  // This is the github.com/axiomhq/hyperloglog binary format (as of commit
  // 730eea1) for a sketch with precision 14. Values are encoded using their key
  // encoding, except integers which are encoded in 8 bytes (little-endian).
  HLL_PLUS_PLUS_V1 = 0;
}

// SketchSpec contains the specification for a generated statistic.
message SketchSpec {
  // The type of sketch to generate; the binary format of the sketch data
  // depends on this type.
  optional SketchType sketch_type = 1 [(gogoproto.nullable) = false];

  // Each value is an index identifying a column in the input stream.
  // TODO(radu): currently only one column is supported.
  repeated uint32 columns = 2;

  // If set, we generate a histogram for the first column in the sketch.
  optional bool generate_histogram = 3 [(gogoproto.nullable) = false];

  // Controls the maximum number of buckets in the histogram.
  // Only used by the SampleAggregator.
  optional uint32 histogram_max_buckets = 4 [(gogoproto.nullable) = false];

  // Only used by the SampleAggregator.
  // NOTE(review): presumably the name under which the statistic is written
  // out; confirm against the SampleAggregator implementation.
  optional string stat_name = 5 [(gogoproto.nullable) = false];
}

// SamplerSpec is the specification of a "sampler" processor which
// returns a sample (random subset) of the input columns and computes
// cardinality estimation sketches on sets of columns.
//
// The sampler is configured with a sample size and sets of columns
// for the sketches. It produces one row with global statistics, one
// row with sketch information for each sketch plus at most
// sample_size sampled rows.
//
// The following method is used to do reservoir sampling: we generate a
// "rank" for each row, which is just a random, uniformly distributed
// 64-bit value. The rows with the smallest <sample_size> ranks are selected.
// This method is chosen because it allows sample sets to be combined very
// easily.
//
// The internal schema of the processor is formed of two column
// groups:
//   1. sampled row columns:
//       - columns that map 1-1 to the columns in the input (same
//         schema as the input). Note that columns unused in a histogram are
//         set to NULL.
//       - an INT column with the "rank" of the row; this is a random value
//         associated with the row (necessary for combining sample sets).
//   2. sketch columns:
//       - an INT column indicating the sketch index
//         (0 to len(sketches) - 1).
//       - an INT column indicating the number of rows processed
//       - an INT column indicating the number of rows with NULL values
//         on all columns of the sketch.
//       - a BYTES column with the binary sketch data (format
//         dependent on the sketch type).
// Rows have NULLs on either all the sampled row columns or on all the
// sketch columns.
message SamplerSpec {
  // The sketches to compute. The sketch index emitted in the sketch columns
  // (0 to len(sketches) - 1) refers to a position in this list.
  repeated SketchSpec sketches = 1 [(gogoproto.nullable) = false];

  // The maximum number of sampled rows that the sampler produces.
  optional uint32 sample_size = 2 [(gogoproto.nullable) = false];

  // Setting this value enables throttling; this is the fraction of time that
  // the sampler processors will be idle when the recent CPU usage is high. The
  // throttling is adaptive so the actual idle fraction will depend on CPU
  // usage; this value is a ceiling.
  //
  // Currently, this field is set only for automatic statistics based on the
  // value of the cluster setting
  // sql.stats.automatic_collection.max_fraction_idle.
  optional double max_fraction_idle = 3 [(gogoproto.nullable) = false];
}

// SampleAggregatorSpec is the specification of a processor that aggregates the
// results from multiple sampler processors and writes out the statistics to
// system.table_statistics.
//
// The input schema it expects matches the output schema of a sampler spec (see
// the comment for SamplerSpec for all the details):
//  1. sampled row columns:
//    - sampled columns
//    - row rank
//  2. sketch columns:
//    - sketch index
//    - number of rows processed
//    - number of rows encountered with NULL values on all columns of the sketch
//    - binary sketch data
message SampleAggregatorSpec {
  // The specifications of the sketches being aggregated.
  // NOTE(review): presumably these must match the sketches configured on the
  // samplers, since input rows identify a sketch by index; confirm.
  repeated SketchSpec sketches = 1 [(gogoproto.nullable) = false];

  // The processor merges reservoir sample sets into a single
  // sample set of this size. This must match the sample size
  // used for each Sampler.
  optional uint32 sample_size = 2 [(gogoproto.nullable) = false];

  // The i-th value indicates the ColumnID of the i-th sampled row column.
  // These are necessary for writing out the statistic data.
  repeated uint32 sampled_column_ids = 3 [
    (gogoproto.customname) = "SampledColumnIDs",
    (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/sqlbase.ColumnID"
  ];

  // NOTE(review): presumably the ID of the table for which statistics are
  // written out; confirm against the SampleAggregator implementation.
  optional uint32 table_id = 4 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "TableID",
    (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/sqlbase.ID"
  ];

  // Field number 5 is reserved and must not be reused for a new field.
  reserved 5;

  // JobID is the id of the CREATE STATISTICS job.
  optional int64 job_id = 6 [
    (gogoproto.nullable) = false,
    (gogoproto.customname) = "JobID"
  ];

  // The total number of rows expected in the table based on previous runs of
  // CREATE STATISTICS. Used for progress reporting. If rows expected is 0,
  // reported progress is 0 until the very end.
  optional uint64 rows_expected = 7 [(gogoproto.nullable) = false];
}