github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/execinfrapb/processors_table_stats.proto (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  //
    11  // Processor definitions for distributed SQL APIs. See
    12  // docs/RFCS/distributed_sql.md.
    13  // All the concepts here are "physical plan" concepts.
    14  
    15  syntax = "proto2";
    16  // Beware! This package name must not be changed, even though it doesn't match
    17  // the Go package name, because it defines the Protobuf message names which
    18  // can't be changed without breaking backward compatibility.
    19  package cockroach.sql.distsqlrun;
    20  option go_package = "execinfrapb";
    21  
    22  import "sql/sqlbase/structured.proto";
    23  import "gogoproto/gogo.proto";
    24  
    25  enum SketchType {
          // HLL_PLUS_PLUS_V1 is the only sketch format declared in this enum. As the
          // first declared value it is also the proto2 default for an unset
          // SketchType field.
          //
    26    // This is the github.com/axiomhq/hyperloglog binary format (as of commit
    27    // 730eea1) for a sketch with precision 14. Values are encoded using their key
    28    // encoding, except integers which are encoded in 8 bytes (little-endian).
    29    HLL_PLUS_PLUS_V1 = 0;
    30  }
    31  
    32  // SketchSpec contains the specification for a generated statistic.
    33  message SketchSpec {
          // The format in which the sketch is produced; see SketchType.
    34    optional SketchType sketch_type = 1 [(gogoproto.nullable) = false];
    35  
    36    // Each value is an index identifying a column in the input stream.
    37    // TODO(radu): currently only one column is supported.
    38    repeated uint32 columns = 2;
    39  
    40    // If set, we generate a histogram for the first column in the sketch.
    41    optional bool generate_histogram = 3 [(gogoproto.nullable) = false];
    42  
    43    // Controls the maximum number of buckets in the histogram.
    44    // Only used by the SampleAggregator.
          // NOTE(review): presumably only meaningful when generate_histogram is
          // set - confirm.
    45    optional uint32 histogram_max_buckets = 4 [(gogoproto.nullable) = false];
    46  
          // NOTE(review): presumably the name under which the generated statistic
          // is stored - confirm against the SampleAggregator.
    47    // Only used by the SampleAggregator.
    48    optional string stat_name = 5 [(gogoproto.nullable) = false];
    49  }
    50  
    51  // SamplerSpec is the specification of a "sampler" processor which
    52  // returns a sample (random subset) of the input columns and computes
    53  // cardinality estimation sketches on sets of columns.
    54  //
    55  // The sampler is configured with a sample size and sets of columns
    56  // for the sketches. It produces one row with global statistics, one
    57  // row with sketch information for each sketch plus at most
    58  // sample_size sampled rows.
    59  //
    60  // The following method is used to do reservoir sampling: we generate a
    61  // "rank" for each row, which is just a random, uniformly distributed
    62  // 64-bit value. The rows with the smallest <sample_size> ranks are selected.
    63  // This method is chosen because it makes combining sample sets very easy.
    64  //
    65  // The internal schema of the processor is formed of two column
    66  // groups:
    67  //   1. sampled row columns:
    68  //       - columns that map 1-1 to the columns in the input (same
    69  //         schema as the input). Note that columns unused in a histogram are
    70  //         set to NULL.
    71  //       - an INT column with the "rank" of the row; this is a random value
    72  //         associated with the row (necessary for combining sample sets).
    73  //   2. sketch columns:
    74  //       - an INT column indicating the sketch index
    75  //         (0 to len(sketches) - 1).
    76  //       - an INT column indicating the number of rows processed
    77  //       - an INT column indicating the number of rows with NULL values
    78  //         on all columns of the sketch.
    79  //       - a BYTES column with the binary sketch data (format
    80  //         dependent on the sketch type).
    81  // Rows have NULLs on either all the sampled row columns or on all the
    82  // sketch columns.
    83  message SamplerSpec {
          // The sketches to compute. The sketch index emitted in the sketch columns
          // is a position in this list (0 to len(sketches) - 1).
    84    repeated SketchSpec sketches = 1 [(gogoproto.nullable) = false];
          // The maximum number of sampled rows produced: the rows with the smallest
          // sample_size ranks are selected.
    85    optional uint32 sample_size = 2 [(gogoproto.nullable) = false];
    86  
    87    // Setting this value enables throttling; this is the fraction of time that
    88    // the sampler processors will be idle when the recent CPU usage is high. The
    89    // throttling is adaptive so the actual idle fraction will depend on CPU
    90    // usage; this value is a ceiling.
    91    //
    92    // Currently, this field is set only for automatic statistics based on the
    93    // value of the cluster setting
    94    // sql.stats.automatic_collection.max_fraction_idle.
          //
          // NOTE(review): presumably a value of 0 (the default) leaves throttling
          // disabled - confirm against the sampler processor.
    95    optional double max_fraction_idle = 3 [(gogoproto.nullable) = false];
    96  }
    97  
    98  // SampleAggregatorSpec is the specification of a processor that aggregates the
    99  // results from multiple sampler processors and writes out the statistics to
   100  // system.table_statistics.
   101  //
   102  // The input schema it expects matches the output schema of a sampler spec (see
   103  // the comment for SamplerSpec for all the details):
   104  //  1. sampled row columns:
   105  //    - sampled columns
   106  //    - row rank
   107  //  2. sketch columns:
   108  //    - sketch index
   109  //    - number of rows processed
   110  //    - number of rows encountered with NULL values on all columns of the sketch
   111  //    - binary sketch data
   112  message SampleAggregatorSpec {
          // The sketches to aggregate.
          // NOTE(review): presumably this list must line up with the sketches of
          // the samplers feeding this processor, since input rows refer to a sketch
          // by index - confirm.
   113    repeated SketchSpec sketches = 1 [(gogoproto.nullable) = false];
   114  
   115    // The processor merges reservoir sample sets into a single
   116    // sample set of this size. This must match the sample size
   117    // used for each Sampler.
   118    optional uint32 sample_size = 2 [(gogoproto.nullable) = false];
   119  
   120    // The i-th value indicates the ColumnID of the i-th sampled row column.
   121    // These are necessary for writing out the statistic data.
          // The gogoproto options name the generated Go field SampledColumnIDs and
          // give its elements the type sqlbase.ColumnID.
   122    repeated uint32 sampled_column_ids = 3 [
   123      (gogoproto.customname) = "SampledColumnIDs",
   124      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/sqlbase.ColumnID"
   125    ];
   126  
          // NOTE(review): presumably the ID of the table whose statistics are being
          // written out - confirm.
          // The gogoproto options name the generated Go field TableID and give it
          // the type sqlbase.ID.
   127    optional uint32 table_id = 4 [
   128      (gogoproto.nullable) = false,
   129      (gogoproto.customname) = "TableID",
   130      (gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/sql/sqlbase.ID"
   131    ];
   132  
          // Field number 5 is reserved so that it cannot be assigned to a new field.
          // NOTE(review): presumably it belonged to a field that has since been
          // removed - confirm in the file history.
   133    reserved 5;
   134  
   135    // JobID is the id of the CREATE STATISTICS job.
   136    optional int64 job_id = 6 [
   137      (gogoproto.nullable) = false,
   138      (gogoproto.customname) = "JobID"
   139    ];
   140  
   141    // The total number of rows expected in the table based on previous runs of
   142    // CREATE STATISTICS. Used for progress reporting. If rows expected is 0,
   143    // reported progress is 0 until the very end.
   144    optional uint64 rows_expected = 7 [(gogoproto.nullable) = false];
   145  }