github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/histogram.proto (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  //
    11  // Data structures and basic infrastructure for distributed SQL APIs. See
    12  // docs/RFCS/distributed_sql.md.
    13  // All the concepts here are "physical plan" concepts.
    14  
    15  syntax = "proto3";
    16  package cockroach.sql.stats;
    17  option go_package = "stats";
    18  
    19  import "gogoproto/gogo.proto";
    20  import "sql/types/types.proto";
    21  
    22  // HistogramData encodes the data for a histogram, which captures the
    23  // distribution of values on a specific column.
    24  message HistogramData {  // NOTE: tags here and in Bucket are not in declaration order; never renumber.
    25    message Bucket {  // One histogram bucket: row counts, a distinct-count estimate, and its upper bound.
    26      // The estimated number of values that are equal to upper_bound.
    27      int64 num_eq = 1;
    28  
    29      // The estimated number of values in the bucket (excluding those
    30      // that are equal to upper_bound). Splitting the count into two
    31      // makes the histogram effectively equivalent to a histogram with
    32      // twice as many buckets, with every other bucket containing a
    33      // single value. This might be particularly advantageous if the
    34      // histogram algorithm makes sure the top "heavy hitters" (most
    35      // frequent elements) are bucket boundaries (similar to a
    36      // compressed histogram).
    37      int64 num_range = 2;
    38  
    39      // The estimated number of distinct values in the bucket (excluding
    40      // those that are equal to upper_bound). This is a floating point
    41      // value because it is estimated by distributing the known distinct
    42      // count for the column among the buckets, in proportion to the number
    43      // of rows in each bucket. This value is in fact derived from the rest
    44      // of the data, but is included to avoid re-computing it later.
    45      double distinct_range = 4;
    46  
    47      // The upper boundary of the bucket. The column values for the upper bound
    48      // are encoded using the ascending key encoding of the column type.
    49      bytes upper_bound = 3;
    50    }
    51  
    52    // Value type for the column.
    53    sql.sem.types.T column_type = 2;
    54  
    55    // Histogram buckets. Note that NULL values are excluded from the
    56    // histogram. In Go, nullable=false yields Bucket values, not pointers.
    57    repeated Bucket buckets = 1 [(gogoproto.nullable) = false];
    58  }