// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
//
// Serialized representation of column histograms for the
// cockroach.sql.stats package.

syntax = "proto3";
package cockroach.sql.stats;
option go_package = "stats";

import "gogoproto/gogo.proto";
import "sql/types/types.proto";

// HistogramData encodes the data for a histogram, which captures the
// distribution of values on a specific column.
//
// Field numbers in this message (and in Bucket) do not follow declaration
// order. They identify the fields on the wire and must not be renumbered.
message HistogramData {
  // Bucket describes a single histogram bucket. Each bucket carries two
  // counts: the values equal to its upper bound, and the remaining values
  // that fall inside the bucket.
  message Bucket {
    // The estimated number of values that are equal to upper_bound.
    int64 num_eq = 1;

    // The estimated number of values in the bucket (excluding those
    // that are equal to upper_bound). Splitting the count into two
    // makes the histogram effectively equivalent to a histogram with
    // twice as many buckets, with every other bucket containing a
    // single value. This might be particularly advantageous if the
    // histogram algorithm makes sure the top "heavy hitters" (most
    // frequent elements) are bucket boundaries (similar to a
    // compressed histogram).
    int64 num_range = 2;

    // The estimated number of distinct values in the bucket (excluding
    // those that are equal to upper_bound). This is a floating point
    // value because it is estimated by distributing the known distinct
    // count for the column among the buckets, in proportion to the number
    // of rows in each bucket. This value is in fact derived from the rest
    // of the data, but is included to avoid re-computing it later.
    //
    // Declared before upper_bound but numbered 4; keep the number as is.
    double distinct_range = 4;

    // The upper boundary of the bucket. The column values for the upper bound
    // are encoded using the ascending key encoding of the column type.
    bytes upper_bound = 3;
  }

  // Value type for the column.
  sql.sem.types.T column_type = 2;

  // Histogram buckets. Note that NULL values are excluded from the
  // histogram.
  //
  // (gogoproto.nullable) = false asks gogoproto to generate this field as a
  // slice of Bucket values rather than a slice of pointers.
  repeated Bucket buckets = 1 [(gogoproto.nullable) = false];
}