github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/stats/histogram_test.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package stats
    12  
    13  import (
    14  	"fmt"
    15  	"math"
    16  	"math/rand"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    22  )
    23  
    24  func TestEquiDepthHistogram(t *testing.T) {
    25  	type expBucket struct {
    26  		upper        int
    27  		numEq        int64
    28  		numLess      int64
    29  		distinctLess float64
    30  	}
    31  	testCases := []struct {
    32  		samples       []int64
    33  		numRows       int64
    34  		distinctCount int64
    35  		maxBuckets    int
    36  		buckets       []expBucket
    37  	}{
    38  		{
    39  			samples:       []int64{1, 2, 4, 5, 5, 9},
    40  			numRows:       6,
    41  			distinctCount: 5,
    42  			maxBuckets:    3,
    43  			buckets: []expBucket{
    44  				{
    45  					// Bucket contains 1.
    46  					upper: 1, numEq: 1, numLess: 0, distinctLess: 0,
    47  				},
    48  				{
    49  					// Bucket contains 2, 4.
    50  					upper: 4, numEq: 1, numLess: 1, distinctLess: 0.73,
    51  				},
    52  				{
    53  					// Bucket contains 5, 5, 9.
    54  					upper: 9, numEq: 1, numLess: 2, distinctLess: 1.27,
    55  				},
    56  			},
    57  		},
    58  		{
    59  			samples:       []int64{1, 1, 1, 1, 2, 2},
    60  			numRows:       6,
    61  			distinctCount: 2,
    62  			maxBuckets:    2,
    63  			buckets: []expBucket{
    64  				{
    65  					// Bucket contains 1, 1, 1, 1.
    66  					upper: 1, numEq: 4, numLess: 0, distinctLess: 0,
    67  				},
    68  				{
    69  					// Bucket contains 2, 2.
    70  					upper: 2, numEq: 2, numLess: 0, distinctLess: 0,
    71  				},
    72  			},
    73  		},
    74  		{
    75  			samples:       []int64{1, 1, 1, 1, 2, 2},
    76  			numRows:       6,
    77  			distinctCount: 2,
    78  			maxBuckets:    3,
    79  			buckets: []expBucket{
    80  				{
    81  					// Bucket contains 1, 1, 1, 1.
    82  					upper: 1, numEq: 4, numLess: 0, distinctLess: 0,
    83  				},
    84  				{
    85  					// Bucket contains 2, 2.
    86  					upper: 2, numEq: 2, numLess: 0, distinctLess: 0,
    87  				},
    88  			},
    89  		},
    90  		{
    91  			samples:       []int64{1, 1, 2, 2, 2, 2},
    92  			numRows:       6,
    93  			distinctCount: 2,
    94  			maxBuckets:    2,
    95  			buckets: []expBucket{
    96  				{
    97  					// Bucket contains 1, 1.
    98  					upper: 1, numEq: 2, numLess: 0, distinctLess: 0,
    99  				},
   100  				{
   101  					// Bucket contains 2, 2, 2, 2.
   102  					upper: 2, numEq: 4, numLess: 0, distinctLess: 0,
   103  				},
   104  			},
   105  		},
   106  		{
   107  			samples:       []int64{1, 1, 2, 2, 2, 2},
   108  			numRows:       6,
   109  			distinctCount: 2,
   110  			maxBuckets:    3,
   111  			buckets: []expBucket{
   112  				{
   113  					// Bucket contains 1, 1.
   114  					upper: 1, numEq: 2, numLess: 0, distinctLess: 0,
   115  				},
   116  				{
   117  					// Bucket contains 2, 2, 2, 2.
   118  					upper: 2, numEq: 4, numLess: 0, distinctLess: 0,
   119  				},
   120  			},
   121  		},
   122  		{
   123  			samples:       []int64{1, 1, 1, 1, 1, 1},
   124  			numRows:       600,
   125  			distinctCount: 1,
   126  			maxBuckets:    10,
   127  			buckets: []expBucket{
   128  				{
   129  					// Bucket contains everything.
   130  					upper: 1, numEq: 600, numLess: 0, distinctLess: 0,
   131  				},
   132  			},
   133  		},
   134  		{
   135  			samples:       []int64{1, 2, 3, 4},
   136  			numRows:       4000,
   137  			distinctCount: 4,
   138  			maxBuckets:    3,
   139  			buckets: []expBucket{
   140  				{
   141  					// Bucket contains 1.
   142  					upper: 1, numEq: 1000, numLess: 0, distinctLess: 0,
   143  				},
   144  				{
   145  					// Bucket contains 2.
   146  					upper: 2, numEq: 1000, numLess: 0, distinctLess: 0,
   147  				},
   148  				{
   149  					// Bucket contains 3, 4.
   150  					upper: 4, numEq: 1000, numLess: 1000, distinctLess: 1,
   151  				},
   152  			},
   153  		},
   154  		{
   155  			samples:       []int64{-9222292034315889200, -9130100296576294525, -9042492057500701159},
   156  			numRows:       3000,
   157  			distinctCount: 300,
   158  			maxBuckets:    2,
   159  			buckets: []expBucket{
   160  				{
   161  					// Bucket contains -9222292034315889200.
   162  					upper: -9222292034315889200, numEq: 1000, numLess: 0, distinctLess: 0,
   163  				},
   164  				{
   165  					// Bucket contains -9130100296576294525, -9042492057500701159.
   166  					upper: -9042492057500701159, numEq: 1000, numLess: 1000, distinctLess: 298,
   167  				},
   168  			},
   169  		},
   170  	}
   171  
   172  	evalCtx := tree.NewTestingEvalContext(cluster.MakeTestingClusterSettings())
   173  
   174  	for i, tc := range testCases {
   175  		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
   176  			samples := make(tree.Datums, len(tc.samples))
   177  			perm := rand.Perm(len(samples))
   178  			for i := range samples {
   179  				// Randomly permute the samples.
   180  				val := tc.samples[perm[i]]
   181  
   182  				samples[i] = tree.NewDInt(tree.DInt(val))
   183  			}
   184  
   185  			h, err := EquiDepthHistogram(evalCtx, samples, tc.numRows, tc.distinctCount, tc.maxBuckets)
   186  			if err != nil {
   187  				t.Fatal(err)
   188  			}
   189  			if len(h.Buckets) != len(tc.buckets) {
   190  				t.Fatalf("Invalid number of buckets %d, expected %d", len(h.Buckets), len(tc.buckets))
   191  			}
   192  			for i, b := range h.Buckets {
   193  				_, val, err := encoding.DecodeVarintAscending(b.UpperBound)
   194  				if err != nil {
   195  					t.Fatal(err)
   196  				}
   197  				exp := tc.buckets[i]
   198  				if val != int64(exp.upper) {
   199  					t.Errorf("bucket %d: incorrect boundary %d, expected %d", i, val, exp.upper)
   200  				}
   201  				if b.NumEq != exp.numEq {
   202  					t.Errorf("bucket %d: incorrect EqRows %d, expected %d", i, b.NumEq, exp.numEq)
   203  				}
   204  				if b.NumRange != exp.numLess {
   205  					t.Errorf("bucket %d: incorrect RangeRows %d, expected %d", i, b.NumRange, exp.numLess)
   206  				}
   207  				// Round to two decimal places.
   208  				distinctRange := math.Round(b.DistinctRange*100.0) / 100.0
   209  				if distinctRange != exp.distinctLess {
   210  					t.Errorf("bucket %d: incorrect DistinctRows %f, expected %f", i, distinctRange, exp.distinctLess)
   211  				}
   212  			}
   213  		})
   214  	}
   215  
   216  	t.Run("invalid-numRows", func(t *testing.T) {
   217  		samples := tree.Datums{tree.NewDInt(1), tree.NewDInt(2), tree.NewDInt(3)}
   218  		_, err := EquiDepthHistogram(
   219  			evalCtx, samples, 2 /* numRows */, 2 /* distinctCount */, 10, /* maxBuckets */
   220  		)
   221  		if err == nil {
   222  			t.Fatal("expected error")
   223  		}
   224  	})
   225  
   226  	t.Run("nulls", func(t *testing.T) {
   227  		samples := tree.Datums{tree.NewDInt(1), tree.NewDInt(2), tree.DNull}
   228  		_, err := EquiDepthHistogram(
   229  			evalCtx, samples, 100 /* numRows */, 3 /* distinctCount */, 10, /* maxBuckets */
   230  		)
   231  		if err == nil {
   232  			t.Fatal("expected error")
   233  		}
   234  	})
   235  }