github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/stats_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package workload
    12  
    13  import (
    14  	"math"
    15  	"testing"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    18  )
    19  
    20  func TestDistinctCount(t *testing.T) {
    21  	defer leaktest.AfterTest(t)()
    22  	test := func(rowCount, maxDistinctCount uint64) {
    23  		n, count := float64(maxDistinctCount), float64(0)
    24  		var expected uint64
    25  		// This calculation should produce the same result as the calculation
    26  		// in DistinctCount, but it's easier to see how this is correct (it's also
    27  		// much less efficient). For each row, we select a new value. The
    28  		// probability that it hasn't been seen before is (n-count)/n, where count
    29  		// is the total number of values seen so far, and n is the number of
    30  		// possible values. This probability is also equivalent to the expected
    31  		// value of the increase in distinct values seen so far, so we calculate
    32  		// the expected total number of distinct values by summing this probability
    33  		// over all rows.
    34  		for i := uint64(0); i < rowCount && expected < maxDistinctCount; i++ {
    35  			count += (n - count) / n
    36  			expected = uint64(int64(math.Round(count)))
    37  		}
    38  
    39  		actual := DistinctCount(rowCount, maxDistinctCount)
    40  		if expected != actual {
    41  			t.Fatalf("For row count %d and max distinct count %d, expected distinct"+
    42  				" count %d but found %d", rowCount, maxDistinctCount, expected, actual)
    43  		}
    44  	}
    45  
    46  	for _, rowCount := range []uint64{0, 1, 10, 100, 1000} {
    47  		for _, maxDistinctCount := range []uint64{1, 10, 100, 1000} {
    48  			test(rowCount, maxDistinctCount)
    49  		}
    50  	}
    51  }