github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/stats_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package workload 12 13 import ( 14 "math" 15 "testing" 16 17 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 18 ) 19 20 func TestDistinctCount(t *testing.T) { 21 defer leaktest.AfterTest(t)() 22 test := func(rowCount, maxDistinctCount uint64) { 23 n, count := float64(maxDistinctCount), float64(0) 24 var expected uint64 25 // This calculation should produce the same result as the calculation 26 // in DistinctCount, but it's easier to see how this is correct (it's also 27 // much less efficient). For each row, we select a new value. The 28 // probability that it hasn't been seen before is (n-count)/n, where count 29 // is the total number of values seen so far, and n is the number of 30 // possible values. This probability is also equivalent to the expected 31 // value of the increase in distinct values seen so far, so we calculate 32 // the expected total number of distinct values by summing this probability 33 // over all rows. 34 for i := uint64(0); i < rowCount && expected < maxDistinctCount; i++ { 35 count += (n - count) / n 36 expected = uint64(int64(math.Round(count))) 37 } 38 39 actual := DistinctCount(rowCount, maxDistinctCount) 40 if expected != actual { 41 t.Fatalf("For row count %d and max distinct count %d, expected distinct"+ 42 " count %d but found %d", rowCount, maxDistinctCount, expected, actual) 43 } 44 } 45 46 for _, rowCount := range []uint64{0, 1, 10, 100, 1000} { 47 for _, maxDistinctCount := range []uint64{1, 10, 100, 1000} { 48 test(rowCount, maxDistinctCount) 49 } 50 } 51 }