// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
//
// ZipfGenerator implements the Incrementing Zipfian Random Number Generator from
// [1]: "Quickly Generating Billion-Record Synthetic Databases"
// by Gray, Sundaresan, Englert, Baclawski, and Weinberger, SIGMOD 1994.

package ycsb

import (
	"fmt"
	"math"
	"math/rand"

	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
)

const (
	// See https://github.com/brianfrankcooper/YCSB/blob/f886c1e7988f8f4965cb88a1fe2f6bad2c61b56d/core/src/main/java/com/yahoo/ycsb/generator/ScrambledZipfianGenerator.java#L33-L35
	//
	// defaultZetaN is the precomputed value of zeta(defaultIMax, defaultTheta);
	// computeZetaFromScratch returns it directly for exactly that input instead
	// of summing defaultIMax terms.
	defaultIMax  = 10000000000
	defaultTheta = 0.99
	defaultZetaN = 26.46902820178302
)

// ZipfGenerator is a random number generator that generates draws from a Zipf
// distribution. Unlike rand.Zipf, this generator supports incrementing the
// imax parameter without performing an expensive recomputation of the
// underlying hidden parameters, which is a pattern used in [1] for efficiently
// generating large volumes of Zipf-distributed records for synthetic data.
// Second, rand.Zipf requires its exponent to be greater than 1, whereas this
// generator supports any theta >= 0 other than 1 (see NewZipfGenerator).
type ZipfGenerator struct {
	// zipfGenMu holds the underlying RNG together with the parameters that
	// change when iMax is incremented; its fields are accessed while holding
	// zipfGenMu.mu.
	zipfGenMu ZipfGeneratorMu
	// supplied values
	theta float64
	iMin  uint64
	// internally computed values, fixed at construction time:
	//   alpha        = 1 / (1 - theta)
	//   zeta2        = zeta(2, theta)
	//   halfPowTheta = 1 + 0.5^theta
	alpha, zeta2, halfPowTheta float64
	// verbose makes Uint64 print every draw to stdout.
	verbose bool
}

// ZipfGeneratorMu holds variables which must be globally synced.
51 type ZipfGeneratorMu struct { 52 mu syncutil.Mutex 53 r *rand.Rand 54 iMax uint64 55 eta float64 56 zetaN float64 57 } 58 59 // NewZipfGenerator constructs a new ZipfGenerator with the given parameters. 60 // It returns an error if the parameters are outside the accepted range. 61 func NewZipfGenerator( 62 rng *rand.Rand, iMin, iMax uint64, theta float64, verbose bool, 63 ) (*ZipfGenerator, error) { 64 if iMin > iMax { 65 return nil, errors.Errorf("iMin %d > iMax %d", iMin, iMax) 66 } 67 if theta < 0.0 || theta == 1.0 { 68 return nil, errors.Errorf("0 < theta, and theta != 1") 69 } 70 71 z := ZipfGenerator{ 72 iMin: iMin, 73 zipfGenMu: ZipfGeneratorMu{ 74 r: rng, 75 iMax: iMax, 76 }, 77 theta: theta, 78 verbose: verbose, 79 } 80 z.zipfGenMu.mu.Lock() 81 defer z.zipfGenMu.mu.Unlock() 82 83 // Compute hidden parameters 84 zeta2, err := computeZetaFromScratch(2, theta) 85 if err != nil { 86 return nil, errors.Errorf("Could not compute zeta(2,theta): %s", err) 87 } 88 var zetaN float64 89 zetaN, err = computeZetaFromScratch(iMax+1-iMin, theta) 90 if err != nil { 91 return nil, errors.Errorf("Could not compute zeta(%d,theta): %s", iMax, err) 92 } 93 z.alpha = 1.0 / (1.0 - theta) 94 z.zipfGenMu.eta = (1 - math.Pow(2.0/float64(z.zipfGenMu.iMax+1-z.iMin), 1.0-theta)) / (1.0 - zeta2/zetaN) 95 z.zipfGenMu.zetaN = zetaN 96 z.zeta2 = zeta2 97 z.halfPowTheta = 1.0 + math.Pow(0.5, z.theta) 98 return &z, nil 99 } 100 101 // computeZetaIncrementally recomputes zeta(iMax, theta), assuming that 102 // sum = zeta(oldIMax, theta). It returns zeta(iMax, theta), computed incrementally. 
103 func computeZetaIncrementally(oldIMax, iMax uint64, theta float64, sum float64) (float64, error) { 104 if iMax < oldIMax { 105 return 0, errors.Errorf("Can't increment iMax backwards!") 106 } 107 for i := oldIMax + 1; i <= iMax; i++ { 108 sum += 1.0 / math.Pow(float64(i), theta) 109 } 110 return sum, nil 111 } 112 113 // The function zeta computes the value 114 // zeta(n, theta) = (1/1)^theta + (1/2)^theta + (1/3)^theta + ... + (1/n)^theta 115 func computeZetaFromScratch(n uint64, theta float64) (float64, error) { 116 if n == defaultIMax && theta == defaultTheta { 117 // Precomputed value, borrowed from ScrambledZipfianGenerator.java. (This is 118 // quite slow to calculate from scratch due to the large n value.) 119 return defaultZetaN, nil 120 } 121 zeta, err := computeZetaIncrementally(0, n, theta, 0.0) 122 if err != nil { 123 return zeta, errors.Errorf("could not compute zeta: %s", err) 124 } 125 return zeta, nil 126 } 127 128 // Uint64 draws a new value between iMin and iMax, with probabilities 129 // according to the Zipf distribution. 130 func (z *ZipfGenerator) Uint64() uint64 { 131 z.zipfGenMu.mu.Lock() 132 u := z.zipfGenMu.r.Float64() 133 uz := u * z.zipfGenMu.zetaN 134 var result uint64 135 if uz < 1.0 { 136 result = z.iMin 137 } else if uz < z.halfPowTheta { 138 result = z.iMin + 1 139 } else { 140 spread := float64(z.zipfGenMu.iMax + 1 - z.iMin) 141 result = z.iMin + uint64(int64(spread*math.Pow(z.zipfGenMu.eta*u-z.zipfGenMu.eta+1.0, z.alpha))) 142 } 143 if z.verbose { 144 fmt.Printf("Uint64[%d, %d] -> %d\n", z.iMin, z.zipfGenMu.iMax, result) 145 } 146 z.zipfGenMu.mu.Unlock() 147 return result 148 } 149 150 // IncrementIMax increments iMax by count and recomputes the internal values 151 // that depend on it. It throws an error if the recomputation failed. 
152 func (z *ZipfGenerator) IncrementIMax(count uint64) error { 153 z.zipfGenMu.mu.Lock() 154 zetaN, err := computeZetaIncrementally( 155 z.zipfGenMu.iMax+1-z.iMin, z.zipfGenMu.iMax+count+1-z.iMin, z.theta, z.zipfGenMu.zetaN) 156 if err != nil { 157 z.zipfGenMu.mu.Unlock() 158 return errors.Errorf("Could not incrementally compute zeta: %s", err) 159 } 160 z.zipfGenMu.iMax += count 161 eta := (1 - math.Pow(2.0/float64(z.zipfGenMu.iMax+1-z.iMin), 1.0-z.theta)) / (1.0 - z.zeta2/zetaN) 162 z.zipfGenMu.eta = eta 163 z.zipfGenMu.zetaN = zetaN 164 z.zipfGenMu.mu.Unlock() 165 return nil 166 }