github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/tpch/random.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tpch 12 13 import ( 14 "bytes" 15 "strconv" 16 "sync" 17 18 "github.com/cockroachdb/cockroach/pkg/util/bufalloc" 19 "github.com/cockroachdb/cockroach/pkg/util/encoding" 20 "github.com/cockroachdb/cockroach/pkg/workload/faker" 21 "golang.org/x/exp/rand" 22 ) 23 24 const alphanumericLen64 = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890, ` 25 26 // randInt returns a random value between x and y inclusively, with a mean of 27 // (x+y)/2. See 4.2.2.3. 28 func randInt(rng *rand.Rand, x, y int) int { 29 return rng.Intn(y-x+1) + x 30 } 31 32 func randFloat(rng *rand.Rand, x, y, shift int) float32 { 33 return float32(randInt(rng, x, y)) / float32(shift) 34 } 35 36 type textPool interface { 37 // 4.2.2.10: 38 // The term text string[min, max] represents a substring of a 300 MB string 39 // populated according to the pseudo text grammar defined in Clause 4.2.2.14. 40 // The length of the substring is a random number between min and max 41 // inclusive. The substring offset is randomly chosen. 42 // 43 // randString implementations must be threadsafe. 44 randString(rng *rand.Rand, minLen, maxLen int) []byte 45 } 46 47 type fakeTextPool struct { 48 seed uint64 49 once struct { 50 sync.Once 51 buf []byte 52 } 53 } 54 55 // randString implements textPool with a cheaper simulation of the 300 MB 56 // string. It's not to spec both because it's shorter and also because it's not 57 // generated according to the pseudo text grammar. 58 func (p *fakeTextPool) randString(rng *rand.Rand, minLen, maxLen int) []byte { 59 const fakeTextPoolSize = 1 << 20 // 1 MiB 60 p.once.Do(func() { 61 bufRng := rand.New(rand.NewSource(p.seed)) 62 f := faker.NewFaker() 63 // This loop generates random paragraphs and adds them until the length is 64 // >= fakeTextPoolSize. Add some extra capacity so that we don't allocate 65 // and copy on the paragraph that goes over. 66 buf := bytes.NewBuffer(make([]byte, 0, fakeTextPoolSize+1024)) 67 for buf.Len() < fakeTextPoolSize { 68 buf.WriteString(f.Paragraph(bufRng)) 69 buf.WriteString(` `) 70 } 71 p.once.buf = buf.Bytes()[:fakeTextPoolSize:fakeTextPoolSize] 72 }) 73 start := rng.Intn(len(p.once.buf) - maxLen) 74 end := start + rng.Intn(maxLen-minLen) + minLen 75 return p.once.buf[start:end] 76 } 77 78 // randVString returns "a string comprised of randomly generated alphanumeric 79 // characters within a character set of at least 64 symbols. The length of the 80 // string is a random value between min and max inclusive". See 4.2.2.7. 81 func randVString(rng *rand.Rand, a *bufalloc.ByteAllocator, minLen, maxLen int) []byte { 82 var buf []byte 83 *a, buf = a.Alloc(randInt(rng, minLen, maxLen), 0) 84 for i := range buf { 85 buf[i] = alphanumericLen64[rng.Intn(len(alphanumericLen64))] 86 } 87 return buf 88 } 89 90 // randPhone returns a phone number generated according to 4.2.2.9. 91 func randPhone(rng *rand.Rand, a *bufalloc.ByteAllocator, nationKey int16) []byte { 92 var buf []byte 93 *a, buf = a.Alloc(15, 0) 94 buf = buf[:0] 95 96 countryCode := nationKey + 10 97 localNumber1 := randInt(rng, 100, 999) 98 localNumber2 := randInt(rng, 100, 999) 99 localNumber3 := randInt(rng, 1000, 9999) 100 buf = strconv.AppendInt(buf, int64(countryCode), 10) 101 buf = append(buf, '-') 102 buf = strconv.AppendInt(buf, int64(localNumber1), 10) 103 buf = append(buf, '-') 104 buf = strconv.AppendInt(buf, int64(localNumber2), 10) 105 buf = append(buf, '-') 106 buf = strconv.AppendInt(buf, int64(localNumber3), 10) 107 return buf 108 } 109 110 var randPartNames = [...]string{ 111 "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue", 112 "blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral", 113 "cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick", 114 "floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", 115 "hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen", 116 "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", 117 "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder", 118 "puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", 119 "sienna", "sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", 120 "turquoise", "violet", "wheat", "white", "yellow", 121 } 122 123 const maxPartNameLen = 10 124 const nPartNames = 5 125 126 // randPartName concatenates 5 random unique strings from randPartNames, separated 127 // by spaces. 128 func randPartName(rng *rand.Rand, namePerm []int, a *bufalloc.ByteAllocator) []byte { 129 // do nPartNames iterations of rand.Perm, to get a random 5-subset of the 130 // indexes into randPartNames. 131 for i := 0; i < nPartNames; i++ { 132 j := rng.Intn(i + 1) 133 namePerm[i] = namePerm[j] 134 namePerm[j] = i 135 } 136 var buf []byte 137 *a, buf = a.Alloc(maxPartNameLen*nPartNames+nPartNames, 0) 138 buf = buf[:0] 139 for i := 0; i < nPartNames; i++ { 140 if i != 0 { 141 buf = append(buf, byte(' ')) 142 } 143 buf = append(buf, randPartNames[namePerm[i]]...) 144 } 145 return buf 146 } 147 148 const manufacturerString = "Manufacturer#" 149 150 func randMfgr(rng *rand.Rand, a *bufalloc.ByteAllocator) (byte, []byte) { 151 var buf []byte 152 *a, buf = a.Alloc(len(manufacturerString)+1, 0) 153 154 copy(buf, manufacturerString) 155 m := byte(rng.Intn(5) + '1') 156 buf[len(buf)-1] = m 157 return m, buf 158 } 159 160 const brandString = "Brand#" 161 162 func randBrand(rng *rand.Rand, a *bufalloc.ByteAllocator, m byte) []byte { 163 var buf []byte 164 *a, buf = a.Alloc(len(brandString)+2, 0) 165 166 copy(buf, brandString) 167 n := byte(rng.Intn(5) + '1') 168 buf[len(buf)-2] = m 169 buf[len(buf)-1] = n 170 return buf 171 } 172 173 const clerkString = "Clerk#" 174 175 func randClerk(rng *rand.Rand, a *bufalloc.ByteAllocator, scaleFactor int) []byte { 176 var buf []byte 177 *a, buf = a.Alloc(len(clerkString)+9, 0) 178 copy(buf, clerkString) 179 ninePaddedInt(buf[len(clerkString):], int64(randInt(rng, 1, scaleFactor*1000))) 180 return buf 181 } 182 183 const supplierString = "Supplier#" 184 185 func supplierName(a *bufalloc.ByteAllocator, suppKey int64) []byte { 186 var buf []byte 187 *a, buf = a.Alloc(len(supplierString)+9, 0) 188 copy(buf, supplierString) 189 ninePaddedInt(buf[len(supplierString):], suppKey) 190 return buf 191 } 192 193 const customerString = "Customer#" 194 195 func customerName(a *bufalloc.ByteAllocator, custKey int64) []byte { 196 var buf []byte 197 *a, buf = a.Alloc(len(customerString)+9, 0) 198 copy(buf, customerString) 199 ninePaddedInt(buf[len(customerString):], custKey) 200 return buf 201 } 202 203 const ninePadding = `000000000` 204 205 func ninePaddedInt(buf []byte, x int64) { 206 buf = buf[:len(ninePadding)] 207 intLen := len(strconv.AppendInt(buf[:0], x, 10)) 208 numZeros := len(ninePadding) - intLen 209 copy(buf[numZeros:], buf[:intLen]) 210 copy(buf[:numZeros], ninePadding[:numZeros]) 211 } 212 213 func randSyllables( 214 rng *rand.Rand, a *bufalloc.ByteAllocator, maxLen int, syllables [][]string, 215 ) []byte { 216 var buf []byte 217 *a, buf = a.Alloc(maxLen, 0) 218 buf = buf[:0] 219 220 for i, syl := range syllables { 221 if i != 0 { 222 buf = append(buf, ' ') 223 buf = append(buf, syl[rng.Intn(len(syl))]...) 224 } 225 } 226 return buf 227 } 228 229 var typeSyllables = [][]string{ 230 {"STANDARD", "SMALL", "MEDIUM", "LARGE", "ECONOMY", "PROMO"}, 231 {"ANODIZED", "BURNISHED", "PLATED", "POLISHED", "BRUSHED"}, 232 {"TIN", "NICKEL", "BRASS", "STEEL", "COPPER"}, 233 } 234 235 const maxTypeLen = 25 236 237 func randType(rng *rand.Rand, a *bufalloc.ByteAllocator) []byte { 238 return randSyllables(rng, a, maxTypeLen, typeSyllables) 239 } 240 241 var containerSyllables = [][]string{ 242 {"SM", "MED", "JUMBO", "WRAP"}, 243 {"BOX", "BAG", "JAR", "PKG", "PACK", "CAN", "DRUM"}, 244 } 245 246 const maxContainerLen = 10 247 248 func randContainer(rng *rand.Rand, a *bufalloc.ByteAllocator) []byte { 249 return randSyllables(rng, a, maxContainerLen, containerSyllables) 250 } 251 252 var segments = []string{ 253 "AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD", 254 } 255 256 func randSegment(rng *rand.Rand) []byte { 257 return encoding.UnsafeConvertStringToBytes(segments[rng.Intn(len(segments))]) 258 } 259 260 var priorities = []string{ 261 "1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", 262 } 263 264 func randPriority(rng *rand.Rand) []byte { 265 return encoding.UnsafeConvertStringToBytes(priorities[rng.Intn(len(priorities))]) 266 } 267 268 var instructions = []string{ 269 "DELIVER IN PERSON", 270 "COLLECT COD", "NONE", 271 "TAKE BACK RETURN", 272 } 273 274 func randInstruction(rng *rand.Rand) []byte { 275 return encoding.UnsafeConvertStringToBytes(instructions[rng.Intn(len(instructions))]) 276 } 277 278 var modes = []string{ 279 "REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB", 280 } 281 282 func randMode(rng *rand.Rand) []byte { 283 return []byte(modes[rng.Intn(len(modes))]) 284 }