github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/tpch/random.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tpch
    12  
    13  import (
    14  	"bytes"
    15  	"strconv"
    16  	"sync"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
    19  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    20  	"github.com/cockroachdb/cockroach/pkg/workload/faker"
    21  	"golang.org/x/exp/rand"
    22  )
    23  
    24  const alphanumericLen64 = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890, `
    25  
    26  // randInt returns a random value between x and y inclusively, with a mean of
    27  // (x+y)/2. See 4.2.2.3.
    28  func randInt(rng *rand.Rand, x, y int) int {
    29  	return rng.Intn(y-x+1) + x
    30  }
    31  
    32  func randFloat(rng *rand.Rand, x, y, shift int) float32 {
    33  	return float32(randInt(rng, x, y)) / float32(shift)
    34  }
    35  
// textPool is a source of random text substrings, modeling the
// "text string[min, max]" term of 4.2.2.10.
type textPool interface {
	// 4.2.2.10:
	// The term text string[min, max] represents a substring of a 300 MB string
	// populated according to the pseudo text grammar defined in Clause 4.2.2.14.
	// The length of the substring is a random number between min and max
	// inclusive. The substring offset is randomly chosen.
	//
	// randString implementations must be threadsafe.
	randString(rng *rand.Rand, minLen, maxLen int) []byte
}
    46  
    47  type fakeTextPool struct {
    48  	seed uint64
    49  	once struct {
    50  		sync.Once
    51  		buf []byte
    52  	}
    53  }
    54  
    55  // randString implements textPool with a cheaper simulation of the 300 MB
    56  // string. It's not to spec both because it's shorter and also because it's not
    57  // generated according to the pseudo text grammar.
    58  func (p *fakeTextPool) randString(rng *rand.Rand, minLen, maxLen int) []byte {
    59  	const fakeTextPoolSize = 1 << 20 // 1 MiB
    60  	p.once.Do(func() {
    61  		bufRng := rand.New(rand.NewSource(p.seed))
    62  		f := faker.NewFaker()
    63  		// This loop generates random paragraphs and adds them until the length is
    64  		// >= fakeTextPoolSize. Add some extra capacity so that we don't allocate
    65  		// and copy on the paragraph that goes over.
    66  		buf := bytes.NewBuffer(make([]byte, 0, fakeTextPoolSize+1024))
    67  		for buf.Len() < fakeTextPoolSize {
    68  			buf.WriteString(f.Paragraph(bufRng))
    69  			buf.WriteString(` `)
    70  		}
    71  		p.once.buf = buf.Bytes()[:fakeTextPoolSize:fakeTextPoolSize]
    72  	})
    73  	start := rng.Intn(len(p.once.buf) - maxLen)
    74  	end := start + rng.Intn(maxLen-minLen) + minLen
    75  	return p.once.buf[start:end]
    76  }
    77  
    78  // randVString returns "a string comprised of randomly generated alphanumeric
    79  // characters within a character set of at least 64 symbols. The length of the
    80  // string is a random value between min and max inclusive". See 4.2.2.7.
    81  func randVString(rng *rand.Rand, a *bufalloc.ByteAllocator, minLen, maxLen int) []byte {
    82  	var buf []byte
    83  	*a, buf = a.Alloc(randInt(rng, minLen, maxLen), 0)
    84  	for i := range buf {
    85  		buf[i] = alphanumericLen64[rng.Intn(len(alphanumericLen64))]
    86  	}
    87  	return buf
    88  }
    89  
    90  // randPhone returns a phone number generated according to 4.2.2.9.
    91  func randPhone(rng *rand.Rand, a *bufalloc.ByteAllocator, nationKey int16) []byte {
    92  	var buf []byte
    93  	*a, buf = a.Alloc(15, 0)
    94  	buf = buf[:0]
    95  
    96  	countryCode := nationKey + 10
    97  	localNumber1 := randInt(rng, 100, 999)
    98  	localNumber2 := randInt(rng, 100, 999)
    99  	localNumber3 := randInt(rng, 1000, 9999)
   100  	buf = strconv.AppendInt(buf, int64(countryCode), 10)
   101  	buf = append(buf, '-')
   102  	buf = strconv.AppendInt(buf, int64(localNumber1), 10)
   103  	buf = append(buf, '-')
   104  	buf = strconv.AppendInt(buf, int64(localNumber2), 10)
   105  	buf = append(buf, '-')
   106  	buf = strconv.AppendInt(buf, int64(localNumber3), 10)
   107  	return buf
   108  }
   109  
// randPartNames is the word list that randPartName builds part names from.
var randPartNames = [...]string{
	"almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue",
	"blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral",
	"cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick",
	"floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew",
	"hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen",
	"magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo",
	"navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder",
	"puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell",
	"sienna", "sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato",
	"turquoise", "violet", "wheat", "white", "yellow",
}
   122  
   123  const maxPartNameLen = 10
   124  const nPartNames = 5
   125  
   126  // randPartName concatenates 5 random unique strings from randPartNames, separated
   127  // by spaces.
   128  func randPartName(rng *rand.Rand, namePerm []int, a *bufalloc.ByteAllocator) []byte {
   129  	// do nPartNames iterations of rand.Perm, to get a random 5-subset of the
   130  	// indexes into randPartNames.
   131  	for i := 0; i < nPartNames; i++ {
   132  		j := rng.Intn(i + 1)
   133  		namePerm[i] = namePerm[j]
   134  		namePerm[j] = i
   135  	}
   136  	var buf []byte
   137  	*a, buf = a.Alloc(maxPartNameLen*nPartNames+nPartNames, 0)
   138  	buf = buf[:0]
   139  	for i := 0; i < nPartNames; i++ {
   140  		if i != 0 {
   141  			buf = append(buf, byte(' '))
   142  		}
   143  		buf = append(buf, randPartNames[namePerm[i]]...)
   144  	}
   145  	return buf
   146  }
   147  
   148  const manufacturerString = "Manufacturer#"
   149  
   150  func randMfgr(rng *rand.Rand, a *bufalloc.ByteAllocator) (byte, []byte) {
   151  	var buf []byte
   152  	*a, buf = a.Alloc(len(manufacturerString)+1, 0)
   153  
   154  	copy(buf, manufacturerString)
   155  	m := byte(rng.Intn(5) + '1')
   156  	buf[len(buf)-1] = m
   157  	return m, buf
   158  }
   159  
   160  const brandString = "Brand#"
   161  
   162  func randBrand(rng *rand.Rand, a *bufalloc.ByteAllocator, m byte) []byte {
   163  	var buf []byte
   164  	*a, buf = a.Alloc(len(brandString)+2, 0)
   165  
   166  	copy(buf, brandString)
   167  	n := byte(rng.Intn(5) + '1')
   168  	buf[len(buf)-2] = m
   169  	buf[len(buf)-1] = n
   170  	return buf
   171  }
   172  
   173  const clerkString = "Clerk#"
   174  
   175  func randClerk(rng *rand.Rand, a *bufalloc.ByteAllocator, scaleFactor int) []byte {
   176  	var buf []byte
   177  	*a, buf = a.Alloc(len(clerkString)+9, 0)
   178  	copy(buf, clerkString)
   179  	ninePaddedInt(buf[len(clerkString):], int64(randInt(rng, 1, scaleFactor*1000)))
   180  	return buf
   181  }
   182  
   183  const supplierString = "Supplier#"
   184  
   185  func supplierName(a *bufalloc.ByteAllocator, suppKey int64) []byte {
   186  	var buf []byte
   187  	*a, buf = a.Alloc(len(supplierString)+9, 0)
   188  	copy(buf, supplierString)
   189  	ninePaddedInt(buf[len(supplierString):], suppKey)
   190  	return buf
   191  }
   192  
   193  const customerString = "Customer#"
   194  
   195  func customerName(a *bufalloc.ByteAllocator, custKey int64) []byte {
   196  	var buf []byte
   197  	*a, buf = a.Alloc(len(customerString)+9, 0)
   198  	copy(buf, customerString)
   199  	ninePaddedInt(buf[len(customerString):], custKey)
   200  	return buf
   201  }
   202  
   203  const ninePadding = `000000000`
   204  
   205  func ninePaddedInt(buf []byte, x int64) {
   206  	buf = buf[:len(ninePadding)]
   207  	intLen := len(strconv.AppendInt(buf[:0], x, 10))
   208  	numZeros := len(ninePadding) - intLen
   209  	copy(buf[numZeros:], buf[:intLen])
   210  	copy(buf[:numZeros], ninePadding[:numZeros])
   211  }
   212  
   213  func randSyllables(
   214  	rng *rand.Rand, a *bufalloc.ByteAllocator, maxLen int, syllables [][]string,
   215  ) []byte {
   216  	var buf []byte
   217  	*a, buf = a.Alloc(maxLen, 0)
   218  	buf = buf[:0]
   219  
   220  	for i, syl := range syllables {
   221  		if i != 0 {
   222  			buf = append(buf, ' ')
   223  			buf = append(buf, syl[rng.Intn(len(syl))]...)
   224  		}
   225  	}
   226  	return buf
   227  }
   228  
// typeSyllables holds the word groups that randType hands to randSyllables,
// one group per word position.
var typeSyllables = [][]string{
	{"STANDARD", "SMALL", "MEDIUM", "LARGE", "ECONOMY", "PROMO"},
	{"ANODIZED", "BURNISHED", "PLATED", "POLISHED", "BRUSHED"},
	{"TIN", "NICKEL", "BRASS", "STEEL", "COPPER"},
}

// maxTypeLen is the buffer size randType requests. It equals the longest word
// of each group in typeSyllables joined by single spaces (8 + 1 + 9 + 1 + 6).
const maxTypeLen = 25

// randType returns the result of randSyllables over typeSyllables, built in a
// buffer of maxTypeLen bytes allocated from a.
func randType(rng *rand.Rand, a *bufalloc.ByteAllocator) []byte {
	return randSyllables(rng, a, maxTypeLen, typeSyllables)
}
   240  
// containerSyllables holds the word groups that randContainer hands to
// randSyllables, one group per word position.
var containerSyllables = [][]string{
	{"SM", "MED", "JUMBO", "WRAP"},
	{"BOX", "BAG", "JAR", "PKG", "PACK", "CAN", "DRUM"},
}

// maxContainerLen is the buffer size randContainer requests. It equals the
// longest word of each group in containerSyllables joined by a single space
// (5 + 1 + 4).
const maxContainerLen = 10

// randContainer returns the result of randSyllables over containerSyllables,
// built in a buffer of maxContainerLen bytes allocated from a.
func randContainer(rng *rand.Rand, a *bufalloc.ByteAllocator) []byte {
	return randSyllables(rng, a, maxContainerLen, containerSyllables)
}
   251  
   252  var segments = []string{
   253  	"AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD",
   254  }
   255  
   256  func randSegment(rng *rand.Rand) []byte {
   257  	return encoding.UnsafeConvertStringToBytes(segments[rng.Intn(len(segments))])
   258  }
   259  
   260  var priorities = []string{
   261  	"1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED",
   262  }
   263  
   264  func randPriority(rng *rand.Rand) []byte {
   265  	return encoding.UnsafeConvertStringToBytes(priorities[rng.Intn(len(priorities))])
   266  }
   267  
   268  var instructions = []string{
   269  	"DELIVER IN PERSON",
   270  	"COLLECT COD", "NONE",
   271  	"TAKE BACK RETURN",
   272  }
   273  
   274  func randInstruction(rng *rand.Rand) []byte {
   275  	return encoding.UnsafeConvertStringToBytes(instructions[rng.Intn(len(instructions))])
   276  }
   277  
   278  var modes = []string{
   279  	"REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB",
   280  }
   281  
   282  func randMode(rng *rand.Rand) []byte {
   283  	return []byte(modes[rng.Intn(len(modes))])
   284  }