gitlab.com/CoiaPrant/sqlite3@v1.19.1/tpch/dbgen.go (about)

     1  // Copyright 2032 The Sqlite Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bufio"
     9  	"database/sql"
    10  	"fmt"
    11  	"math"
    12  	"math/big"
    13  	"os"
    14  	"path/filepath"
    15  	"strconv"
    16  	"strings"
    17  	"time"
    18  
    19  	"modernc.org/mathutil"
    20  	"gitlab.com/CoiaPrant/sqlite3/tpch/driver"
    21  )
    22  
    23  var (
    24  	// .2.2.12 All dates must be computed using the following values:
    25  	//
    26  	// STARTDATE = 1992-01-01 CURRENTDATE = 1995-06-17 ENDDATE = 1998-12-31
    27  	StartDate   = time.Date(1992, 1, 1, 0, 0, 0, 0, time.UTC)
    28  	CurrentDate = time.Date(1995, 6, 17, 12, 12, 0, 0, time.UTC)
    29  	EndDate     = time.Date(1998, 12, 31, 23, 59, 59, 999999999, time.UTC)
    30  
    31  	seed, _ = mathutil.NewFCBig(big.NewInt(0), big.NewInt(math.MaxInt64), true)
    32  	prices  []int32
    33  	maxRecs = -1
    34  )
    35  
    36  func ns2time(ns int64) time.Time { return time.Unix(ns/1e9, ns%1e9).UTC() }
    37  
    38  // 1.3 Datatype Definitions
    39  //
    40  // 1.3.1 The following datatype definitions apply to the list of columns of
    41  // each table:
    42  //
    43  // - Identifier means that the column must be able to hold any key value
    44  // generated for that column and be able to support at least 2,147,483,647
    45  // unique values;
    46  //
    47  // Comment: A common implementation of this datatype will be an integer.
    48  // However, for SF greater than 300 some column values will exceed the range of
    49  // integer values supported by a 4-byte integer. A test sponsor may use some
    50  // other datatype such as 8-byte integer, decimal or character string to
    51  // implement the identifier datatype;
    52  //
    53  // - Integer means that the column must be able to exactly represent integer
    54  // values (i.e., values in increments of 1) in the range of at least
    55  // -2,147,483,646 to 2,147,483,647.
    56  //
    57  // - Decimal means that the column must be able to represent values in the
    58  // range -9,999,999,999.99 to +9,999,999,999.99 in increments of 0.01; the
    59  // values can be either represented exactly or interpreted to be in this range;
    60  //
    61  // - Big Decimal is of the Decimal datatype as defined above, with the
    62  // additional property that it must be large enough to represent the aggregated
    63  // values stored in temporary tables created within query variants;
    64  //
    65  // - Fixed text, size N means that the column must be able to hold any string
    66  // of characters of a fixed length of N.
    67  //
    68  // Comment: If the string it holds is shorter than N characters, then trailing
    69  // spaces must be stored in the database or the database must automatically pad
    70  // with spaces upon retrieval such that a CHAR_LENGTH() function will return N.
    71  //
    72  // - Variable text, size N means that the column must be able to hold any
    73  // string of characters of a variable length with a maximum length of N.
    74  // Columns defined as "variable text, size N" may optionally be implemented as
    75  // "fixed text, size N";
    76  //
    77  // - Date is a value whose external representation can be expressed as
    78  // YYYY-MM-DD, where all characters are numeric. A date must be able to express
    79  // any day within at least 14 consecutive years. There is no requirement
    80  // specific to the internal representation of a date.
    81  //
    82  // Comment: The implementation datatype chosen by the test sponsor for a
    83  // particular datatype definition must be applied consistently to all the
    84  // instances of that datatype definition in the schema, except for identifier
    85  // columns, whose datatype may be selected to satisfy database scaling
    86  // requirements.
    87  //
    88  // 1.3.2 The symbol SF is used in this document to represent the scale factor
    89  // for the database (see Clause 4: ).
    90  
    91  type rng struct {
    92  	r *mathutil.FCBig
    93  }
    94  
    95  func newRng(lo, hi int64) *rng {
    96  	r, err := mathutil.NewFCBig(big.NewInt(lo), big.NewInt(hi), true)
    97  	if err != nil {
    98  		panic("internal error")
    99  	}
   100  
   101  	r.Seed(seed.Next().Int64())
   102  	return &rng{r}
   103  }
   104  
   105  func (r *rng) n() int64 {
   106  	return r.r.Next().Int64()
   107  }
   108  
   109  // 4.2.2.2 The term unique within [x] represents any one value within a set of
   110  // x values between 1 and x, unique within the scope of rows being populated.
   111  func uniqueWithin(x int64) *rng { return newRng(1, x) }
   112  
   113  // 4.2.2.3 The notation random value [x .. y] represents a random value between
   114  // x and y inclusively, with a mean of (x+y)/2, and with the same number of
   115  // digits of precision as shown. For example, [0.01 .. 100.00] has 10,000
   116  // unique values, whereas [1..100] has only 100 unique values.
   117  func (r *rng) randomValue(min, max int64) int64 {
   118  	return min + r.n()%(max-min+1)
   119  }
   120  
   121  // 4.2.2.7 The notation random v-string [min, max] represents a string
   122  // comprised of randomly generated alphanumeric characters within a character
   123  // set of at least 64 symbols. The length of the string is a random value
   124  // between min and max inclusive.
   125  func (r *rng) vString(min, max int) string {
   126  	l := min + int(r.n())%(max-min+1)
   127  	b := make([]byte, l)
   128  	for i := range b {
   129  		b[i] = '0' + byte(r.n()%64)
   130  	}
   131  	return string(b)
   132  }
   133  
   134  // 4.2.2.9 The term phone number represents a string of numeric characters
   135  // separated by hyphens and generated as follows:
   136  //
   137  // Let i be an index into the list of strings Nations (i.e., ALGERIA is 0,
   138  // ARGENTINA is 1, etc., see Clause 4.2.3),
   139  //
   140  // Let country_code be the sub-string representation of the number (i + 10),
   141  //
   142  // Let local_number1 be random [100 .. 999],
   143  //
   144  // Let local_number2 be random [100 .. 999],
   145  //
   146  // Let local_number3 be random [1000 .. 9999],
   147  //
   148  // The phone number string is obtained by concatenating the following
   149  // sub-strings:
   150  //
   151  // country_code, "-", local_number1, "-", local_number2, "-", local_number3
   152  func (r *rng) phoneNumber(i int) string {
   153  	return fmt.Sprintf("%v-%v-%v-%v", i+10, 100+r.n()%900, 100+r.n()%900, 1000+r.n()%9000)
   154  }
   155  
   156  // 4.2.2.10 The term text string[min, max] represents a substring of a 300 MB
   157  // string populated according to the pseudo text grammar defined in Clause
   158  // 4.2.2.14. The length of the substring is a random number between min and max
   159  // inclusive. The substring offset is randomly chosen.
   160  func (r *rng) textString(min, max int64) string {
   161  	off := r.n() % (int64(len(pseudotext)) - max)
   162  	l := min + r.n()%(max-min+1)
   163  	return string(pseudotext[off : off+l])
   164  }
   165  
   166  var (
   167  	types1 = []string{
   168  		"STANDARD",
   169  		"SMALL",
   170  		"MEDIUM",
   171  		"LARGE",
   172  		"ECONOMY",
   173  		"PROMO",
   174  	}
   175  	types2 = []string{
   176  		"ANODIZED",
   177  		"BURNISHED",
   178  		"PLATED",
   179  		"POLISHED",
   180  		"BRUSHED",
   181  	}
   182  	types3 = []string{
   183  		"TIN",
   184  		"NICKEL",
   185  		"BRASS",
   186  		"STELL",
   187  		"COPPER",
   188  	}
   189  )
   190  
   191  // 4.2.2.13
   192  
   193  func (r *rng) types() string {
   194  	return types1[int(r.n())%len(types1)] + " " + types2[int(r.n())%len(types2)] + " " + types3[int(r.n())%len(types3)]
   195  }
   196  
   197  var (
   198  	containers1 = []string{
   199  		"SM",
   200  		"LG",
   201  		"MED",
   202  		"JUMBO",
   203  		"WRAP",
   204  	}
   205  	containers2 = []string{
   206  		"CASE",
   207  		"BOX",
   208  		"BAG",
   209  		"JAR",
   210  		"PKG",
   211  		"PACK",
   212  		"CAN",
   213  		"DRUM",
   214  	}
   215  )
   216  
   217  func (r *rng) containers() string {
   218  	return containers1[int(r.n())%len(containers1)] + " " + containers2[int(r.n())%len(containers2)]
   219  }
   220  
   221  var segments1 = []string{
   222  	"AUTOMOBILE",
   223  	"BUILDING",
   224  	"FURNITURE",
   225  	"MACHINERY",
   226  	"HOUSEHOLD",
   227  }
   228  
   229  func (r *rng) segments() string {
   230  	return segments1[int(r.n())%len(segments1)]
   231  }
   232  
   233  var priorities1 = []string{
   234  	"1-URGENT",
   235  	"2-HIGH",
   236  	"3-MEDIUM",
   237  	"4-NOT SPECIFIED",
   238  	"5-LOW",
   239  }
   240  
   241  func (r *rng) priorities() string {
   242  	return priorities1[int(r.n())%len(priorities1)]
   243  }
   244  
   245  var instructions1 = []string{
   246  	"DELIVER IN PERSON",
   247  	"COLLECT COD",
   248  	"NONE",
   249  	"TAKE BACK RETURN",
   250  }
   251  
   252  func (r *rng) instructions() string {
   253  	return instructions1[int(r.n())%len(instructions1)]
   254  }
   255  
   256  var modes1 = []string{
   257  	"REG AIR",
   258  	"AIR",
   259  	"RAIL",
   260  	"SHIP",
   261  	"TRUCK",
   262  	"MAIL",
   263  	"FOB",
   264  }
   265  
   266  func (r *rng) modes() string {
   267  	return modes1[int(r.n())%len(modes1)]
   268  }
   269  
   270  var nouns1 = []string{
   271  	"foxes",
   272  	"ideas",
   273  	"theodolites",
   274  	"pinto",
   275  	"beans",
   276  	"instructions",
   277  	"dependencies",
   278  	"excuses",
   279  	"platelets",
   280  	"asymptotes",
   281  	"courts",
   282  	"dolphins",
   283  	"multipliers",
   284  	"sauternes",
   285  	"warthogs",
   286  	"frets",
   287  	"dinos",
   288  	"attainments",
   289  	"somas",
   290  	"Tiresias'",
   291  	"patterns",
   292  	"forges",
   293  	"braids",
   294  	"hockey",
   295  	"players",
   296  	"frays",
   297  	"warhorses",
   298  	"dugouts",
   299  	"notornis",
   300  	"epitaphs",
   301  	"pearls",
   302  	"tithes",
   303  	"waters",
   304  	"orbits",
   305  	"gifts",
   306  	"sheaves",
   307  	"depths",
   308  	"sentiments",
   309  	"decoys",
   310  	"realms",
   311  	"pains",
   312  	"grouches",
   313  	"escapades",
   314  }
   315  
   316  func (r *rng) nouns() string {
   317  	return nouns1[int(r.n())%len(nouns1)]
   318  }
   319  
   320  var verbs1 = []string{
   321  	"sleep",
   322  	"wake",
   323  	"are",
   324  	"cajole",
   325  	"haggle",
   326  	"nag",
   327  	"use",
   328  	"boost",
   329  	"affix",
   330  	"detect",
   331  	"integrate",
   332  	"maintain",
   333  	"nod",
   334  	"was",
   335  	"lose",
   336  	"sublate",
   337  	"solve",
   338  	"thrash",
   339  	"promise",
   340  	"engage",
   341  	"hinder",
   342  	"print",
   343  	"x-ray",
   344  	"breach",
   345  	"eat",
   346  	"grow",
   347  	"impress",
   348  	"mold",
   349  	"poach",
   350  	"serve",
   351  	"run",
   352  	"dazzle",
   353  	"snooze",
   354  	"doze",
   355  	"unwind",
   356  	"kindle",
   357  	"play",
   358  	"hang",
   359  	"believe",
   360  	"doubt",
   361  }
   362  
   363  func (r *rng) verbs() string {
   364  	return verbs1[int(r.n())%len(verbs1)]
   365  }
   366  
   367  var adjectives1 = []string{
   368  	"furious",
   369  	"sly",
   370  	"careful",
   371  	"blithe",
   372  	"quick",
   373  	"fluffy",
   374  	"slow",
   375  	"quiet",
   376  	"ruthless",
   377  	"thin",
   378  	"close",
   379  	"dogged",
   380  	"daring",
   381  	"brave",
   382  	"stealthy",
   383  	"permanent",
   384  	"enticing",
   385  	"idle",
   386  	"busy",
   387  	"regular",
   388  	"final",
   389  	"ironic",
   390  	"even",
   391  	"bold",
   392  	"silent",
   393  }
   394  
   395  func (r *rng) adjectives() string {
   396  	return adjectives1[int(r.n())%len(adjectives1)]
   397  }
   398  
   399  var adverbs1 = []string{
   400  	"sometimes",
   401  	"always",
   402  	"never",
   403  	"furiously",
   404  	"slyly",
   405  	"carefully",
   406  	"blithely",
   407  	"quickly",
   408  	"fluffily",
   409  	"slowly",
   410  	"quietly",
   411  	"ruthlessly",
   412  	"thinly",
   413  	"closely",
   414  	"doggedly",
   415  	"daringly",
   416  	"bravely",
   417  	"stealthily",
   418  	"permanently",
   419  	"enticingly",
   420  	"idly",
   421  	"busily",
   422  	"regularly",
   423  	"finally",
   424  	"ironically",
   425  	"evenly",
   426  	"boldly",
   427  	"silently",
   428  }
   429  
   430  func (r *rng) adverbs() string {
   431  	return adverbs1[int(r.n())%len(adverbs1)]
   432  }
   433  
   434  var prepositions1 = []string{
   435  	"about",
   436  	"above",
   437  	"according to",
   438  	"across",
   439  	"after",
   440  	"against",
   441  	"along",
   442  	"alongside of",
   443  	"among",
   444  	"around",
   445  	"at",
   446  	"atop",
   447  	"before",
   448  	"behind",
   449  	"beneath",
   450  	"beside",
   451  	"besides",
   452  	"between",
   453  	"beyond",
   454  	"by",
   455  	"despite",
   456  	"during",
   457  	"except",
   458  	"for",
   459  	"from",
   460  	"in place of",
   461  	"inside",
   462  	"instead of",
   463  	"into",
   464  	"near",
   465  	"of",
   466  	"on",
   467  	"outside",
   468  	"over",
   469  	"past",
   470  	"since",
   471  	"through",
   472  	"throughout",
   473  	"to",
   474  	"toward",
   475  	"under",
   476  	"until",
   477  	"up",
   478  	"upon",
   479  	"without",
   480  	"with",
   481  	"within",
   482  }
   483  
   484  func (r *rng) prepositions() string {
   485  	return prepositions1[int(r.n())%len(prepositions1)]
   486  }
   487  
   488  var auxiliaries1 = []string{
   489  	"do",
   490  	"may",
   491  	"might",
   492  	"shall",
   493  	"will",
   494  	"would",
   495  	"can",
   496  	"could",
   497  	"should",
   498  	"ought to",
   499  	"must",
   500  	"will have to",
   501  	"shall have to",
   502  	"could have to",
   503  	"should have to",
   504  	"must have to",
   505  	"need to",
   506  	"try to",
   507  }
   508  
   509  func (r *rng) auxiliaries() string {
   510  	return auxiliaries1[int(r.n())%len(auxiliaries1)]
   511  }
   512  
   513  var terminators1 = []string{
   514  	".",
   515  	";",
   516  	"!",
   517  	":",
   518  	"?",
   519  	"--",
   520  }
   521  
   522  func (r *rng) terminators() string {
   523  	return terminators1[int(r.n())%len(terminators1)]
   524  }
   525  
   526  var pseudotext []byte
   527  
   528  func genPseudotext() (err error) {
   529  	pth := filepath.Join("testdata", "pseudotext")
   530  	if _, err = os.Stat(pth); err == nil {
   531  		return fmt.Errorf("file already exists: %s", pth)
   532  	}
   533  
   534  	if !os.IsNotExist(err) {
   535  		return err
   536  	}
   537  
   538  	if err = os.MkdirAll("testdata", 0766); err != nil {
   539  		return err
   540  	}
   541  
   542  	f, err := os.Create(pth)
   543  	if err != nil {
   544  		return err
   545  	}
   546  
   547  	defer func() {
   548  		if cerr := f.Close(); cerr != nil && err == nil {
   549  			err = cerr
   550  		}
   551  	}()
   552  
   553  	w := bufio.NewWriter(f)
   554  
   555  	defer func() {
   556  		if ferr := w.Flush(); ferr != nil && err == nil {
   557  			err = ferr
   558  		}
   559  	}()
   560  
   561  	const sz = 300 * 1e6
   562  	r := newRng(0, math.MaxInt64)
   563  
   564  	nounPhrase := func() string {
   565  		switch r.n() % 4 {
   566  		case 0: // noun phrase:<noun>
   567  			return r.nouns()
   568  		case 1: // |<adjective> <noun>
   569  			return r.adjectives() + " " + r.nouns()
   570  		case 2: // |<adjective>, <adjective> <noun>
   571  			return r.adjectives() + ", " + r.adjectives() + " " + r.nouns()
   572  		case 3: // |<adverb> <adjective> <noun>
   573  			return r.adverbs() + " " + r.adjectives() + " " + r.nouns()
   574  		}
   575  		panic("internal error")
   576  	}
   577  
   578  	verbPhrase := func() string {
   579  		switch r.n() % 4 {
   580  		case 0: // verb phrase:<verb>
   581  			return r.verbs()
   582  		case 1: // |<auxiliary> <verb>
   583  			return r.auxiliaries() + " " + r.verbs()
   584  		case 2: // |<verb> <adverb>
   585  			return r.verbs() + " " + r.adverbs()
   586  		case 3: // |<auxiliary> <verb> <adverb>
   587  			return r.auxiliaries() + " " + r.verbs() + " " + r.adverbs()
   588  		}
   589  		panic("internal error")
   590  	}
   591  
   592  	prepositionalPhrase := func() string {
   593  		// prepositional phrase: <preposition> the <noun phrase>
   594  		return r.prepositions() + " the " + nounPhrase()
   595  	}
   596  
   597  	sentence := func() string {
   598  		switch r.n() % 5 {
   599  		case 0: // sentence:<noun phrase> <verb phrase> <terminator>
   600  			return nounPhrase() + " " + verbPhrase() + r.terminators()
   601  		case 1: // |<noun phrase> <verb phrase> <prepositional phrase> <terminator>
   602  			return nounPhrase() + " " + verbPhrase() + " " + prepositionalPhrase() + r.terminators()
   603  		case 2: // |<noun phrase> <verb phrase> <noun phrase> <terminator>
   604  			return nounPhrase() + " " + verbPhrase() + " " + nounPhrase() + r.terminators()
   605  		case 3: // |<noun phrase> <prepositional phrase> <verb phrase> <noun phrase> <terminator>
   606  			return nounPhrase() + " " + prepositionalPhrase() + " " + verbPhrase() + " " + nounPhrase() + r.terminators()
   607  		case 4: // |<noun phrase> <prepositional phrase> <verb phrase> <prepositional phrase> <terminator>
   608  			return nounPhrase() + " " + prepositionalPhrase() + " " + verbPhrase() + " " + prepositionalPhrase() + r.terminators()
   609  		}
   610  		panic("internal error")
   611  	}
   612  
   613  	n := 0
   614  	for n < sz {
   615  		s := sentence() + " "
   616  		if _, err = w.WriteString(s); err != nil {
   617  			return err
   618  		}
   619  
   620  		n += len(s)
   621  	}
   622  	return nil
   623  }
   624  
   625  func pthForSUT(sut driver.SUT, sf int) string {
   626  	return filepath.Join("testdata", sut.Name(), "sf"+strconv.Itoa(sf))
   627  }
   628  
   629  func dbGen(sut driver.SUT, sf int) (err error) {
   630  	if pseudotext, err = os.ReadFile(filepath.Join("testdata", "pseudotext")); err != nil {
   631  		return fmt.Errorf("Run this program with -pseudotext: %v", err)
   632  	}
   633  
   634  	pth := pthForSUT(sut, sf)
   635  	if err = os.MkdirAll(pth, 0766); err != nil {
   636  		return err
   637  	}
   638  
   639  	if err = sut.SetWD(pth); err != nil {
   640  		return err
   641  	}
   642  
   643  	db, err := sut.OpenDB()
   644  	if err != nil {
   645  		return err
   646  	}
   647  
   648  	defer func() {
   649  		if cerr := db.Close(); cerr != nil && err == nil {
   650  			err = cerr
   651  		}
   652  	}()
   653  
   654  	if err = sut.CreateTables(); err != nil {
   655  		return err
   656  	}
   657  
   658  	if err = genSupplier(db, sf, sut); err != nil {
   659  		return err
   660  	}
   661  
   662  	if err = genPartAndPartSupp(db, sf, sut); err != nil {
   663  		return err
   664  	}
   665  
   666  	if err = genCustomerAndOrders(db, sf, sut); err != nil {
   667  		return err
   668  	}
   669  
   670  	if err = genNation(db, sf, sut); err != nil {
   671  		return err
   672  	}
   673  
   674  	return genRegion(db, sf, sut)
   675  }
   676  
   677  func genSupplier(db *sql.DB, sf int, sut driver.SUT) (err error) {
   678  	recs := 10000
   679  	if n := maxRecs; n >= 0 {
   680  		recs = n
   681  	}
   682  
   683  	keyrng := uniqueWithin(int64(sf) * int64(recs))
   684  	rng5 := uniqueWithin(int64(sf) * int64(recs))
   685  	sf5rows := make(map[int64]bool)
   686  	for i := 0; i < sf*5; i++ {
   687  		sf5rows[rng5.n()] = true
   688  		sf5rows[rng5.n()] = false
   689  	}
   690  	rng := newRng(0, math.MaxInt64)
   691  	tx, err := db.Begin()
   692  	if err != nil {
   693  		return err
   694  	}
   695  	stmt, err := tx.Prepare(sut.InsertSupplier())
   696  	if err != nil {
   697  		return err
   698  	}
   699  
   700  	// SF * 10,000 rows in the SUPPLIER table with:
   701  	// S_SUPPKEY unique within [SF * 10,000].
   702  	// S_NAME text appended with minimum 9 digits with leading zeros ["Supplie#r", S_SUPPKEY].
   703  	// S_ADDRESS random v-string[10,40].
   704  	// S_NATIONKEY random value [0 .. 24].
   705  	// S_PHONE generated according to Clause 4.2.2.9.
   706  	// S_ACCTBAL random value [-999.99 .. 9,999.99].
   707  	// S_COMMENT text string [25,100].
   708  	//	SF * 5 rows are randomly selected to hold at a random position
   709  	//	a string matching "Customer%Complaints". Another SF * 5 rows
   710  	//	are randomly selected to hold at a random position a string
   711  	//	matching "Customer%Recommends", where % is a wildcard that
   712  	//	denotes zero or more characters.
   713  	for i := 0; i < sf*recs; i++ {
   714  		sSuppKey := keyrng.n()
   715  		nk := int(rng.n() % 25)
   716  		sComment := rng.textString(25, 100)
   717  		if b, ok := sf5rows[sSuppKey]; ok {
   718  			s := "Complaints"
   719  			if b {
   720  				s = "Recommends"
   721  			}
   722  			s = "Customer" + rng.vString(0, 4) + s
   723  			off := int(rng.randomValue(0, int64(len(sComment)-len(s))))
   724  			sComment = sComment[:off] + s + sComment[off+len(s):]
   725  		}
   726  		if _, err := stmt.Exec(
   727  			sSuppKey,
   728  			fmt.Sprintf("Supplier#%09d", sSuppKey),
   729  			rng.vString(10, 40),
   730  			nk,
   731  			rng.phoneNumber(nk),
   732  			rng.randomValue(-99999, 999999),
   733  			sComment,
   734  		); err != nil {
   735  			return err
   736  		}
   737  	}
   738  
   739  	return tx.Commit()
   740  }
   741  
   742  var pnames1 = []string{
   743  	"almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue",
   744  	"blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral",
   745  	"cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick",
   746  	"floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew",
   747  	"hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen",
   748  	"magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo",
   749  	"navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder",
   750  	"puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", "sienna",
   751  	"sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", "turquoise", "violet",
   752  	"wheat", "white", "yellow",
   753  }
   754  
   755  func genPartAndPartSupp(db *sql.DB, sf int, sut driver.SUT) (err error) {
   756  	recs := 200000
   757  	if n := maxRecs; n >= 0 {
   758  		recs = n
   759  	}
   760  
   761  	prices = make([]int32, sf*recs)
   762  	a := make([]string, 5)
   763  	var tx *sql.Tx
   764  	var stmt, stmt2 *sql.Stmt
   765  
   766  	// SF * 200,000 rows in the PART table with:
   767  	// P_PARTKEY unique within [SF * 200,000].
   768  	// P_NAME generated by concatenating five unique randomly selected strings from the following list,
   769  	// separated by a single space:
   770  	//	{"almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue",
   771  	//	"blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral",
   772  	//	"cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick",
   773  	//	"floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew",
   774  	//	"hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen",
   775  	//	"magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo",
   776  	//	"navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder",
   777  	//	"puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", "sienna",
   778  	//	"sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", "turquoise", "violet",
   779  	//	"wheat", "white", "yellow"}.
   780  	// P_MFGR text appended with digit ["Manufacturer#",M], where M = random value [1,5].
   781  	// P_BRAND text appended with digits ["Brand#",MN], where N = random value [1,5] and M is defined
   782  	//	while generating P_MFGR.
   783  	// P_TYPE random string [Types].
   784  	// P_SIZE random value [1 .. 50].
   785  	// P_CONTAINER random string [Containers].
   786  	// P_RETAILPRICE = (90000 + ((P_PARTKEY/10) modulo 20001 ) + 100 * (P_PARTKEY modulo 1000))/100
   787  	// P_COMMENT text string [5,22].
   788  	keyrng := uniqueWithin(int64(sf) * int64(recs))
   789  	rng := newRng(0, math.MaxInt64)
   790  	for i := 0; i < sf*recs; i++ {
   791  		if i%1000 == 0 {
   792  			if i != 0 {
   793  				if err = tx.Commit(); err != nil {
   794  					return err
   795  				}
   796  			}
   797  
   798  			if tx, err = db.Begin(); err != nil {
   799  				return err
   800  			}
   801  
   802  			if stmt, err = tx.Prepare(sut.InsertPart()); err != nil {
   803  				return err
   804  			}
   805  
   806  			if stmt2, err = tx.Prepare(sut.InsertPartSupp()); err != nil {
   807  				return err
   808  			}
   809  		}
   810  		pPartKey := keyrng.n()
   811  		a = a[:0]
   812  		for len(a) < 5 {
   813  		again:
   814  			s := pnames1[rng.n()%int64(len(pnames1))]
   815  			for _, v := range a {
   816  				if s == v {
   817  					goto again
   818  				}
   819  			}
   820  
   821  			a = append(a, s)
   822  		}
   823  		m := rng.randomValue(1, 5)
   824  		pRetailPrice := 90000 + ((pPartKey / 10) % 20001) + 100*(pPartKey%1000)
   825  		prices[pPartKey-1] = int32(pRetailPrice)
   826  		if _, err := stmt.Exec(
   827  			pPartKey,
   828  			strings.Join(a, " "),
   829  			fmt.Sprintf("Manufacturer#%d", m),
   830  			fmt.Sprintf("Brand#%d%d", m, rng.randomValue(1, 5)),
   831  			rng.types(),
   832  			rng.randomValue(1, 50),
   833  			rng.containers(),
   834  			pRetailPrice,
   835  			rng.textString(5, 22),
   836  		); err != nil {
   837  			return err
   838  		}
   839  
   840  		// For each row in the PART table, four rows in PartSupp table with:
   841  		// PS_PARTKEY = P_PARTKEY.
   842  		// PS_SUPPKEY = (ps_partkey + (i * (( S/4 ) + (int)(ps_partkey-1 )/S)))) modulo S + 1
   843  		//	where i is the ith supplier within [0 .. 3] and S = SF * 10,000.
   844  		// PS_AVAILQTY random value [1 .. 9,999].
   845  		// PS_SUPPLYCOST random value [1.00 .. 1,000.00].
   846  		// PS_COMMENT text string [49,198].
   847  		s := int64(sf) * 10000
   848  		psPartKey := pPartKey
   849  		for i := 0; i < 4; i++ {
   850  			if _, err := stmt2.Exec(
   851  				psPartKey,
   852  				(psPartKey+(int64(i)*((s/4)+(psPartKey-1)/s)))%(s+1),
   853  				rng.randomValue(1, 9999),
   854  				rng.randomValue(100, 100000),
   855  				rng.textString(49, 198),
   856  			); err != nil {
   857  				return err
   858  			}
   859  		}
   860  	}
   861  
   862  	return tx.Commit()
   863  }
   864  
   865  func genCustomerAndOrders(db *sql.DB, sf int, sut driver.SUT) (err error) {
   866  	recs := 150000
   867  	if n := maxRecs; n >= 0 {
   868  		recs = n
   869  	}
   870  
   871  	var tx *sql.Tx
   872  	var stmt, stmt2, stmt3 *sql.Stmt
   873  	s := int64(sf) * 10000
   874  
   875  	minDate := StartDate.UnixNano()
   876  	maxDate := EndDate.UnixNano() - 151*24*int64(time.Hour)
   877  
   878  	// SF * 150,000 rows in CUSTOMER table with:
   879  	// C_CUSTKEY unique within [SF * 150,000].
   880  	// C_NAME text appended with minimum 9 digits with leading zeros ["Customer#", C_CUSTKEY].
   881  	// C_ADDRESS random v-string [10,40].
   882  	// C_NATIONKEY random value [0 .. 24].
   883  	// C_PHONE generated according to Clause 4.2.2.9.
   884  	// C_ACCTBAL random value [-999.99 .. 9,999.99].
   885  	// C_MKTSEGMENT random string [Segments].
   886  	// C_COMMENT text string [29,116].
   887  	keyrng := uniqueWithin(int64(sf) * int64(recs))
   888  	keyrng2 := uniqueWithin(int64(sf) * 10 * int64(recs))
   889  	rng := newRng(0, math.MaxInt64)
   890  	for i := 0; i < sf*recs; i++ {
   891  		if i%1000 == 0 {
   892  			if i != 0 {
   893  				if err = tx.Commit(); err != nil {
   894  					return err
   895  				}
   896  			}
   897  
   898  			if tx, err = db.Begin(); err != nil {
   899  				return err
   900  			}
   901  
   902  			if stmt, err = tx.Prepare(sut.InsertCustomer()); err != nil {
   903  				return err
   904  			}
   905  
   906  			if stmt2, err = tx.Prepare(sut.InsertOrders()); err != nil {
   907  				return err
   908  			}
   909  
   910  			if stmt3, err = tx.Prepare(sut.InsertLineItem()); err != nil {
   911  				return err
   912  			}
   913  		}
   914  		cCustKey := keyrng.n()
   915  		nk := rng.randomValue(0, 24)
   916  		if _, err := stmt.Exec(
   917  			cCustKey,
   918  			fmt.Sprintf("Customer#%09d", cCustKey),
   919  			rng.vString(10, 40),
   920  			nk,
   921  			rng.phoneNumber(int(nk)),
   922  			rng.randomValue(-99999, 999999),
   923  			rng.segments(),
   924  			rng.textString(29, 116),
   925  		); err != nil {
   926  			return err
   927  		}
   928  
   929  		// For each row in the CUSTOMER table, ten rows in the ORDERS
   930  		// table with: O_ORDERKEY unique within [SF * 1,500,000 * 4].
   931  		//
   932  		// Comment: The ORDERS and LINEITEM tables are sparsely
   933  		// populated by generating a key value that causes the first 8
   934  		// keys of each 32 to be populated, yielding a 25% use of the
   935  		// key range. Test sponsors must not take advantage of this
   936  		// aspect of the benchmark. For example, horizontally
   937  		// partitioning the test database onto different devices in
   938  		// order to place unused areas onto separate peripherals is
   939  		// prohibited.
   940  		//
   941  		// O_CUSTKEY = random value [1 .. (SF * 150,000)].
   942  		//	The generation of this random value must be such that
   943  		//	O_CUSTKEY modulo 3 is not zero.
   944  		//
   945  		//	Comment: Orders are not present for all customers. Every
   946  		//	third customer (in C_CUSTKEY order) is not assigned any
   947  		//	order.
   948  		//
   949  		// O_ORDERSTATUS set to the following value:
   950  		//	"F" if all lineitems of this order have L_LINESTATUS set to "F".
   951  		//	"O" if all lineitems of this order have L_LINESTATUS set to "O".
   952  		//	"P" otherwise.
   953  		// O_TOTALPRICE computed as:
   954  		//	sum (L_EXTENDEDPRICE * (1+L_TAX) * (1-L_DISCOUNT)) for all LINEITEM of this order.
   955  		// O_ORDERDATE uniformly distributed between STARTDATE and (ENDDATE - 151 days).
   956  		// O_ORDERPRIORITY random string [Priorities].
   957  		// O_CLERK text appended with minimum 9 digits with leading zeros ["Clerk#", C] where C = random value [000000001 .. (SF * 1000)].
   958  		// O_SHIPPRIORITY set to 0.
   959  		// O_COMMENT text string [19,78].
   960  
   961  		for i := 0; i < 10; i++ {
   962  			var oCustKey int64
   963  			for {
   964  				oCustKey = rng.randomValue(1, int64(sf)*int64(recs))
   965  				if oCustKey%3 != 0 {
   966  					break
   967  				}
   968  			}
   969  			oOrderKey := keyrng2.n() - 1                    // Zero base.
   970  			oOrderKey = oOrderKey/8*32 + oOrderKey%8 + 1    // 1 based, sparseness as specified above.
   971  			oOrderDate := rng.randomValue(minDate, maxDate) // unix ns
   972  			oOrderStatus := "X"
   973  			var oTotalPrice int64
   974  
   975  			{
   976  				// For each row in the ORDERS table, a random number of rows within [1 .. 7] in the LINEITEM table with:
   977  				// L_ORDERKEY = O_ORDERKEY.
   978  				// L_PARTKEY random value [1 .. (SF * 200,000)].
   979  				// L_SUPPKEY = (L_PARTKEY + (i * (( S/4 ) + (int)(L_partkey-1 )/S)))) modulo S + 1
   980  				//	where i is the corresponding supplier within [0 .. 3] and S = SF * 10,000.
   981  				// L_LINENUMBER unique within [7].
   982  				// L_QUANTITY random value [1 .. 50].
   983  				// L_EXTENDEDPRICE = L_QUANTITY * P_RETAILPRICE
   984  				//	Where P_RETAILPRICE is from the part with P_PARTKEY = L_PARTKEY.
   985  				// L_DISCOUNT random value [0.00 .. 0.10].
   986  				// L_TAX random value [0.00 .. 0.08].
   987  				// L_RETURNFLAG set to a value selected as follows:
   988  				//	If L_RECEIPTDATE <= CURRENTDATE
   989  				//	then either "R" or "A" is selected at random
   990  				//	else "N" is selected.
   991  				// L_LINESTATUS set the following value:
   992  				//	"O" if L_SHIPDATE > CURRENTDATE
   993  				//	"F" otherwise.
   994  				// L_SHIPDATE = O_ORDERDATE + random value [1 .. 121].
   995  				// L_COMMITDATE = O_ORDERDATE + random value [30 .. 90].
   996  				// L_RECEIPTDATE = L_SHIPDATE + random value [1 .. 30].
   997  				// L_SHIPINSTRUCT random string [Instructions].
   998  				// L_SHIPMODE random string [Modes].
   999  				// L_COMMENT text string [10,43].
  1000  				n := int(rng.randomValue(1, 7))
  1001  				lRng := uniqueWithin(7)
  1002  				qty := rng.randomValue(100, 5000)
  1003  				lShipDate := ns2time(oOrderDate + rng.randomValue(1, 121)*24*int64(time.Hour))
  1004  				lCommitDate := ns2time(oOrderDate + rng.randomValue(30, 90)*24*int64(time.Hour))
  1005  				lReceiptDate := ns2time(oOrderDate + rng.randomValue(1, 30)*24*int64(time.Hour))
  1006  				lReturnFlag := "X"
  1007  				switch {
  1008  				case lReceiptDate.Before(CurrentDate) || lReceiptDate.Equal(CurrentDate):
  1009  					if rng.n()&1 == 0 {
  1010  						lReturnFlag = "R"
  1011  						break
  1012  					}
  1013  
  1014  					lReturnFlag = "A"
  1015  				default:
  1016  					lReturnFlag = "N"
  1017  				}
  1018  				lLineStatus := "F"
  1019  				if lShipDate.After(CurrentDate) {
  1020  					lLineStatus = "O"
  1021  				}
  1022  				switch {
  1023  				case oOrderStatus == "X":
  1024  					oOrderStatus = lLineStatus
  1025  				case oOrderStatus != lLineStatus:
  1026  					oOrderStatus = "P"
  1027  				}
  1028  				for i := 0; i < n; i++ {
  1029  					lPartKey := rng.randomValue(1, int64(len(prices)))
  1030  					pRetailPrice := int64(prices[lPartKey-1])
  1031  					lExtendedPrice := qty * pRetailPrice / 100
  1032  					lTax := rng.randomValue(0, 8)
  1033  					lDiscount := rng.randomValue(0, 10)
  1034  					oTotalPrice += lExtendedPrice * (100 + lTax) * (100 - lDiscount) / 100 / 100
  1035  					if _, err := stmt3.Exec(
  1036  						oOrderKey,
  1037  						lPartKey,
  1038  						(lPartKey+(int64(i)*(s/4+(lPartKey-1)/s)))%(s+1),
  1039  						lRng.n(),
  1040  						qty,
  1041  						lExtendedPrice,
  1042  						lDiscount,
  1043  						lTax,
  1044  						lReturnFlag,
  1045  						lLineStatus,
  1046  						lShipDate,
  1047  						lCommitDate,
  1048  						lReceiptDate,
  1049  						rng.instructions(),
  1050  						rng.modes(),
  1051  						rng.textString(10, 43),
  1052  					); err != nil {
  1053  						return err
  1054  					}
  1055  				}
  1056  			}
  1057  
  1058  			if _, err := stmt2.Exec(
  1059  				oOrderKey,
  1060  				oCustKey,
  1061  				oOrderStatus,
  1062  				oTotalPrice,
  1063  				ns2time(oOrderDate/1e9),
  1064  				rng.priorities(),
  1065  				fmt.Sprintf("Clerk#%09d", rng.randomValue(1, int64(sf)*1000)),
  1066  				0,
  1067  				rng.textString(19, 78),
  1068  			); err != nil {
  1069  				return err
  1070  			}
  1071  		}
  1072  	}
  1073  
  1074  	return tx.Commit()
  1075  }
  1076  
  1077  var nations = []struct {
  1078  	name      string
  1079  	regionKey int
  1080  }{
  1081  	{"ALGERIA", 0},
  1082  	{"ARGENTINA", 1},
  1083  	{"BRAZIL", 1},
  1084  	{"CANADA", 1},
  1085  	{"EGYPT", 4},
  1086  	{"ETHIOPIA", 0},
  1087  	{"FRANCE", 3},
  1088  	{"GERMANY", 3},
  1089  	{"INDIA", 2},
  1090  	{"INDONESIA", 2},
  1091  	{"IRAN", 4},
  1092  	{"IRAQ", 4},
  1093  	{"JAPAN", 2},
  1094  	{"JORDAN", 4},
  1095  	{"KENYA", 0},
  1096  	{"MOROCCO", 0},
  1097  	{"MOZAMBIQUE", 0},
  1098  	{"PERU", 1},
  1099  	{"CHINA", 2},
  1100  	{"ROMANIA", 3},
  1101  	{"SAUDI ARABIA", 4},
  1102  	{"VIETNAM", 2},
  1103  	{"RUSSIA", 3},
  1104  	{"UNITED KINGDOM", 3},
  1105  	{"UNITED STATES", 1},
  1106  }
  1107  
  1108  func genNation(db *sql.DB, sf int, sut driver.SUT) (err error) {
  1109  	rng := newRng(0, math.MaxInt64)
  1110  	tx, err := db.Begin()
  1111  	if err != nil {
  1112  		return err
  1113  	}
  1114  
  1115  	stmt, err := tx.Prepare(sut.InsertNation())
  1116  	if err != nil {
  1117  		return err
  1118  	}
  1119  
  1120  	// 25 rows in the NATION table with:
  1121  	// N_NATIONKEY unique value between 0 and 24.
  1122  	// N_NAME string from the following series of (N_NATIONKEY, N_NAME, N_REGIONKEY).
  1123  	// (0, ALGERIA, 0);(1, ARGENTINA, 1);(2, BRAZIL, 1);
  1124  	// (3, CANADA, 1);(4, EGYPT, 4);(5, ETHIOPIA, 0);
  1125  	// (6, FRANCE, 3);(7, GERMANY, 3);(8, INDIA, 2);
  1126  	// (9, INDONESIA, 2);(10, IRAN, 4);(11, IRAQ, 4);
  1127  	// (12, JAPAN, 2);(13, JORDAN, 4);(14, KENYA, 0);
  1128  	// (15, MOROCCO, 0);(16, MOZAMBIQUE, 0);(17, PERU, 1);
  1129  	// (18, CHINA, 2);(19, ROMANIA, 3);(20, SAUDI ARABIA, 4);
  1130  	// (21, VIETNAM, 2);(22, RUSSIA, 3);(23, UNITED KINGDOM, 3);
  1131  	// (24, UNITED STATES, 1)
  1132  	// N_REGIONKEY is taken from the series above.
  1133  	// N_COMMENT text string [31,114].
  1134  	for i, v := range nations {
  1135  		if _, err := stmt.Exec(
  1136  			int64(i),
  1137  			v.name,
  1138  			int64(v.regionKey),
  1139  			rng.textString(31, 114),
  1140  		); err != nil {
  1141  			return err
  1142  		}
  1143  	}
  1144  	return tx.Commit()
  1145  }
  1146  
  1147  var regions1 = []string{
  1148  	"AFRICA",
  1149  	"AMERICA",
  1150  	"ASIA",
  1151  	"EUROPE",
  1152  	"MIDDLE EAST",
  1153  }
  1154  
  1155  func (r *rng) regions() string {
  1156  	return regions1[int(r.n())%len(regions1)]
  1157  }
  1158  
  1159  func genRegion(db *sql.DB, sf int, sut driver.SUT) (err error) {
  1160  	rng := newRng(0, math.MaxInt64)
  1161  	tx, err := db.Begin()
  1162  	if err != nil {
  1163  		return err
  1164  	}
  1165  
  1166  	stmt, err := tx.Prepare(sut.InsertRegion())
  1167  	if err != nil {
  1168  		return err
  1169  	}
  1170  
  1171  	// 5 rows in the REGION table with:
  1172  	// R_REGIONKEY unique value between 0 and 4.
  1173  	// R_NAME string from the following series of (R_REGIONKEY, R_NAME).
  1174  	// (0, AFRICA);(1, AMERICA);
  1175  	// (2, ASIA);
  1176  	// (3, EUROPE);(4, MIDDLE EAST)
  1177  	// R_COMMENT text string [31,115].
  1178  	for i, v := range regions1 {
  1179  		if _, err := stmt.Exec(
  1180  			int64(i),
  1181  			v,
  1182  			rng.textString(31, 115),
  1183  		); err != nil {
  1184  			return err
  1185  		}
  1186  	}
  1187  	return tx.Commit()
  1188  }