gitlab.com/CoiaPrant/sqlite3@v1.19.1/tpch/dbgen.go (about) 1 // Copyright 2032 The Sqlite Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "bufio" 9 "database/sql" 10 "fmt" 11 "math" 12 "math/big" 13 "os" 14 "path/filepath" 15 "strconv" 16 "strings" 17 "time" 18 19 "modernc.org/mathutil" 20 "gitlab.com/CoiaPrant/sqlite3/tpch/driver" 21 ) 22 23 var ( 24 // .2.2.12 All dates must be computed using the following values: 25 // 26 // STARTDATE = 1992-01-01 CURRENTDATE = 1995-06-17 ENDDATE = 1998-12-31 27 StartDate = time.Date(1992, 1, 1, 0, 0, 0, 0, time.UTC) 28 CurrentDate = time.Date(1995, 6, 17, 12, 12, 0, 0, time.UTC) 29 EndDate = time.Date(1998, 12, 31, 23, 59, 59, 999999999, time.UTC) 30 31 seed, _ = mathutil.NewFCBig(big.NewInt(0), big.NewInt(math.MaxInt64), true) 32 prices []int32 33 maxRecs = -1 34 ) 35 36 func ns2time(ns int64) time.Time { return time.Unix(ns/1e9, ns%1e9).UTC() } 37 38 // 1.3 Datatype Definitions 39 // 40 // 1.3.1 The following datatype definitions apply to the list of columns of 41 // each table: 42 // 43 // - Identifier means that the column must be able to hold any key value 44 // generated for that column and be able to support at least 2,147,483,647 45 // unique values; 46 // 47 // Comment: A common implementation of this datatype will be an integer. 48 // However, for SF greater than 300 some column values will exceed the range of 49 // integer values supported by a 4-byte integer. A test sponsor may use some 50 // other datatype such as 8-byte integer, decimal or character string to 51 // implement the identifier datatype; 52 // 53 // - Integer means that the column must be able to exactly represent integer 54 // values (i.e., values in increments of 1) in the range of at least 55 // -2,147,483,646 to 2,147,483,647. 56 // 57 // - Decimal means that the column must be able to represent values in the 58 // range -9,999,999,999.99 to +9,999,999,999.99 in increments of 0.01; the 59 // values can be either represented exactly or interpreted to be in this range; 60 // 61 // - Big Decimal is of the Decimal datatype as defined above, with the 62 // additional property that it must be large enough to represent the aggregated 63 // values stored in temporary tables created within query variants; 64 // 65 // - Fixed text, size N means that the column must be able to hold any string 66 // of characters of a fixed length of N. 67 // 68 // Comment: If the string it holds is shorter than N characters, then trailing 69 // spaces must be stored in the database or the database must automatically pad 70 // with spaces upon retrieval such that a CHAR_LENGTH() function will return N. 71 // 72 // - Variable text, size N means that the column must be able to hold any 73 // string of characters of a variable length with a maximum length of N. 74 // Columns defined as "variable text, size N" may optionally be implemented as 75 // "fixed text, size N"; 76 // 77 // - Date is a value whose external representation can be expressed as 78 // YYYY-MM-DD, where all characters are numeric. A date must be able to express 79 // any day within at least 14 consecutive years. There is no requirement 80 // specific to the internal representation of a date. 81 // 82 // Comment: The implementation datatype chosen by the test sponsor for a 83 // particular datatype definition must be applied consistently to all the 84 // instances of that datatype definition in the schema, except for identifier 85 // columns, whose datatype may be selected to satisfy database scaling 86 // requirements. 87 // 88 // 1.3.2 The symbol SF is used in this document to represent the scale factor 89 // for the database (see Clause 4: ). 90 91 type rng struct { 92 r *mathutil.FCBig 93 } 94 95 func newRng(lo, hi int64) *rng { 96 r, err := mathutil.NewFCBig(big.NewInt(lo), big.NewInt(hi), true) 97 if err != nil { 98 panic("internal error") 99 } 100 101 r.Seed(seed.Next().Int64()) 102 return &rng{r} 103 } 104 105 func (r *rng) n() int64 { 106 return r.r.Next().Int64() 107 } 108 109 // 4.2.2.2 The term unique within [x] represents any one value within a set of 110 // x values between 1 and x, unique within the scope of rows being populated. 111 func uniqueWithin(x int64) *rng { return newRng(1, x) } 112 113 // 4.2.2.3 The notation random value [x .. y] represents a random value between 114 // x and y inclusively, with a mean of (x+y)/2, and with the same number of 115 // digits of precision as shown. For example, [0.01 .. 100.00] has 10,000 116 // unique values, whereas [1..100] has only 100 unique values. 117 func (r *rng) randomValue(min, max int64) int64 { 118 return min + r.n()%(max-min+1) 119 } 120 121 // 4.2.2.7 The notation random v-string [min, max] represents a string 122 // comprised of randomly generated alphanumeric characters within a character 123 // set of at least 64 symbols. The length of the string is a random value 124 // between min and max inclusive. 125 func (r *rng) vString(min, max int) string { 126 l := min + int(r.n())%(max-min+1) 127 b := make([]byte, l) 128 for i := range b { 129 b[i] = '0' + byte(r.n()%64) 130 } 131 return string(b) 132 } 133 134 // 4.2.2.9 The term phone number represents a string of numeric characters 135 // separated by hyphens and generated as follows: 136 // 137 // Let i be an index into the list of strings Nations (i.e., ALGERIA is 0, 138 // ARGENTINA is 1, etc., see Clause 4.2.3), 139 // 140 // Let country_code be the sub-string representation of the number (i + 10), 141 // 142 // Let local_number1 be random [100 .. 999], 143 // 144 // Let local_number2 be random [100 .. 999], 145 // 146 // Let local_number3 be random [1000 .. 9999], 147 // 148 // The phone number string is obtained by concatenating the following 149 // sub-strings: 150 // 151 // country_code, "-", local_number1, "-", local_number2, "-", local_number3 152 func (r *rng) phoneNumber(i int) string { 153 return fmt.Sprintf("%v-%v-%v-%v", i+10, 100+r.n()%900, 100+r.n()%900, 1000+r.n()%9000) 154 } 155 156 // 4.2.2.10 The term text string[min, max] represents a substring of a 300 MB 157 // string populated according to the pseudo text grammar defined in Clause 158 // 4.2.2.14. The length of the substring is a random number between min and max 159 // inclusive. The substring offset is randomly chosen. 160 func (r *rng) textString(min, max int64) string { 161 off := r.n() % (int64(len(pseudotext)) - max) 162 l := min + r.n()%(max-min+1) 163 return string(pseudotext[off : off+l]) 164 } 165 166 var ( 167 types1 = []string{ 168 "STANDARD", 169 "SMALL", 170 "MEDIUM", 171 "LARGE", 172 "ECONOMY", 173 "PROMO", 174 } 175 types2 = []string{ 176 "ANODIZED", 177 "BURNISHED", 178 "PLATED", 179 "POLISHED", 180 "BRUSHED", 181 } 182 types3 = []string{ 183 "TIN", 184 "NICKEL", 185 "BRASS", 186 "STELL", 187 "COPPER", 188 } 189 ) 190 191 // 4.2.2.13 192 193 func (r *rng) types() string { 194 return types1[int(r.n())%len(types1)] + " " + types2[int(r.n())%len(types2)] + " " + types3[int(r.n())%len(types3)] 195 } 196 197 var ( 198 containers1 = []string{ 199 "SM", 200 "LG", 201 "MED", 202 "JUMBO", 203 "WRAP", 204 } 205 containers2 = []string{ 206 "CASE", 207 "BOX", 208 "BAG", 209 "JAR", 210 "PKG", 211 "PACK", 212 "CAN", 213 "DRUM", 214 } 215 ) 216 217 func (r *rng) containers() string { 218 return containers1[int(r.n())%len(containers1)] + " " + containers2[int(r.n())%len(containers2)] 219 } 220 221 var segments1 = []string{ 222 "AUTOMOBILE", 223 "BUILDING", 224 "FURNITURE", 225 "MACHINERY", 226 "HOUSEHOLD", 227 } 228 229 func (r *rng) segments() string { 230 return segments1[int(r.n())%len(segments1)] 231 } 232 233 var priorities1 = []string{ 234 "1-URGENT", 235 "2-HIGH", 236 "3-MEDIUM", 237 "4-NOT SPECIFIED", 238 "5-LOW", 239 } 240 241 func (r *rng) priorities() string { 242 return priorities1[int(r.n())%len(priorities1)] 243 } 244 245 var instructions1 = []string{ 246 "DELIVER IN PERSON", 247 "COLLECT COD", 248 "NONE", 249 "TAKE BACK RETURN", 250 } 251 252 func (r *rng) instructions() string { 253 return instructions1[int(r.n())%len(instructions1)] 254 } 255 256 var modes1 = []string{ 257 "REG AIR", 258 "AIR", 259 "RAIL", 260 "SHIP", 261 "TRUCK", 262 "MAIL", 263 "FOB", 264 } 265 266 func (r *rng) modes() string { 267 return modes1[int(r.n())%len(modes1)] 268 } 269 270 var nouns1 = []string{ 271 "foxes", 272 "ideas", 273 "theodolites", 274 "pinto", 275 "beans", 276 "instructions", 277 "dependencies", 278 "excuses", 279 "platelets", 280 "asymptotes", 281 "courts", 282 "dolphins", 283 "multipliers", 284 "sauternes", 285 "warthogs", 286 "frets", 287 "dinos", 288 "attainments", 289 "somas", 290 "Tiresias'", 291 "patterns", 292 "forges", 293 "braids", 294 "hockey", 295 "players", 296 "frays", 297 "warhorses", 298 "dugouts", 299 "notornis", 300 "epitaphs", 301 "pearls", 302 "tithes", 303 "waters", 304 "orbits", 305 "gifts", 306 "sheaves", 307 "depths", 308 "sentiments", 309 "decoys", 310 "realms", 311 "pains", 312 "grouches", 313 "escapades", 314 } 315 316 func (r *rng) nouns() string { 317 return nouns1[int(r.n())%len(nouns1)] 318 } 319 320 var verbs1 = []string{ 321 "sleep", 322 "wake", 323 "are", 324 "cajole", 325 "haggle", 326 "nag", 327 "use", 328 "boost", 329 "affix", 330 "detect", 331 "integrate", 332 "maintain", 333 "nod", 334 "was", 335 "lose", 336 "sublate", 337 "solve", 338 "thrash", 339 "promise", 340 "engage", 341 "hinder", 342 "print", 343 "x-ray", 344 "breach", 345 "eat", 346 "grow", 347 "impress", 348 "mold", 349 "poach", 350 "serve", 351 "run", 352 "dazzle", 353 "snooze", 354 "doze", 355 "unwind", 356 "kindle", 357 "play", 358 "hang", 359 "believe", 360 "doubt", 361 } 362 363 func (r *rng) verbs() string { 364 return verbs1[int(r.n())%len(verbs1)] 365 } 366 367 var adjectives1 = []string{ 368 "furious", 369 "sly", 370 "careful", 371 "blithe", 372 "quick", 373 "fluffy", 374 "slow", 375 "quiet", 376 "ruthless", 377 "thin", 378 "close", 379 "dogged", 380 "daring", 381 "brave", 382 "stealthy", 383 "permanent", 384 "enticing", 385 "idle", 386 "busy", 387 "regular", 388 "final", 389 "ironic", 390 "even", 391 "bold", 392 "silent", 393 } 394 395 func (r *rng) adjectives() string { 396 return adjectives1[int(r.n())%len(adjectives1)] 397 } 398 399 var adverbs1 = []string{ 400 "sometimes", 401 "always", 402 "never", 403 "furiously", 404 "slyly", 405 "carefully", 406 "blithely", 407 "quickly", 408 "fluffily", 409 "slowly", 410 "quietly", 411 "ruthlessly", 412 "thinly", 413 "closely", 414 "doggedly", 415 "daringly", 416 "bravely", 417 "stealthily", 418 "permanently", 419 "enticingly", 420 "idly", 421 "busily", 422 "regularly", 423 "finally", 424 "ironically", 425 "evenly", 426 "boldly", 427 "silently", 428 } 429 430 func (r *rng) adverbs() string { 431 return adverbs1[int(r.n())%len(adverbs1)] 432 } 433 434 var prepositions1 = []string{ 435 "about", 436 "above", 437 "according to", 438 "across", 439 "after", 440 "against", 441 "along", 442 "alongside of", 443 "among", 444 "around", 445 "at", 446 "atop", 447 "before", 448 "behind", 449 "beneath", 450 "beside", 451 "besides", 452 "between", 453 "beyond", 454 "by", 455 "despite", 456 "during", 457 "except", 458 "for", 459 "from", 460 "in place of", 461 "inside", 462 "instead of", 463 "into", 464 "near", 465 "of", 466 "on", 467 "outside", 468 "over", 469 "past", 470 "since", 471 "through", 472 "throughout", 473 "to", 474 "toward", 475 "under", 476 "until", 477 "up", 478 "upon", 479 "without", 480 "with", 481 "within", 482 } 483 484 func (r *rng) prepositions() string { 485 return prepositions1[int(r.n())%len(prepositions1)] 486 } 487 488 var auxiliaries1 = []string{ 489 "do", 490 "may", 491 "might", 492 "shall", 493 "will", 494 "would", 495 "can", 496 "could", 497 "should", 498 "ought to", 499 "must", 500 "will have to", 501 "shall have to", 502 "could have to", 503 "should have to", 504 "must have to", 505 "need to", 506 "try to", 507 } 508 509 func (r *rng) auxiliaries() string { 510 return auxiliaries1[int(r.n())%len(auxiliaries1)] 511 } 512 513 var terminators1 = []string{ 514 ".", 515 ";", 516 "!", 517 ":", 518 "?", 519 "--", 520 } 521 522 func (r *rng) terminators() string { 523 return terminators1[int(r.n())%len(terminators1)] 524 } 525 526 var pseudotext []byte 527 528 func genPseudotext() (err error) { 529 pth := filepath.Join("testdata", "pseudotext") 530 if _, err = os.Stat(pth); err == nil { 531 return fmt.Errorf("file already exists: %s", pth) 532 } 533 534 if !os.IsNotExist(err) { 535 return err 536 } 537 538 if err = os.MkdirAll("testdata", 0766); err != nil { 539 return err 540 } 541 542 f, err := os.Create(pth) 543 if err != nil { 544 return err 545 } 546 547 defer func() { 548 if cerr := f.Close(); cerr != nil && err == nil { 549 err = cerr 550 } 551 }() 552 553 w := bufio.NewWriter(f) 554 555 defer func() { 556 if ferr := w.Flush(); ferr != nil && err == nil { 557 err = ferr 558 } 559 }() 560 561 const sz = 300 * 1e6 562 r := newRng(0, math.MaxInt64) 563 564 nounPhrase := func() string { 565 switch r.n() % 4 { 566 case 0: // noun phrase:<noun> 567 return r.nouns() 568 case 1: // |<adjective> <noun> 569 return r.adjectives() + " " + r.nouns() 570 case 2: // |<adjective>, <adjective> <noun> 571 return r.adjectives() + ", " + r.adjectives() + " " + r.nouns() 572 case 3: // |<adverb> <adjective> <noun> 573 return r.adverbs() + " " + r.adjectives() + " " + r.nouns() 574 } 575 panic("internal error") 576 } 577 578 verbPhrase := func() string { 579 switch r.n() % 4 { 580 case 0: // verb phrase:<verb> 581 return r.verbs() 582 case 1: // |<auxiliary> <verb> 583 return r.auxiliaries() + " " + r.verbs() 584 case 2: // |<verb> <adverb> 585 return r.verbs() + " " + r.adverbs() 586 case 3: // |<auxiliary> <verb> <adverb> 587 return r.auxiliaries() + " " + r.verbs() + " " + r.adverbs() 588 } 589 panic("internal error") 590 } 591 592 prepositionalPhrase := func() string { 593 // prepositional phrase: <preposition> the <noun phrase> 594 return r.prepositions() + " the " + nounPhrase() 595 } 596 597 sentence := func() string { 598 switch r.n() % 5 { 599 case 0: // sentence:<noun phrase> <verb phrase> <terminator> 600 return nounPhrase() + " " + verbPhrase() + r.terminators() 601 case 1: // |<noun phrase> <verb phrase> <prepositional phrase> <terminator> 602 return nounPhrase() + " " + verbPhrase() + " " + prepositionalPhrase() + r.terminators() 603 case 2: // |<noun phrase> <verb phrase> <noun phrase> <terminator> 604 return nounPhrase() + " " + verbPhrase() + " " + nounPhrase() + r.terminators() 605 case 3: // |<noun phrase> <prepositional phrase> <verb phrase> <noun phrase> <terminator> 606 return nounPhrase() + " " + prepositionalPhrase() + " " + verbPhrase() + " " + nounPhrase() + r.terminators() 607 case 4: // |<noun phrase> <prepositional phrase> <verb phrase> <prepositional phrase> <terminator> 608 return nounPhrase() + " " + prepositionalPhrase() + " " + verbPhrase() + " " + prepositionalPhrase() + r.terminators() 609 } 610 panic("internal error") 611 } 612 613 n := 0 614 for n < sz { 615 s := sentence() + " " 616 if _, err = w.WriteString(s); err != nil { 617 return err 618 } 619 620 n += len(s) 621 } 622 return nil 623 } 624 625 func pthForSUT(sut driver.SUT, sf int) string { 626 return filepath.Join("testdata", sut.Name(), "sf"+strconv.Itoa(sf)) 627 } 628 629 func dbGen(sut driver.SUT, sf int) (err error) { 630 if pseudotext, err = os.ReadFile(filepath.Join("testdata", "pseudotext")); err != nil { 631 return fmt.Errorf("Run this program with -pseudotext: %v", err) 632 } 633 634 pth := pthForSUT(sut, sf) 635 if err = os.MkdirAll(pth, 0766); err != nil { 636 return err 637 } 638 639 if err = sut.SetWD(pth); err != nil { 640 return err 641 } 642 643 db, err := sut.OpenDB() 644 if err != nil { 645 return err 646 } 647 648 defer func() { 649 if cerr := db.Close(); cerr != nil && err == nil { 650 err = cerr 651 } 652 }() 653 654 if err = sut.CreateTables(); err != nil { 655 return err 656 } 657 658 if err = genSupplier(db, sf, sut); err != nil { 659 return err 660 } 661 662 if err = genPartAndPartSupp(db, sf, sut); err != nil { 663 return err 664 } 665 666 if err = genCustomerAndOrders(db, sf, sut); err != nil { 667 return err 668 } 669 670 if err = genNation(db, sf, sut); err != nil { 671 return err 672 } 673 674 return genRegion(db, sf, sut) 675 } 676 677 func genSupplier(db *sql.DB, sf int, sut driver.SUT) (err error) { 678 recs := 10000 679 if n := maxRecs; n >= 0 { 680 recs = n 681 } 682 683 keyrng := uniqueWithin(int64(sf) * int64(recs)) 684 rng5 := uniqueWithin(int64(sf) * int64(recs)) 685 sf5rows := make(map[int64]bool) 686 for i := 0; i < sf*5; i++ { 687 sf5rows[rng5.n()] = true 688 sf5rows[rng5.n()] = false 689 } 690 rng := newRng(0, math.MaxInt64) 691 tx, err := db.Begin() 692 if err != nil { 693 return err 694 } 695 stmt, err := tx.Prepare(sut.InsertSupplier()) 696 if err != nil { 697 return err 698 } 699 700 // SF * 10,000 rows in the SUPPLIER table with: 701 // S_SUPPKEY unique within [SF * 10,000]. 702 // S_NAME text appended with minimum 9 digits with leading zeros ["Supplie#r", S_SUPPKEY]. 703 // S_ADDRESS random v-string[10,40]. 704 // S_NATIONKEY random value [0 .. 24]. 705 // S_PHONE generated according to Clause 4.2.2.9. 706 // S_ACCTBAL random value [-999.99 .. 9,999.99]. 707 // S_COMMENT text string [25,100]. 708 // SF * 5 rows are randomly selected to hold at a random position 709 // a string matching "Customer%Complaints". Another SF * 5 rows 710 // are randomly selected to hold at a random position a string 711 // matching "Customer%Recommends", where % is a wildcard that 712 // denotes zero or more characters. 713 for i := 0; i < sf*recs; i++ { 714 sSuppKey := keyrng.n() 715 nk := int(rng.n() % 25) 716 sComment := rng.textString(25, 100) 717 if b, ok := sf5rows[sSuppKey]; ok { 718 s := "Complaints" 719 if b { 720 s = "Recommends" 721 } 722 s = "Customer" + rng.vString(0, 4) + s 723 off := int(rng.randomValue(0, int64(len(sComment)-len(s)))) 724 sComment = sComment[:off] + s + sComment[off+len(s):] 725 } 726 if _, err := stmt.Exec( 727 sSuppKey, 728 fmt.Sprintf("Supplier#%09d", sSuppKey), 729 rng.vString(10, 40), 730 nk, 731 rng.phoneNumber(nk), 732 rng.randomValue(-99999, 999999), 733 sComment, 734 ); err != nil { 735 return err 736 } 737 } 738 739 return tx.Commit() 740 } 741 742 var pnames1 = []string{ 743 "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue", 744 "blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral", 745 "cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick", 746 "floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", 747 "hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen", 748 "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", 749 "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder", 750 "puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", "sienna", 751 "sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", "turquoise", "violet", 752 "wheat", "white", "yellow", 753 } 754 755 func genPartAndPartSupp(db *sql.DB, sf int, sut driver.SUT) (err error) { 756 recs := 200000 757 if n := maxRecs; n >= 0 { 758 recs = n 759 } 760 761 prices = make([]int32, sf*recs) 762 a := make([]string, 5) 763 var tx *sql.Tx 764 var stmt, stmt2 *sql.Stmt 765 766 // SF * 200,000 rows in the PART table with: 767 // P_PARTKEY unique within [SF * 200,000]. 768 // P_NAME generated by concatenating five unique randomly selected strings from the following list, 769 // separated by a single space: 770 // {"almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", "blanched", "blue", 771 // "blush", "brown", "burlywood", "burnished", "chartreuse", "chiffon", "chocolate", "coral", 772 // "cornflower", "cornsilk", "cream", "cyan", "dark", "deep", "dim", "dodger", "drab", "firebrick", 773 // "floral", "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", "honeydew", 774 // "hot", "indian", "ivory", "khaki", "lace", "lavender", "lawn", "lemon", "light", "lime", "linen", 775 // "magenta", "maroon", "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", 776 // "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", "peru", "pink", "plum", "powder", 777 // "puff", "purple", "red", "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", "sienna", 778 // "sky", "slate", "smoke", "snow", "spring", "steel", "tan", "thistle", "tomato", "turquoise", "violet", 779 // "wheat", "white", "yellow"}. 780 // P_MFGR text appended with digit ["Manufacturer#",M], where M = random value [1,5]. 781 // P_BRAND text appended with digits ["Brand#",MN], where N = random value [1,5] and M is defined 782 // while generating P_MFGR. 783 // P_TYPE random string [Types]. 784 // P_SIZE random value [1 .. 50]. 785 // P_CONTAINER random string [Containers]. 786 // P_RETAILPRICE = (90000 + ((P_PARTKEY/10) modulo 20001 ) + 100 * (P_PARTKEY modulo 1000))/100 787 // P_COMMENT text string [5,22]. 788 keyrng := uniqueWithin(int64(sf) * int64(recs)) 789 rng := newRng(0, math.MaxInt64) 790 for i := 0; i < sf*recs; i++ { 791 if i%1000 == 0 { 792 if i != 0 { 793 if err = tx.Commit(); err != nil { 794 return err 795 } 796 } 797 798 if tx, err = db.Begin(); err != nil { 799 return err 800 } 801 802 if stmt, err = tx.Prepare(sut.InsertPart()); err != nil { 803 return err 804 } 805 806 if stmt2, err = tx.Prepare(sut.InsertPartSupp()); err != nil { 807 return err 808 } 809 } 810 pPartKey := keyrng.n() 811 a = a[:0] 812 for len(a) < 5 { 813 again: 814 s := pnames1[rng.n()%int64(len(pnames1))] 815 for _, v := range a { 816 if s == v { 817 goto again 818 } 819 } 820 821 a = append(a, s) 822 } 823 m := rng.randomValue(1, 5) 824 pRetailPrice := 90000 + ((pPartKey / 10) % 20001) + 100*(pPartKey%1000) 825 prices[pPartKey-1] = int32(pRetailPrice) 826 if _, err := stmt.Exec( 827 pPartKey, 828 strings.Join(a, " "), 829 fmt.Sprintf("Manufacturer#%d", m), 830 fmt.Sprintf("Brand#%d%d", m, rng.randomValue(1, 5)), 831 rng.types(), 832 rng.randomValue(1, 50), 833 rng.containers(), 834 pRetailPrice, 835 rng.textString(5, 22), 836 ); err != nil { 837 return err 838 } 839 840 // For each row in the PART table, four rows in PartSupp table with: 841 // PS_PARTKEY = P_PARTKEY. 842 // PS_SUPPKEY = (ps_partkey + (i * (( S/4 ) + (int)(ps_partkey-1 )/S)))) modulo S + 1 843 // where i is the ith supplier within [0 .. 3] and S = SF * 10,000. 844 // PS_AVAILQTY random value [1 .. 9,999]. 845 // PS_SUPPLYCOST random value [1.00 .. 1,000.00]. 846 // PS_COMMENT text string [49,198]. 847 s := int64(sf) * 10000 848 psPartKey := pPartKey 849 for i := 0; i < 4; i++ { 850 if _, err := stmt2.Exec( 851 psPartKey, 852 (psPartKey+(int64(i)*((s/4)+(psPartKey-1)/s)))%(s+1), 853 rng.randomValue(1, 9999), 854 rng.randomValue(100, 100000), 855 rng.textString(49, 198), 856 ); err != nil { 857 return err 858 } 859 } 860 } 861 862 return tx.Commit() 863 } 864 865 func genCustomerAndOrders(db *sql.DB, sf int, sut driver.SUT) (err error) { 866 recs := 150000 867 if n := maxRecs; n >= 0 { 868 recs = n 869 } 870 871 var tx *sql.Tx 872 var stmt, stmt2, stmt3 *sql.Stmt 873 s := int64(sf) * 10000 874 875 minDate := StartDate.UnixNano() 876 maxDate := EndDate.UnixNano() - 151*24*int64(time.Hour) 877 878 // SF * 150,000 rows in CUSTOMER table with: 879 // C_CUSTKEY unique within [SF * 150,000]. 880 // C_NAME text appended with minimum 9 digits with leading zeros ["Customer#", C_CUSTKEY]. 881 // C_ADDRESS random v-string [10,40]. 882 // C_NATIONKEY random value [0 .. 24]. 883 // C_PHONE generated according to Clause 4.2.2.9. 884 // C_ACCTBAL random value [-999.99 .. 9,999.99]. 885 // C_MKTSEGMENT random string [Segments]. 886 // C_COMMENT text string [29,116]. 887 keyrng := uniqueWithin(int64(sf) * int64(recs)) 888 keyrng2 := uniqueWithin(int64(sf) * 10 * int64(recs)) 889 rng := newRng(0, math.MaxInt64) 890 for i := 0; i < sf*recs; i++ { 891 if i%1000 == 0 { 892 if i != 0 { 893 if err = tx.Commit(); err != nil { 894 return err 895 } 896 } 897 898 if tx, err = db.Begin(); err != nil { 899 return err 900 } 901 902 if stmt, err = tx.Prepare(sut.InsertCustomer()); err != nil { 903 return err 904 } 905 906 if stmt2, err = tx.Prepare(sut.InsertOrders()); err != nil { 907 return err 908 } 909 910 if stmt3, err = tx.Prepare(sut.InsertLineItem()); err != nil { 911 return err 912 } 913 } 914 cCustKey := keyrng.n() 915 nk := rng.randomValue(0, 24) 916 if _, err := stmt.Exec( 917 cCustKey, 918 fmt.Sprintf("Customer#%09d", cCustKey), 919 rng.vString(10, 40), 920 nk, 921 rng.phoneNumber(int(nk)), 922 rng.randomValue(-99999, 999999), 923 rng.segments(), 924 rng.textString(29, 116), 925 ); err != nil { 926 return err 927 } 928 929 // For each row in the CUSTOMER table, ten rows in the ORDERS 930 // table with: O_ORDERKEY unique within [SF * 1,500,000 * 4]. 931 // 932 // Comment: The ORDERS and LINEITEM tables are sparsely 933 // populated by generating a key value that causes the first 8 934 // keys of each 32 to be populated, yielding a 25% use of the 935 // key range. Test sponsors must not take advantage of this 936 // aspect of the benchmark. For example, horizontally 937 // partitioning the test database onto different devices in 938 // order to place unused areas onto separate peripherals is 939 // prohibited. 940 // 941 // O_CUSTKEY = random value [1 .. (SF * 150,000)]. 942 // The generation of this random value must be such that 943 // O_CUSTKEY modulo 3 is not zero. 944 // 945 // Comment: Orders are not present for all customers. Every 946 // third customer (in C_CUSTKEY order) is not assigned any 947 // order. 948 // 949 // O_ORDERSTATUS set to the following value: 950 // "F" if all lineitems of this order have L_LINESTATUS set to "F". 951 // "O" if all lineitems of this order have L_LINESTATUS set to "O". 952 // "P" otherwise. 953 // O_TOTALPRICE computed as: 954 // sum (L_EXTENDEDPRICE * (1+L_TAX) * (1-L_DISCOUNT)) for all LINEITEM of this order. 955 // O_ORDERDATE uniformly distributed between STARTDATE and (ENDDATE - 151 days). 956 // O_ORDERPRIORITY random string [Priorities]. 957 // O_CLERK text appended with minimum 9 digits with leading zeros ["Clerk#", C] where C = random value [000000001 .. (SF * 1000)]. 958 // O_SHIPPRIORITY set to 0. 959 // O_COMMENT text string [19,78]. 960 961 for i := 0; i < 10; i++ { 962 var oCustKey int64 963 for { 964 oCustKey = rng.randomValue(1, int64(sf)*int64(recs)) 965 if oCustKey%3 != 0 { 966 break 967 } 968 } 969 oOrderKey := keyrng2.n() - 1 // Zero base. 970 oOrderKey = oOrderKey/8*32 + oOrderKey%8 + 1 // 1 based, sparseness as specified above. 971 oOrderDate := rng.randomValue(minDate, maxDate) // unix ns 972 oOrderStatus := "X" 973 var oTotalPrice int64 974 975 { 976 // For each row in the ORDERS table, a random number of rows within [1 .. 7] in the LINEITEM table with: 977 // L_ORDERKEY = O_ORDERKEY. 978 // L_PARTKEY random value [1 .. (SF * 200,000)]. 979 // L_SUPPKEY = (L_PARTKEY + (i * (( S/4 ) + (int)(L_partkey-1 )/S)))) modulo S + 1 980 // where i is the corresponding supplier within [0 .. 3] and S = SF * 10,000. 981 // L_LINENUMBER unique within [7]. 982 // L_QUANTITY random value [1 .. 50]. 983 // L_EXTENDEDPRICE = L_QUANTITY * P_RETAILPRICE 984 // Where P_RETAILPRICE is from the part with P_PARTKEY = L_PARTKEY. 985 // L_DISCOUNT random value [0.00 .. 0.10]. 986 // L_TAX random value [0.00 .. 0.08]. 987 // L_RETURNFLAG set to a value selected as follows: 988 // If L_RECEIPTDATE <= CURRENTDATE 989 // then either "R" or "A" is selected at random 990 // else "N" is selected. 991 // L_LINESTATUS set the following value: 992 // "O" if L_SHIPDATE > CURRENTDATE 993 // "F" otherwise. 994 // L_SHIPDATE = O_ORDERDATE + random value [1 .. 121]. 995 // L_COMMITDATE = O_ORDERDATE + random value [30 .. 90]. 996 // L_RECEIPTDATE = L_SHIPDATE + random value [1 .. 30]. 997 // L_SHIPINSTRUCT random string [Instructions]. 998 // L_SHIPMODE random string [Modes]. 999 // L_COMMENT text string [10,43]. 1000 n := int(rng.randomValue(1, 7)) 1001 lRng := uniqueWithin(7) 1002 qty := rng.randomValue(100, 5000) 1003 lShipDate := ns2time(oOrderDate + rng.randomValue(1, 121)*24*int64(time.Hour)) 1004 lCommitDate := ns2time(oOrderDate + rng.randomValue(30, 90)*24*int64(time.Hour)) 1005 lReceiptDate := ns2time(oOrderDate + rng.randomValue(1, 30)*24*int64(time.Hour)) 1006 lReturnFlag := "X" 1007 switch { 1008 case lReceiptDate.Before(CurrentDate) || lReceiptDate.Equal(CurrentDate): 1009 if rng.n()&1 == 0 { 1010 lReturnFlag = "R" 1011 break 1012 } 1013 1014 lReturnFlag = "A" 1015 default: 1016 lReturnFlag = "N" 1017 } 1018 lLineStatus := "F" 1019 if lShipDate.After(CurrentDate) { 1020 lLineStatus = "O" 1021 } 1022 switch { 1023 case oOrderStatus == "X": 1024 oOrderStatus = lLineStatus 1025 case oOrderStatus != lLineStatus: 1026 oOrderStatus = "P" 1027 } 1028 for i := 0; i < n; i++ { 1029 lPartKey := rng.randomValue(1, int64(len(prices))) 1030 pRetailPrice := int64(prices[lPartKey-1]) 1031 lExtendedPrice := qty * pRetailPrice / 100 1032 lTax := rng.randomValue(0, 8) 1033 lDiscount := rng.randomValue(0, 10) 1034 oTotalPrice += lExtendedPrice * (100 + lTax) * (100 - lDiscount) / 100 / 100 1035 if _, err := stmt3.Exec( 1036 oOrderKey, 1037 lPartKey, 1038 (lPartKey+(int64(i)*(s/4+(lPartKey-1)/s)))%(s+1), 1039 lRng.n(), 1040 qty, 1041 lExtendedPrice, 1042 lDiscount, 1043 lTax, 1044 lReturnFlag, 1045 lLineStatus, 1046 lShipDate, 1047 lCommitDate, 1048 lReceiptDate, 1049 rng.instructions(), 1050 rng.modes(), 1051 rng.textString(10, 43), 1052 ); err != nil { 1053 return err 1054 } 1055 } 1056 } 1057 1058 if _, err := stmt2.Exec( 1059 oOrderKey, 1060 oCustKey, 1061 oOrderStatus, 1062 oTotalPrice, 1063 ns2time(oOrderDate/1e9), 1064 rng.priorities(), 1065 fmt.Sprintf("Clerk#%09d", rng.randomValue(1, int64(sf)*1000)), 1066 0, 1067 rng.textString(19, 78), 1068 ); err != nil { 1069 return err 1070 } 1071 } 1072 } 1073 1074 return tx.Commit() 1075 } 1076 1077 var nations = []struct { 1078 name string 1079 regionKey int 1080 }{ 1081 {"ALGERIA", 0}, 1082 {"ARGENTINA", 1}, 1083 {"BRAZIL", 1}, 1084 {"CANADA", 1}, 1085 {"EGYPT", 4}, 1086 {"ETHIOPIA", 0}, 1087 {"FRANCE", 3}, 1088 {"GERMANY", 3}, 1089 {"INDIA", 2}, 1090 {"INDONESIA", 2}, 1091 {"IRAN", 4}, 1092 {"IRAQ", 4}, 1093 {"JAPAN", 2}, 1094 {"JORDAN", 4}, 1095 {"KENYA", 0}, 1096 {"MOROCCO", 0}, 1097 {"MOZAMBIQUE", 0}, 1098 {"PERU", 1}, 1099 {"CHINA", 2}, 1100 {"ROMANIA", 3}, 1101 {"SAUDI ARABIA", 4}, 1102 {"VIETNAM", 2}, 1103 {"RUSSIA", 3}, 1104 {"UNITED KINGDOM", 3}, 1105 {"UNITED STATES", 1}, 1106 } 1107 1108 func genNation(db *sql.DB, sf int, sut driver.SUT) (err error) { 1109 rng := newRng(0, math.MaxInt64) 1110 tx, err := db.Begin() 1111 if err != nil { 1112 return err 1113 } 1114 1115 stmt, err := tx.Prepare(sut.InsertNation()) 1116 if err != nil { 1117 return err 1118 } 1119 1120 // 25 rows in the NATION table with: 1121 // N_NATIONKEY unique value between 0 and 24. 1122 // N_NAME string from the following series of (N_NATIONKEY, N_NAME, N_REGIONKEY). 1123 // (0, ALGERIA, 0);(1, ARGENTINA, 1);(2, BRAZIL, 1); 1124 // (3, CANADA, 1);(4, EGYPT, 4);(5, ETHIOPIA, 0); 1125 // (6, FRANCE, 3);(7, GERMANY, 3);(8, INDIA, 2); 1126 // (9, INDONESIA, 2);(10, IRAN, 4);(11, IRAQ, 4); 1127 // (12, JAPAN, 2);(13, JORDAN, 4);(14, KENYA, 0); 1128 // (15, MOROCCO, 0);(16, MOZAMBIQUE, 0);(17, PERU, 1); 1129 // (18, CHINA, 2);(19, ROMANIA, 3);(20, SAUDI ARABIA, 4); 1130 // (21, VIETNAM, 2);(22, RUSSIA, 3);(23, UNITED KINGDOM, 3); 1131 // (24, UNITED STATES, 1) 1132 // N_REGIONKEY is taken from the series above. 1133 // N_COMMENT text string [31,114]. 1134 for i, v := range nations { 1135 if _, err := stmt.Exec( 1136 int64(i), 1137 v.name, 1138 int64(v.regionKey), 1139 rng.textString(31, 114), 1140 ); err != nil { 1141 return err 1142 } 1143 } 1144 return tx.Commit() 1145 } 1146 1147 var regions1 = []string{ 1148 "AFRICA", 1149 "AMERICA", 1150 "ASIA", 1151 "EUROPE", 1152 "MIDDLE EAST", 1153 } 1154 1155 func (r *rng) regions() string { 1156 return regions1[int(r.n())%len(regions1)] 1157 } 1158 1159 func genRegion(db *sql.DB, sf int, sut driver.SUT) (err error) { 1160 rng := newRng(0, math.MaxInt64) 1161 tx, err := db.Begin() 1162 if err != nil { 1163 return err 1164 } 1165 1166 stmt, err := tx.Prepare(sut.InsertRegion()) 1167 if err != nil { 1168 return err 1169 } 1170 1171 // 5 rows in the REGION table with: 1172 // R_REGIONKEY unique value between 0 and 4. 1173 // R_NAME string from the following series of (R_REGIONKEY, R_NAME). 1174 // (0, AFRICA);(1, AMERICA); 1175 // (2, ASIA); 1176 // (3, EUROPE);(4, MIDDLE EAST) 1177 // R_COMMENT text string [31,115]. 1178 for i, v := range regions1 { 1179 if _, err := stmt.Exec( 1180 int64(i), 1181 v, 1182 rng.textString(31, 115), 1183 ); err != nil { 1184 return err 1185 } 1186 } 1187 return tx.Commit() 1188 }