vitess.io/vitess@v0.16.2/go/vt/vttest/randomdata.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vttest
    18  
    19  import (
    20  	"fmt"
    21  	"math/rand"
    22  	"strings"
    23  )
    24  
// FieldGenerator is a callback that generates the value of a single field
// when seeding the database with random data. `name` is the name of the column
// the field belongs to, `t` is its SQL type, and `rng` is the RNG currently
// being used, so as to ensure reproducible generation between runs.
// A FieldGenerator must return the raw SQL data for the field, ready to be
// placed into a SQL statement. The returned value will _NOT_ be escaped.
type FieldGenerator func(name, t string, rng *rand.Rand) (string, error)
    32  
// SeedConfig holds the settings that enable the initialization of the
// local cluster with random data. This struct must be set in Config
// before Setup() is called.
type SeedConfig struct {
	// RngSeed is the seed used to initialize the random number
	// generator that will be used to fill the database with
	// random data. Multiple runs with the same seed will result
	// in the same initial data.
	RngSeed int

	// MinSize is the minimum number of initial rows in each table shard.
	MinSize int

	// MaxSize is the upper bound on the number of initial rows in each
	// table shard. When MaxSize is greater than MinSize, the row count is
	// drawn from the half-open range [MinSize, MaxSize).
	MaxSize int

	// NullProbability is the chance of initializing a field with a NULL
	// value. Only applies to fields that can contain NULL values.
	NullProbability float64

	// RandomField is a callback to generate the value of a random field.
	// SeedConfigDefaults leaves it unset.
	RandomField FieldGenerator
}
    56  
    57  // SeedConfigDefaults returns the default values for SeedConfig
    58  func SeedConfigDefaults() *SeedConfig {
    59  	return &SeedConfig{
    60  		RngSeed:         rand.Int(),
    61  		MinSize:         1000,
    62  		MaxSize:         10000,
    63  		NullProbability: 0.1,
    64  	}
    65  }
    66  
// batchInsertSize is the intended number of rows per batched INSERT;
// populateTable steps through its generated rows in increments of this size.
const batchInsertSize = 1000
    68  
    69  func (db *LocalCluster) batchInsert(dbname, table string, fields []string, rows [][]string) error {
    70  	var (
    71  		fieldNames = strings.Join(fields, ",")
    72  		values     []string
    73  		sql        string
    74  	)
    75  
    76  	for _, row := range rows {
    77  		values = append(values, "("+strings.Join(row, ",")+")")
    78  	}
    79  
    80  	sql = fmt.Sprintf("INSERT IGNORE INTO %s (%s) VALUES %s",
    81  		table, fieldNames, strings.Join(values, ","),
    82  	)
    83  
    84  	return db.Execute([]string{sql}, dbname)
    85  }
    86  
    87  func (db *LocalCluster) randomField(name, t string, allowNull bool, rng *rand.Rand) (string, error) {
    88  	if allowNull && rng.Float64() < db.Seed.NullProbability {
    89  		return "NULL", nil
    90  	}
    91  	return db.Seed.RandomField(name, t, rng)
    92  }
    93  
    94  func (db *LocalCluster) populateTable(dbname, table string, rng *rand.Rand) error {
    95  	fieldInfo, err := db.Query(fmt.Sprintf("DESCRIBE %s", table), dbname, 1024)
    96  	if err != nil {
    97  		return err
    98  	}
    99  
   100  	var (
   101  		minRows    = db.Seed.MinSize
   102  		maxRows    = db.Seed.MaxSize
   103  		numRows    = rng.Intn(maxRows-minRows) + minRows
   104  		rows       [][]string
   105  		fieldNames []string
   106  	)
   107  
   108  	for i := 0; i < numRows; i++ {
   109  		var fields []string
   110  		for _, row := range fieldInfo.Rows {
   111  			fieldName := row[0].ToString()
   112  			fieldType := row[1].ToString()
   113  			allowNull := row[2].ToString() == "YES"
   114  
   115  			f, err := db.randomField(fieldName, fieldType, allowNull, rng)
   116  			if err != nil {
   117  				return err
   118  			}
   119  			fields = append(fields, f)
   120  		}
   121  		rows = append(rows, fields)
   122  	}
   123  
   124  	for _, row := range fieldInfo.Rows {
   125  		fieldNames = append(fieldNames, row[0].ToString())
   126  	}
   127  
   128  	for i := 0; i < len(rows); i += batchInsertSize {
   129  		if err := db.batchInsert(dbname, table, fieldNames, rows); err != nil {
   130  			return err
   131  		}
   132  	}
   133  
   134  	return nil
   135  }
   136  
   137  func (db *LocalCluster) populateShard(dbname string, rng *rand.Rand) error {
   138  	q, err := db.Query("SHOW TABLES", dbname, 1024)
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	for _, row := range q.Rows {
   144  		if err := db.populateTable(dbname, row[0].ToString(), rng); err != nil {
   145  			return err
   146  		}
   147  	}
   148  	return nil
   149  }
   150  
   151  func (db *LocalCluster) populateWithRandomData() error {
   152  	rng := rand.New(rand.NewSource(int64(db.Seed.RngSeed)))
   153  	for _, kpb := range db.Topology.Keyspaces {
   154  		if kpb.ServedFrom != "" {
   155  			continue
   156  		}
   157  		for _, dbname := range db.shardNames(kpb) {
   158  			if err := db.populateShard(dbname, rng); err != nil {
   159  				return err
   160  			}
   161  		}
   162  	}
   163  	return nil
   164  }