vitess.io/vitess@v0.16.2/go/test/stress/stress.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package stress
    18  
    19  import (
    20  	"fmt"
    21  	"math/rand"
    22  	"sync"
    23  	"sync/atomic"
    24  	"testing"
    25  	"time"
    26  
    27  	"vitess.io/vitess/go/mysql"
    28  )
    29  
    30  const (
    31  	// Template used to create new table in the database.
    32  	// TODO: have dynamic schemas
    33  	templateNewTable = `create table %s (
    34  	id bigint,
    35  	val varchar(64),
    36  	primary key (id)
    37  ) Engine=InnoDB
    38  `
    39  )
    40  
    41  type (
    42  	table struct {
    43  		name         string
    44  		rows, nextID int
    45  		mu           sync.Mutex
    46  	}
    47  
    48  	// Stresser is responsible for stressing a Vitess cluster based on a given Config.
    49  	// Stressing a Vitess cluster is achieved by spawning several clients that continuously
    50  	// send queries to the cluster.
    51  	//
    52  	// The Stresser uses SELECT, INSERT and DELETE statements to stress the cluster. Queries
    53  	// are made against tables that are generated when calling Stresser.Start().
    54  	// For each query, we keep its status (failed or succeeded) and at the end of the stress,
    55  	// when calling Stresser.Stop() or Stresser.StopAfter(), we assert that all queries have
    56  	// succeeded, otherwise the Stresser will fail the test.
    57  	//
    58  	// This behavior can be changed by the use of Stresser.AllowFailure(bool) and the AllowFailure
    59  	// field of Config.
    60  	//
    61  	// Below is an a sample usage of the Stresser:
    62  	//
    63  	//		// copy the DefaultConfig and set your own mysql.ConnParams
    64  	//		cfg := stress.DefaultConfig
    65  	//		cfg.ConnParams = &mysql.ConnParams{Port: 8888, Host: "localhost", DbName: "ks"}
    66  	//		s := stress.New(t, cfg).Start()
    67  	//
    68  	//		// your end to end test here
    69  	//
    70  	//		s.Stop() // stop the Stresser and assert its results
    71  	//
    72  	Stresser struct {
    73  		cfg      Config
    74  		doneCh   chan result
    75  		tbls     []*table
    76  		duration time.Duration
    77  		start    time.Time
    78  		t        *testing.T
    79  		finish   uint32
    80  		cfgMu    sync.Mutex
    81  	}
    82  
    83  	// Config contains all of the Stresser configuration.
    84  	Config struct {
    85  		// MaximumDuration during which each client can stress the cluster.
    86  		MaximumDuration time.Duration
    87  
    88  		// MinimumDuration during which each client must stress the cluster.
    89  		MinimumDuration time.Duration
    90  
    91  		// PrintErrLogs enables or disables the rendering of MySQL error logs.
    92  		PrintErrLogs bool
    93  
    94  		// PrintLogs enables or disables the rendering of Stresser logs.
    95  		PrintLogs bool
    96  
    97  		// NumberOfTables to create in the cluster.
    98  		NumberOfTables int
    99  
   100  		// TableNamePrefix defines which prefix will be used for name of the auto-generated tables.
   101  		TableNamePrefix string
   102  
   103  		// InsertInterval defines at which interval each insert queries should be sent.
   104  		InsertInterval time.Duration
   105  
   106  		// DeleteInterval defines at which interval each delete queries should be sent.
   107  		DeleteInterval time.Duration
   108  
   109  		// SelectInterval defines at which interval each select queries should be sent.
   110  		SelectInterval time.Duration
   111  
   112  		// SelectLimit defines the maximum number of row select queries can query at once.
   113  		SelectLimit int
   114  
   115  		// ConnParams is the mysql.ConnParams that should be use to create new clients.
   116  		ConnParams *mysql.ConnParams
   117  
   118  		// MaxClient is the maximum number of concurrent client stressing the cluster.
   119  		MaxClient int
   120  
   121  		// AllowFailure determines whether failing queries are allowed or not.
   122  		// All queries that fail while this setting is set to true will not be counted
   123  		// by Stresser.Stop's assertion.
   124  		AllowFailure bool
   125  	}
   126  )
   127  
   128  // DefaultConfig is the default configuration used by the stresser.
   129  var DefaultConfig = Config{
   130  	MaximumDuration: 120 * time.Second,
   131  	MinimumDuration: 1 * time.Second,
   132  	PrintErrLogs:    false,
   133  	PrintLogs:       false,
   134  	NumberOfTables:  100,
   135  	TableNamePrefix: "stress_t",
   136  	InsertInterval:  10 * time.Microsecond,
   137  	DeleteInterval:  15 * time.Microsecond,
   138  	SelectInterval:  2 * time.Microsecond,
   139  	SelectLimit:     500,
   140  	MaxClient:       10,
   141  	AllowFailure:    false,
   142  }
   143  
   144  // AllowFailure will set the AllowFailure setting to the given value.
   145  // Allowing failure means that all incoming queries that fail will be
   146  // counted in result's QPS and total queries, however they will not
   147  // be marked as "meaningful failure". Meaningful failures represent the
   148  // failures that must fail the current test.
   149  func (s *Stresser) AllowFailure(allow bool) {
   150  	s.cfgMu.Lock()
   151  	defer s.cfgMu.Unlock()
   152  	s.cfg.AllowFailure = allow
   153  }
   154  
   155  // New creates a new Stresser based on the given Config.
   156  func New(t *testing.T, cfg Config) *Stresser {
   157  	return &Stresser{
   158  		cfg:    cfg,
   159  		doneCh: make(chan result),
   160  		t:      t,
   161  	}
   162  }
   163  
   164  // Stop the Stresser immediately once Config.MinimumDuration is reached.
   165  // To override Config.MinimumDuration, one can call Stresser.StopAfter with
   166  // a value of 0.
   167  // Once the Stresser has stopped, the function asserts that all results are
   168  // successful, and then prints them to the standard output.
   169  func (s *Stresser) Stop() {
   170  	if time.Since(s.start) > s.cfg.MinimumDuration {
   171  		s.StopAfter(0)
   172  	} else {
   173  		s.StopAfter(s.cfg.MinimumDuration - time.Since(s.start))
   174  	}
   175  }
   176  
   177  // StopAfter stops the Stresser after the given duration. The function will then
   178  // assert that all the results are successful, and finally prints them to the standard
   179  // output.
   180  func (s *Stresser) StopAfter(after time.Duration) {
   181  	if s.start.IsZero() {
   182  		s.t.Log("Load testing was not started.")
   183  		return
   184  	}
   185  	timeoutCh := time.After(after)
   186  	select {
   187  	case res := <-s.doneCh:
   188  		if s.cfg.PrintLogs {
   189  			res.print(s.t.Logf, s.duration.Seconds())
   190  		}
   191  		if !res.assert() {
   192  			s.t.Errorf("Requires no failed queries")
   193  		}
   194  	case <-timeoutCh:
   195  		atomic.StoreUint32(&s.finish, 1)
   196  		res := <-s.doneCh
   197  		if s.cfg.PrintLogs {
   198  			res.print(s.t.Logf, s.duration.Seconds())
   199  		}
   200  		if !res.assert() {
   201  			s.t.Errorf("Requires no failed queries")
   202  		}
   203  	}
   204  }
   205  
   206  // SetConn allows us to change the mysql.ConnParams of a Stresser at runtime.
   207  // Setting a new mysql.ConnParams will automatically create new MySQL client using
   208  // the new configuration.
   209  func (s *Stresser) SetConn(conn *mysql.ConnParams) *Stresser {
   210  	s.cfgMu.Lock()
   211  	defer s.cfgMu.Unlock()
   212  	s.cfg.ConnParams = conn
   213  	return s
   214  }
   215  
   216  // Start stressing the Vitess cluster.
   217  // This method will start by creating the MySQL tables in the Vitess cluster based
   218  // on the maximum number of table set through Config.NumberOfTables.
   219  // The method will then start a goroutine that will spawn one or more clients.
   220  // These clients will be responsible for stressing the cluster until Config.MaximumDuration
   221  // is reached, or until Stresser.Stop() or Stresser.StopAfter() are called.
   222  //
   223  // This method returns a pointer to its Stresser to allow chained function call, like:
   224  //
   225  //	s := stress.New(t, cfg).Start()
   226  //	s.Stop()
   227  func (s *Stresser) Start() *Stresser {
   228  	if s.cfg.PrintLogs {
   229  		s.t.Log("Starting load testing ...")
   230  	}
   231  	s.tbls = s.createTables(s.cfg.NumberOfTables)
   232  	s.start = time.Now()
   233  	go s.startClients()
   234  	return s
   235  }
   236  
   237  func generateNewTables(prefix string, nb int) []*table {
   238  	tbls := make([]*table, 0, nb)
   239  	for i := 0; i < nb; i++ {
   240  		tbls = append(tbls, &table{
   241  			name: fmt.Sprintf("%s%d", prefix, i),
   242  		})
   243  	}
   244  	return tbls
   245  }
   246  
   247  func (s *Stresser) createTables(nb int) []*table {
   248  	conn := newClient(s.t, s.cfg.ConnParams)
   249  	defer conn.Close()
   250  
   251  	tbls := generateNewTables(s.cfg.TableNamePrefix, nb)
   252  	for _, tbl := range tbls {
   253  		s.exec(conn, fmt.Sprintf(templateNewTable, tbl.name))
   254  	}
   255  	return tbls
   256  }
   257  
   258  // startClients is responsible for concurrently starting all the clients,
   259  // fetching their results, and computing a single final result which is
   260  // then publish in Stresser.doneCh.
   261  func (s *Stresser) startClients() {
   262  	maxClient := s.cfg.MaxClient
   263  	resultCh := make(chan result, maxClient)
   264  
   265  	// Start the concurrent clients.
   266  	for i := 0; i < maxClient; i++ {
   267  		go s.startStressClient(resultCh)
   268  	}
   269  
   270  	// Wait for the different clients to publish their results.
   271  	perClientResults := make([]result, 0, maxClient)
   272  	for i := 0; i < maxClient; i++ {
   273  		newResult := <-resultCh
   274  		perClientResults = append(perClientResults, newResult)
   275  	}
   276  
   277  	// Calculate how long it took for all the client to finish stressing
   278  	// the cluster.
   279  	s.duration = time.Since(s.start)
   280  
   281  	// Based on all the clients' results, compute a single result.
   282  	var finalResult result
   283  	for _, r := range perClientResults {
   284  		finalResult.inserts = sumQueryCounts(finalResult.inserts, r.inserts)
   285  		finalResult.selects = sumQueryCounts(finalResult.selects, r.selects)
   286  		finalResult.deletes = sumQueryCounts(finalResult.deletes, r.deletes)
   287  	}
   288  	s.doneCh <- finalResult
   289  }
   290  
   291  // startStressClient creates a client that will stress the cluster.
   292  // This function is supposed to be called as many times as we want
   293  // to have concurrent clients stressing the cluster.
   294  // Once the client is done stressing the cluster, results are published
   295  // in the given chan result.
   296  func (s *Stresser) startStressClient(resultCh chan result) {
   297  	s.cfgMu.Lock()
   298  	connParams := s.cfg.ConnParams
   299  	s.cfgMu.Unlock()
   300  
   301  	conn := newClient(s.t, connParams)
   302  	defer conn.Close()
   303  
   304  	var res result
   305  
   306  	// Create a timeout based on the Stresser maximum duration and the time
   307  	// that has already elapsed since the Stresser was started.
   308  	timeout := time.After(s.cfg.MaximumDuration - time.Since(s.start))
   309  
   310  outer:
   311  	for !s.finished() {
   312  
   313  		// Update the connection parameters is Stresser has new ones, and
   314  		// create a new client using the new parameters.
   315  		// This allows us to change the target (server we are stressing) at
   316  		// runtime without having to create a new Stresser.
   317  		s.cfgMu.Lock()
   318  		if connParams != s.cfg.ConnParams {
   319  			connParams = s.cfg.ConnParams
   320  			conn.Close()
   321  			conn = newClient(s.t, connParams)
   322  		}
   323  		s.cfgMu.Unlock()
   324  
   325  		select {
   326  		case <-timeout: // Case where the Stresser has reached its maximum duration
   327  			break outer
   328  		case <-time.After(s.cfg.DeleteInterval):
   329  			s.deleteFromRandomTable(conn, &res)
   330  		case <-time.After(s.cfg.InsertInterval):
   331  			s.insertToRandomTable(conn, &res)
   332  		case <-time.After(s.cfg.SelectInterval):
   333  			s.selectFromRandomTable(conn, &res)
   334  		}
   335  	}
   336  	resultCh <- res
   337  }
   338  
   339  func (s *Stresser) finished() bool {
   340  	return atomic.LoadUint32(&s.finish) == 1
   341  }
   342  
   343  // deleteFromRandomTable will delete the last row of a random table.
   344  // If the random table contains no row, the query will not be sent.
   345  func (s *Stresser) deleteFromRandomTable(conn *mysql.Conn, r *result) {
   346  	tblI := rand.Int() % len(s.tbls)
   347  	s.tbls[tblI].mu.Lock()
   348  	defer s.tbls[tblI].mu.Unlock()
   349  
   350  	// no row to delete
   351  	if s.tbls[tblI].rows == 0 {
   352  		return
   353  	}
   354  
   355  	query := fmt.Sprintf("delete from %s where id = %d", s.tbls[tblI].name, s.tbls[tblI].nextID-1)
   356  	if s.exec(conn, query) != nil {
   357  		s.tbls[tblI].nextID--
   358  		s.tbls[tblI].rows--
   359  		r.deletes.success++
   360  	} else {
   361  		r.deletes.failure++
   362  		s.cfgMu.Lock()
   363  		if !s.cfg.AllowFailure {
   364  			r.deletes.meaningfulFailure++
   365  		}
   366  		s.cfgMu.Unlock()
   367  	}
   368  }
   369  
   370  // insertToRandomTable inserts a new row into a random table.
   371  func (s *Stresser) insertToRandomTable(conn *mysql.Conn, r *result) {
   372  	tblI := rand.Int() % len(s.tbls)
   373  	s.tbls[tblI].mu.Lock()
   374  	defer s.tbls[tblI].mu.Unlock()
   375  
   376  	query := fmt.Sprintf("insert into %s(id, val) values(%d, 'name')", s.tbls[tblI].name, s.tbls[tblI].nextID)
   377  	if s.exec(conn, query) != nil {
   378  		s.tbls[tblI].nextID++
   379  		s.tbls[tblI].rows++
   380  		r.inserts.success++
   381  	} else {
   382  		r.inserts.failure++
   383  		s.cfgMu.Lock()
   384  		if !s.cfg.AllowFailure {
   385  			r.inserts.meaningfulFailure++
   386  		}
   387  		s.cfgMu.Unlock()
   388  	}
   389  }
   390  
   391  // selectFromRandomTable selects all the rows (up to Config.SelectLimit) of a
   392  // random table. If the table contains no row, the query will not be sent.
   393  func (s *Stresser) selectFromRandomTable(conn *mysql.Conn, r *result) {
   394  	tblI := rand.Int() % len(s.tbls)
   395  	s.tbls[tblI].mu.Lock()
   396  	defer s.tbls[tblI].mu.Unlock()
   397  
   398  	// no row to select
   399  	if s.tbls[tblI].rows == 0 {
   400  		return
   401  	}
   402  
   403  	query := fmt.Sprintf("select * from %s limit %d", s.tbls[tblI].name, s.cfg.SelectLimit)
   404  	expLength := s.tbls[tblI].rows
   405  	if expLength > s.cfg.SelectLimit {
   406  		expLength = s.cfg.SelectLimit
   407  	}
   408  	if s.assertLength(conn, query, expLength) {
   409  		r.selects.success++
   410  	} else {
   411  		r.selects.failure++
   412  		s.cfgMu.Lock()
   413  		if !s.cfg.AllowFailure {
   414  			r.selects.meaningfulFailure++
   415  		}
   416  		s.cfgMu.Unlock()
   417  	}
   418  }