vitess.io/vitess@v0.16.2/go/test/stress/stress.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package stress 18 19 import ( 20 "fmt" 21 "math/rand" 22 "sync" 23 "sync/atomic" 24 "testing" 25 "time" 26 27 "vitess.io/vitess/go/mysql" 28 ) 29 30 const ( 31 // Template used to create new table in the database. 32 // TODO: have dynamic schemas 33 templateNewTable = `create table %s ( 34 id bigint, 35 val varchar(64), 36 primary key (id) 37 ) Engine=InnoDB 38 ` 39 ) 40 41 type ( 42 table struct { 43 name string 44 rows, nextID int 45 mu sync.Mutex 46 } 47 48 // Stresser is responsible for stressing a Vitess cluster based on a given Config. 49 // Stressing a Vitess cluster is achieved by spawning several clients that continuously 50 // send queries to the cluster. 51 // 52 // The Stresser uses SELECT, INSERT and DELETE statements to stress the cluster. Queries 53 // are made against tables that are generated when calling Stresser.Start(). 54 // For each query, we keep its status (failed or succeeded) and at the end of the stress, 55 // when calling Stresser.Stop() or Stresser.StopAfter(), we assert that all queries have 56 // succeeded, otherwise the Stresser will fail the test. 57 // 58 // This behavior can be changed by the use of Stresser.AllowFailure(bool) and the AllowFailure 59 // field of Config. 60 // 61 // Below is an a sample usage of the Stresser: 62 // 63 // // copy the DefaultConfig and set your own mysql.ConnParams 64 // cfg := stress.DefaultConfig 65 // cfg.ConnParams = &mysql.ConnParams{Port: 8888, Host: "localhost", DbName: "ks"} 66 // s := stress.New(t, cfg).Start() 67 // 68 // // your end to end test here 69 // 70 // s.Stop() // stop the Stresser and assert its results 71 // 72 Stresser struct { 73 cfg Config 74 doneCh chan result 75 tbls []*table 76 duration time.Duration 77 start time.Time 78 t *testing.T 79 finish uint32 80 cfgMu sync.Mutex 81 } 82 83 // Config contains all of the Stresser configuration. 84 Config struct { 85 // MaximumDuration during which each client can stress the cluster. 86 MaximumDuration time.Duration 87 88 // MinimumDuration during which each client must stress the cluster. 89 MinimumDuration time.Duration 90 91 // PrintErrLogs enables or disables the rendering of MySQL error logs. 92 PrintErrLogs bool 93 94 // PrintLogs enables or disables the rendering of Stresser logs. 95 PrintLogs bool 96 97 // NumberOfTables to create in the cluster. 98 NumberOfTables int 99 100 // TableNamePrefix defines which prefix will be used for name of the auto-generated tables. 101 TableNamePrefix string 102 103 // InsertInterval defines at which interval each insert queries should be sent. 104 InsertInterval time.Duration 105 106 // DeleteInterval defines at which interval each delete queries should be sent. 107 DeleteInterval time.Duration 108 109 // SelectInterval defines at which interval each select queries should be sent. 110 SelectInterval time.Duration 111 112 // SelectLimit defines the maximum number of row select queries can query at once. 113 SelectLimit int 114 115 // ConnParams is the mysql.ConnParams that should be use to create new clients. 116 ConnParams *mysql.ConnParams 117 118 // MaxClient is the maximum number of concurrent client stressing the cluster. 119 MaxClient int 120 121 // AllowFailure determines whether failing queries are allowed or not. 122 // All queries that fail while this setting is set to true will not be counted 123 // by Stresser.Stop's assertion. 124 AllowFailure bool 125 } 126 ) 127 128 // DefaultConfig is the default configuration used by the stresser. 129 var DefaultConfig = Config{ 130 MaximumDuration: 120 * time.Second, 131 MinimumDuration: 1 * time.Second, 132 PrintErrLogs: false, 133 PrintLogs: false, 134 NumberOfTables: 100, 135 TableNamePrefix: "stress_t", 136 InsertInterval: 10 * time.Microsecond, 137 DeleteInterval: 15 * time.Microsecond, 138 SelectInterval: 2 * time.Microsecond, 139 SelectLimit: 500, 140 MaxClient: 10, 141 AllowFailure: false, 142 } 143 144 // AllowFailure will set the AllowFailure setting to the given value. 145 // Allowing failure means that all incoming queries that fail will be 146 // counted in result's QPS and total queries, however they will not 147 // be marked as "meaningful failure". Meaningful failures represent the 148 // failures that must fail the current test. 149 func (s *Stresser) AllowFailure(allow bool) { 150 s.cfgMu.Lock() 151 defer s.cfgMu.Unlock() 152 s.cfg.AllowFailure = allow 153 } 154 155 // New creates a new Stresser based on the given Config. 156 func New(t *testing.T, cfg Config) *Stresser { 157 return &Stresser{ 158 cfg: cfg, 159 doneCh: make(chan result), 160 t: t, 161 } 162 } 163 164 // Stop the Stresser immediately once Config.MinimumDuration is reached. 165 // To override Config.MinimumDuration, one can call Stresser.StopAfter with 166 // a value of 0. 167 // Once the Stresser has stopped, the function asserts that all results are 168 // successful, and then prints them to the standard output. 169 func (s *Stresser) Stop() { 170 if time.Since(s.start) > s.cfg.MinimumDuration { 171 s.StopAfter(0) 172 } else { 173 s.StopAfter(s.cfg.MinimumDuration - time.Since(s.start)) 174 } 175 } 176 177 // StopAfter stops the Stresser after the given duration. The function will then 178 // assert that all the results are successful, and finally prints them to the standard 179 // output. 180 func (s *Stresser) StopAfter(after time.Duration) { 181 if s.start.IsZero() { 182 s.t.Log("Load testing was not started.") 183 return 184 } 185 timeoutCh := time.After(after) 186 select { 187 case res := <-s.doneCh: 188 if s.cfg.PrintLogs { 189 res.print(s.t.Logf, s.duration.Seconds()) 190 } 191 if !res.assert() { 192 s.t.Errorf("Requires no failed queries") 193 } 194 case <-timeoutCh: 195 atomic.StoreUint32(&s.finish, 1) 196 res := <-s.doneCh 197 if s.cfg.PrintLogs { 198 res.print(s.t.Logf, s.duration.Seconds()) 199 } 200 if !res.assert() { 201 s.t.Errorf("Requires no failed queries") 202 } 203 } 204 } 205 206 // SetConn allows us to change the mysql.ConnParams of a Stresser at runtime. 207 // Setting a new mysql.ConnParams will automatically create new MySQL client using 208 // the new configuration. 209 func (s *Stresser) SetConn(conn *mysql.ConnParams) *Stresser { 210 s.cfgMu.Lock() 211 defer s.cfgMu.Unlock() 212 s.cfg.ConnParams = conn 213 return s 214 } 215 216 // Start stressing the Vitess cluster. 217 // This method will start by creating the MySQL tables in the Vitess cluster based 218 // on the maximum number of table set through Config.NumberOfTables. 219 // The method will then start a goroutine that will spawn one or more clients. 220 // These clients will be responsible for stressing the cluster until Config.MaximumDuration 221 // is reached, or until Stresser.Stop() or Stresser.StopAfter() are called. 222 // 223 // This method returns a pointer to its Stresser to allow chained function call, like: 224 // 225 // s := stress.New(t, cfg).Start() 226 // s.Stop() 227 func (s *Stresser) Start() *Stresser { 228 if s.cfg.PrintLogs { 229 s.t.Log("Starting load testing ...") 230 } 231 s.tbls = s.createTables(s.cfg.NumberOfTables) 232 s.start = time.Now() 233 go s.startClients() 234 return s 235 } 236 237 func generateNewTables(prefix string, nb int) []*table { 238 tbls := make([]*table, 0, nb) 239 for i := 0; i < nb; i++ { 240 tbls = append(tbls, &table{ 241 name: fmt.Sprintf("%s%d", prefix, i), 242 }) 243 } 244 return tbls 245 } 246 247 func (s *Stresser) createTables(nb int) []*table { 248 conn := newClient(s.t, s.cfg.ConnParams) 249 defer conn.Close() 250 251 tbls := generateNewTables(s.cfg.TableNamePrefix, nb) 252 for _, tbl := range tbls { 253 s.exec(conn, fmt.Sprintf(templateNewTable, tbl.name)) 254 } 255 return tbls 256 } 257 258 // startClients is responsible for concurrently starting all the clients, 259 // fetching their results, and computing a single final result which is 260 // then publish in Stresser.doneCh. 261 func (s *Stresser) startClients() { 262 maxClient := s.cfg.MaxClient 263 resultCh := make(chan result, maxClient) 264 265 // Start the concurrent clients. 266 for i := 0; i < maxClient; i++ { 267 go s.startStressClient(resultCh) 268 } 269 270 // Wait for the different clients to publish their results. 271 perClientResults := make([]result, 0, maxClient) 272 for i := 0; i < maxClient; i++ { 273 newResult := <-resultCh 274 perClientResults = append(perClientResults, newResult) 275 } 276 277 // Calculate how long it took for all the client to finish stressing 278 // the cluster. 279 s.duration = time.Since(s.start) 280 281 // Based on all the clients' results, compute a single result. 282 var finalResult result 283 for _, r := range perClientResults { 284 finalResult.inserts = sumQueryCounts(finalResult.inserts, r.inserts) 285 finalResult.selects = sumQueryCounts(finalResult.selects, r.selects) 286 finalResult.deletes = sumQueryCounts(finalResult.deletes, r.deletes) 287 } 288 s.doneCh <- finalResult 289 } 290 291 // startStressClient creates a client that will stress the cluster. 292 // This function is supposed to be called as many times as we want 293 // to have concurrent clients stressing the cluster. 294 // Once the client is done stressing the cluster, results are published 295 // in the given chan result. 296 func (s *Stresser) startStressClient(resultCh chan result) { 297 s.cfgMu.Lock() 298 connParams := s.cfg.ConnParams 299 s.cfgMu.Unlock() 300 301 conn := newClient(s.t, connParams) 302 defer conn.Close() 303 304 var res result 305 306 // Create a timeout based on the Stresser maximum duration and the time 307 // that has already elapsed since the Stresser was started. 308 timeout := time.After(s.cfg.MaximumDuration - time.Since(s.start)) 309 310 outer: 311 for !s.finished() { 312 313 // Update the connection parameters is Stresser has new ones, and 314 // create a new client using the new parameters. 315 // This allows us to change the target (server we are stressing) at 316 // runtime without having to create a new Stresser. 317 s.cfgMu.Lock() 318 if connParams != s.cfg.ConnParams { 319 connParams = s.cfg.ConnParams 320 conn.Close() 321 conn = newClient(s.t, connParams) 322 } 323 s.cfgMu.Unlock() 324 325 select { 326 case <-timeout: // Case where the Stresser has reached its maximum duration 327 break outer 328 case <-time.After(s.cfg.DeleteInterval): 329 s.deleteFromRandomTable(conn, &res) 330 case <-time.After(s.cfg.InsertInterval): 331 s.insertToRandomTable(conn, &res) 332 case <-time.After(s.cfg.SelectInterval): 333 s.selectFromRandomTable(conn, &res) 334 } 335 } 336 resultCh <- res 337 } 338 339 func (s *Stresser) finished() bool { 340 return atomic.LoadUint32(&s.finish) == 1 341 } 342 343 // deleteFromRandomTable will delete the last row of a random table. 344 // If the random table contains no row, the query will not be sent. 345 func (s *Stresser) deleteFromRandomTable(conn *mysql.Conn, r *result) { 346 tblI := rand.Int() % len(s.tbls) 347 s.tbls[tblI].mu.Lock() 348 defer s.tbls[tblI].mu.Unlock() 349 350 // no row to delete 351 if s.tbls[tblI].rows == 0 { 352 return 353 } 354 355 query := fmt.Sprintf("delete from %s where id = %d", s.tbls[tblI].name, s.tbls[tblI].nextID-1) 356 if s.exec(conn, query) != nil { 357 s.tbls[tblI].nextID-- 358 s.tbls[tblI].rows-- 359 r.deletes.success++ 360 } else { 361 r.deletes.failure++ 362 s.cfgMu.Lock() 363 if !s.cfg.AllowFailure { 364 r.deletes.meaningfulFailure++ 365 } 366 s.cfgMu.Unlock() 367 } 368 } 369 370 // insertToRandomTable inserts a new row into a random table. 371 func (s *Stresser) insertToRandomTable(conn *mysql.Conn, r *result) { 372 tblI := rand.Int() % len(s.tbls) 373 s.tbls[tblI].mu.Lock() 374 defer s.tbls[tblI].mu.Unlock() 375 376 query := fmt.Sprintf("insert into %s(id, val) values(%d, 'name')", s.tbls[tblI].name, s.tbls[tblI].nextID) 377 if s.exec(conn, query) != nil { 378 s.tbls[tblI].nextID++ 379 s.tbls[tblI].rows++ 380 r.inserts.success++ 381 } else { 382 r.inserts.failure++ 383 s.cfgMu.Lock() 384 if !s.cfg.AllowFailure { 385 r.inserts.meaningfulFailure++ 386 } 387 s.cfgMu.Unlock() 388 } 389 } 390 391 // selectFromRandomTable selects all the rows (up to Config.SelectLimit) of a 392 // random table. If the table contains no row, the query will not be sent. 393 func (s *Stresser) selectFromRandomTable(conn *mysql.Conn, r *result) { 394 tblI := rand.Int() % len(s.tbls) 395 s.tbls[tblI].mu.Lock() 396 defer s.tbls[tblI].mu.Unlock() 397 398 // no row to select 399 if s.tbls[tblI].rows == 0 { 400 return 401 } 402 403 query := fmt.Sprintf("select * from %s limit %d", s.tbls[tblI].name, s.cfg.SelectLimit) 404 expLength := s.tbls[tblI].rows 405 if expLength > s.cfg.SelectLimit { 406 expLength = s.cfg.SelectLimit 407 } 408 if s.assertLength(conn, query, expLength) { 409 r.selects.success++ 410 } else { 411 r.selects.failure++ 412 s.cfgMu.Lock() 413 if !s.cfg.AllowFailure { 414 r.selects.meaningfulFailure++ 415 } 416 s.cfgMu.Unlock() 417 } 418 }