vitess.io/vitess@v0.16.2/go/test/endtoend/vtorc/utils/utils.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package utils 18 19 import ( 20 "context" 21 "fmt" 22 "os" 23 "os/exec" 24 "path" 25 "strings" 26 "testing" 27 "time" 28 29 "github.com/stretchr/testify/assert" 30 "github.com/stretchr/testify/require" 31 32 "vitess.io/vitess/go/json2" 33 "vitess.io/vitess/go/mysql" 34 "vitess.io/vitess/go/sqltypes" 35 "vitess.io/vitess/go/test/endtoend/cluster" 36 "vitess.io/vitess/go/vt/log" 37 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 38 "vitess.io/vitess/go/vt/topo" 39 "vitess.io/vitess/go/vt/topo/topoproto" 40 41 // Register topo implementations. 42 _ "vitess.io/vitess/go/vt/topo/consultopo" 43 _ "vitess.io/vitess/go/vt/topo/etcd2topo" 44 _ "vitess.io/vitess/go/vt/topo/k8stopo" 45 _ "vitess.io/vitess/go/vt/topo/zk2topo" 46 ) 47 48 const ( 49 keyspaceName = "ks" 50 shardName = "0" 51 Hostname = "localhost" 52 Cell1 = "zone1" 53 Cell2 = "zone2" 54 ) 55 56 // CellInfo stores the information regarding 1 cell including the tablets it contains 57 type CellInfo struct { 58 CellName string 59 ReplicaTablets []*cluster.Vttablet 60 RdonlyTablets []*cluster.Vttablet 61 // constants that should be set in TestMain 62 NumReplicas int 63 NumRdonly int 64 UIDBase int 65 } 66 67 // VTOrcClusterInfo stores the information for a cluster. This is supposed to be used only for VTOrc tests. 68 type VTOrcClusterInfo struct { 69 ClusterInstance *cluster.LocalProcessCluster 70 Ts *topo.Server 71 CellInfos []*CellInfo 72 VtctldClientProcess *cluster.VtctldClientProcess 73 lastUsedValue int 74 } 75 76 // CreateClusterAndStartTopo starts the cluster and topology service 77 func CreateClusterAndStartTopo(cellInfos []*CellInfo) (*VTOrcClusterInfo, error) { 78 clusterInstance := cluster.NewCluster(Cell1, Hostname) 79 80 // Start topo server 81 err := clusterInstance.StartTopo() 82 if err != nil { 83 return nil, err 84 } 85 86 // Adding another cell in the same cluster 87 err = clusterInstance.TopoProcess.ManageTopoDir("mkdir", "/vitess/"+Cell2) 88 if err != nil { 89 return nil, err 90 } 91 err = clusterInstance.VtctlProcess.AddCellInfo(Cell2) 92 if err != nil { 93 return nil, err 94 } 95 96 // create the vttablets 97 err = createVttablets(clusterInstance, cellInfos) 98 if err != nil { 99 return nil, err 100 } 101 102 // store the vtctldclient process 103 vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInstance.VtctldProcess.GrpcPort, clusterInstance.TmpDirectory) 104 105 // create topo server connection 106 ts, err := topo.OpenServer(*clusterInstance.TopoFlavorString(), clusterInstance.VtctlProcess.TopoGlobalAddress, clusterInstance.VtctlProcess.TopoGlobalRoot) 107 return &VTOrcClusterInfo{ 108 ClusterInstance: clusterInstance, 109 Ts: ts, 110 CellInfos: cellInfos, 111 lastUsedValue: 100, 112 VtctldClientProcess: vtctldClientProcess, 113 }, err 114 } 115 116 // createVttablets is used to create the vttablets for all the tests 117 func createVttablets(clusterInstance *cluster.LocalProcessCluster, cellInfos []*CellInfo) error { 118 keyspace := &cluster.Keyspace{Name: keyspaceName} 119 shard0 := &cluster.Shard{Name: shardName} 120 121 // creating tablets by hand instead of using StartKeyspace because we don't want to call InitShardPrimary 122 var tablets []*cluster.Vttablet 123 for _, cellInfo := range cellInfos { 124 for i := 0; i < cellInfo.NumReplicas; i++ { 125 vttabletInstance := clusterInstance.NewVttabletInstance("replica", cellInfo.UIDBase, cellInfo.CellName) 126 cellInfo.UIDBase++ 127 tablets = append(tablets, vttabletInstance) 128 cellInfo.ReplicaTablets = append(cellInfo.ReplicaTablets, vttabletInstance) 129 } 130 for i := 0; i < cellInfo.NumRdonly; i++ { 131 vttabletInstance := clusterInstance.NewVttabletInstance("rdonly", cellInfo.UIDBase, cellInfo.CellName) 132 cellInfo.UIDBase++ 133 tablets = append(tablets, vttabletInstance) 134 cellInfo.RdonlyTablets = append(cellInfo.RdonlyTablets, vttabletInstance) 135 } 136 } 137 clusterInstance.VtTabletExtraArgs = []string{ 138 "--lock_tables_timeout", "5s", 139 "--disable_active_reparents", 140 } 141 // Initialize Cluster 142 shard0.Vttablets = tablets 143 err := clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard0}) 144 if err != nil { 145 return err 146 } 147 //Start MySql 148 var mysqlCtlProcessList []*exec.Cmd 149 for _, tablet := range shard0.Vttablets { 150 log.Infof("Starting MySql for tablet %v", tablet.Alias) 151 proc, err := tablet.MysqlctlProcess.StartProcess() 152 if err != nil { 153 return err 154 } 155 mysqlCtlProcessList = append(mysqlCtlProcessList, proc) 156 } 157 // Wait for mysql processes to start 158 for _, proc := range mysqlCtlProcessList { 159 err := proc.Wait() 160 if err != nil { 161 return err 162 } 163 } 164 for _, tablet := range shard0.Vttablets { 165 // Reset status, don't wait for the tablet status. We will check it later 166 tablet.VttabletProcess.ServingStatus = "" 167 // Start the tablet 168 err := tablet.VttabletProcess.Setup() 169 if err != nil { 170 return err 171 } 172 } 173 for _, tablet := range shard0.Vttablets { 174 err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) 175 if err != nil { 176 return err 177 } 178 } 179 180 // we also need to wait for the tablet type to change from restore to replica, before we delete a tablet from the topology 181 // otherwise it will notice that their is no record for the tablet in the topology when it tries to update its state and shutdown itself! 182 for _, tablet := range shard0.Vttablets { 183 err := tablet.VttabletProcess.WaitForTabletTypes([]string{"replica", "rdonly"}) 184 if err != nil { 185 return err 186 } 187 } 188 189 return nil 190 } 191 192 // shutdownVttablets shuts down all the vttablets and removes them from the topology 193 func shutdownVttablets(clusterInfo *VTOrcClusterInfo) error { 194 // reset the shard primary 195 err := resetShardPrimary(clusterInfo.Ts) 196 if err != nil { 197 return err 198 } 199 200 for _, vttablet := range clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets { 201 // we need to stop a vttablet only if it is not shutdown 202 if !vttablet.VttabletProcess.IsShutdown() { 203 // Stop the vttablets 204 err := vttablet.VttabletProcess.TearDown() 205 if err != nil { 206 return err 207 } 208 // Remove the tablet record for this tablet 209 } 210 err = clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", vttablet.Alias) 211 if err != nil { 212 return err 213 } 214 } 215 clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets = nil 216 return nil 217 } 218 219 // resetShardPrimary resets the shard's primary 220 func resetShardPrimary(ts *topo.Server) (err error) { 221 // lock the shard 222 ctx, unlock, lockErr := ts.LockShard(context.Background(), keyspaceName, shardName, "resetShardPrimary-vtorc-endtoend-test") 223 if lockErr != nil { 224 return lockErr 225 } 226 defer unlock(&err) 227 228 // update the shard record's primary 229 if _, err = ts.UpdateShardFields(ctx, keyspaceName, shardName, func(si *topo.ShardInfo) error { 230 si.PrimaryAlias = nil 231 return nil 232 }); err != nil { 233 return err 234 } 235 return 236 } 237 238 // StartVTOrcs is used to start the vtorcs with the given extra arguments 239 func StartVTOrcs(t *testing.T, clusterInfo *VTOrcClusterInfo, orcExtraArgs []string, config cluster.VTOrcConfiguration, count int) { 240 t.Helper() 241 // Start vtorc 242 for i := 0; i < count; i++ { 243 vtorcProcess := clusterInfo.ClusterInstance.NewVTOrcProcess(config) 244 vtorcProcess.ExtraArgs = orcExtraArgs 245 err := vtorcProcess.Setup() 246 require.NoError(t, err) 247 clusterInfo.ClusterInstance.VTOrcProcesses = append(clusterInfo.ClusterInstance.VTOrcProcesses, vtorcProcess) 248 } 249 } 250 251 // StopVTOrcs is used to stop the vtorcs 252 func StopVTOrcs(t *testing.T, clusterInfo *VTOrcClusterInfo) { 253 t.Helper() 254 // Stop vtorc 255 for _, vtorcProcess := range clusterInfo.ClusterInstance.VTOrcProcesses { 256 if err := vtorcProcess.TearDown(); err != nil { 257 log.Errorf("Error in vtorc teardown: %v", err) 258 } 259 } 260 clusterInfo.ClusterInstance.VTOrcProcesses = nil 261 } 262 263 // SetupVttabletsAndVTOrcs is used to setup the vttablets and start the vtorcs 264 func SetupVttabletsAndVTOrcs(t *testing.T, clusterInfo *VTOrcClusterInfo, numReplicasReqCell1, numRdonlyReqCell1 int, orcExtraArgs []string, config cluster.VTOrcConfiguration, vtorcCount int, durability string) { 265 // stop vtorc if it is running 266 StopVTOrcs(t, clusterInfo) 267 268 // remove all the vttablets so that each test can add the amount that they require 269 err := shutdownVttablets(clusterInfo) 270 require.NoError(t, err) 271 272 for _, cellInfo := range clusterInfo.CellInfos { 273 if cellInfo.CellName == Cell1 { 274 for _, tablet := range cellInfo.ReplicaTablets { 275 if numReplicasReqCell1 == 0 { 276 break 277 } 278 cleanAndStartVttablet(t, clusterInfo, tablet) 279 numReplicasReqCell1-- 280 } 281 282 for _, tablet := range cellInfo.RdonlyTablets { 283 if numRdonlyReqCell1 == 0 { 284 break 285 } 286 cleanAndStartVttablet(t, clusterInfo, tablet) 287 numRdonlyReqCell1-- 288 } 289 } 290 } 291 292 if numRdonlyReqCell1 > 0 || numReplicasReqCell1 > 0 { 293 t.Fatalf("more than available tablets requested. Please increase the constants numReplicas or numRdonly") 294 } 295 296 // wait for the tablets to come up properly 297 for _, tablet := range clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets { 298 err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) 299 require.NoError(t, err) 300 } 301 for _, tablet := range clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets { 302 err := tablet.VttabletProcess.WaitForTabletTypes([]string{"replica", "rdonly"}) 303 require.NoError(t, err) 304 } 305 306 if durability == "" { 307 durability = "none" 308 } 309 out, err := clusterInfo.VtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, fmt.Sprintf("--durability-policy=%s", durability)) 310 require.NoError(t, err, out) 311 312 // start vtorc 313 StartVTOrcs(t, clusterInfo, orcExtraArgs, config, vtorcCount) 314 } 315 316 // cleanAndStartVttablet cleans the MySQL instance underneath for running a new test. It also starts the vttablet. 317 func cleanAndStartVttablet(t *testing.T, clusterInfo *VTOrcClusterInfo, vttablet *cluster.Vttablet) { 318 t.Helper() 319 // set super-read-only to false 320 _, err := RunSQL(t, "SET GLOBAL super_read_only = OFF", vttablet, "") 321 require.NoError(t, err) 322 // remove the databases if they exist 323 _, err = RunSQL(t, "DROP DATABASE IF EXISTS vt_ks", vttablet, "") 324 require.NoError(t, err) 325 _, err = RunSQL(t, "DROP DATABASE IF EXISTS _vt", vttablet, "") 326 require.NoError(t, err) 327 // stop the replication 328 _, err = RunSQL(t, "STOP SLAVE", vttablet, "") 329 require.NoError(t, err) 330 // reset the binlog 331 _, err = RunSQL(t, "RESET MASTER", vttablet, "") 332 require.NoError(t, err) 333 // set read-only to true 334 _, err = RunSQL(t, "SET GLOBAL read_only = ON", vttablet, "") 335 require.NoError(t, err) 336 337 // start the vttablet 338 err = vttablet.VttabletProcess.Setup() 339 require.NoError(t, err) 340 341 clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets = append(clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets, vttablet) 342 } 343 344 // ShardPrimaryTablet waits until a primary tablet has been elected for the given shard and returns it 345 func ShardPrimaryTablet(t *testing.T, clusterInfo *VTOrcClusterInfo, keyspace *cluster.Keyspace, shard *cluster.Shard) *cluster.Vttablet { 346 start := time.Now() 347 for { 348 now := time.Now() 349 if now.Sub(start) > time.Second*60 { 350 assert.FailNow(t, "failed to elect primary before timeout") 351 } 352 result, err := clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("GetShard", fmt.Sprintf("%s/%s", keyspace.Name, shard.Name)) 353 assert.Nil(t, err) 354 355 var shardInfo topodatapb.Shard 356 err = json2.Unmarshal([]byte(result), &shardInfo) 357 assert.Nil(t, err) 358 if shardInfo.PrimaryAlias == nil { 359 log.Warningf("Shard %v/%v has no primary yet, sleep for 1 second\n", keyspace.Name, shard.Name) 360 time.Sleep(time.Second) 361 continue 362 } 363 for _, tablet := range shard.Vttablets { 364 if tablet.Alias == topoproto.TabletAliasString(shardInfo.PrimaryAlias) { 365 return tablet 366 } 367 } 368 } 369 } 370 371 // CheckPrimaryTablet waits until the specified tablet becomes the primary tablet 372 // Makes sure the tablet type is primary, and its health check agrees. 373 func CheckPrimaryTablet(t *testing.T, clusterInfo *VTOrcClusterInfo, tablet *cluster.Vttablet, checkServing bool) { 374 start := time.Now() 375 for { 376 now := time.Now() 377 if now.Sub(start) > time.Second*60 { 378 //log.Exitf("error") 379 assert.FailNow(t, "failed to elect primary before timeout") 380 } 381 result, err := clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("GetTablet", tablet.Alias) 382 require.NoError(t, err) 383 var tabletInfo topodatapb.Tablet 384 err = json2.Unmarshal([]byte(result), &tabletInfo) 385 require.NoError(t, err) 386 387 if topodatapb.TabletType_PRIMARY != tabletInfo.GetType() { 388 log.Warningf("Tablet %v is not primary yet, sleep for 1 second\n", tablet.Alias) 389 time.Sleep(time.Second) 390 continue 391 } 392 // make sure the health stream is updated 393 shrs, err := clusterInfo.ClusterInstance.StreamTabletHealth(context.Background(), tablet, 1) 394 require.NoError(t, err) 395 396 streamHealthResponse := shrs[0] 397 398 if checkServing && !streamHealthResponse.GetServing() { 399 log.Warningf("Tablet %v is not serving in health stream yet, sleep for 1 second\n", tablet.Alias) 400 time.Sleep(time.Second) 401 continue 402 } 403 tabletType := streamHealthResponse.GetTarget().GetTabletType() 404 if tabletType != topodatapb.TabletType_PRIMARY { 405 log.Warningf("Tablet %v is not primary in health stream yet, sleep for 1 second\n", tablet.Alias) 406 time.Sleep(time.Second) 407 continue 408 } 409 break 410 } 411 } 412 413 // CheckReplication checks that the replication is setup correctly and writes succeed and are replicated on all the replicas 414 func CheckReplication(t *testing.T, clusterInfo *VTOrcClusterInfo, primary *cluster.Vttablet, replicas []*cluster.Vttablet, timeToWait time.Duration) { 415 endTime := time.Now().Add(timeToWait) 416 // create tables, insert data and make sure it is replicated correctly 417 sqlSchema := ` 418 create table if not exists vt_ks.vt_insert_test ( 419 id bigint, 420 msg varchar(64), 421 primary key (id) 422 ) Engine=InnoDB 423 ` 424 timeout := time.After(time.Until(endTime)) 425 for { 426 select { 427 case <-timeout: 428 t.Fatal("timedout waiting for keyspace vt_ks to be created by schema engine") 429 return 430 default: 431 _, err := RunSQL(t, sqlSchema, primary, "") 432 if err != nil { 433 log.Warningf("create table failed on primary - %v, will retry", err) 434 time.Sleep(100 * time.Millisecond) 435 break 436 } 437 confirmReplication(t, primary, replicas, time.Until(endTime), clusterInfo.lastUsedValue) 438 clusterInfo.lastUsedValue++ 439 validateTopology(t, clusterInfo, true, time.Until(endTime)) 440 return 441 } 442 } 443 } 444 445 // VerifyWritesSucceed inserts more data into the table vt_insert_test and checks that it is replicated too 446 // Call this function only after CheckReplication has been executed once, since that function creates the table that this function uses. 447 func VerifyWritesSucceed(t *testing.T, clusterInfo *VTOrcClusterInfo, primary *cluster.Vttablet, replicas []*cluster.Vttablet, timeToWait time.Duration) { 448 t.Helper() 449 confirmReplication(t, primary, replicas, timeToWait, clusterInfo.lastUsedValue) 450 clusterInfo.lastUsedValue++ 451 } 452 453 func confirmReplication(t *testing.T, primary *cluster.Vttablet, replicas []*cluster.Vttablet, timeToWait time.Duration, valueToInsert int) { 454 t.Helper() 455 log.Infof("Insert data into primary and check that it is replicated to replica") 456 // insert data into the new primary, check the connected replica work 457 insertSQL := fmt.Sprintf("insert into vt_insert_test(id, msg) values (%d, 'test %d')", valueToInsert, valueToInsert) 458 _, err := RunSQL(t, insertSQL, primary, "vt_ks") 459 require.NoError(t, err) 460 time.Sleep(100 * time.Millisecond) 461 timeout := time.After(timeToWait) 462 for { 463 select { 464 case <-timeout: 465 t.Fatal("timedout waiting for replication, data not yet replicated") 466 return 467 default: 468 err = nil 469 for _, tab := range replicas { 470 errInReplication := checkInsertedValues(t, tab, valueToInsert) 471 if errInReplication != nil { 472 err = errInReplication 473 } 474 } 475 if err != nil { 476 log.Warningf("waiting for replication - error received - %v, will retry", err) 477 time.Sleep(300 * time.Millisecond) 478 break 479 } 480 return 481 } 482 } 483 } 484 485 func checkInsertedValues(t *testing.T, tablet *cluster.Vttablet, index int) error { 486 selectSQL := fmt.Sprintf("select msg from vt_ks.vt_insert_test where id=%d", index) 487 qr, err := RunSQL(t, selectSQL, tablet, "") 488 // The error may be not nil, if the replication has not caught upto the point where the table exists. 489 // We can safely skip this error and retry reading after wait 490 if err == nil && len(qr.Rows) == 1 { 491 return nil 492 } 493 return fmt.Errorf("data is not yet replicated") 494 } 495 496 // WaitForReplicationToStop waits for replication to stop on the given tablet 497 func WaitForReplicationToStop(t *testing.T, vttablet *cluster.Vttablet) error { 498 timeout := time.After(15 * time.Second) 499 for { 500 select { 501 case <-timeout: 502 return fmt.Errorf("timedout: waiting for primary to stop replication") 503 default: 504 res, err := RunSQL(t, "SHOW SLAVE STATUS", vttablet, "") 505 if err != nil { 506 return err 507 } 508 if len(res.Rows) == 0 { 509 return nil 510 } 511 time.Sleep(1 * time.Second) 512 } 513 } 514 } 515 516 func validateTopology(t *testing.T, clusterInfo *VTOrcClusterInfo, pingTablets bool, timeToWait time.Duration) { 517 ch := make(chan error) 518 timeout := time.After(timeToWait) 519 go func() { 520 for { 521 select { 522 case <-timeout: 523 ch <- fmt.Errorf("time out waiting for validation to pass") 524 return 525 default: 526 var err error 527 var output string 528 if pingTablets { 529 output, err = clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("Validate", "--", "--ping-tablets=true") 530 } else { 531 output, err = clusterInfo.ClusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("Validate") 532 } 533 if err != nil { 534 log.Warningf("Validate failed, retrying, output - %s", output) 535 time.Sleep(100 * time.Millisecond) 536 break 537 } 538 ch <- nil 539 return 540 } 541 } 542 }() 543 544 select { 545 case err := <-ch: 546 require.NoError(t, err) 547 return 548 case <-timeout: 549 t.Fatal("time out waiting for validation to pass") 550 } 551 } 552 553 // KillTablets is used to kill the tablets 554 func KillTablets(vttablets []*cluster.Vttablet) { 555 for _, tablet := range vttablets { 556 log.Infof("Shutting down MySQL for %v", tablet.Alias) 557 _ = tablet.MysqlctlProcess.Stop() 558 log.Infof("Calling TearDown on tablet %v", tablet.Alias) 559 _ = tablet.VttabletProcess.TearDown() 560 } 561 } 562 563 func getMysqlConnParam(tablet *cluster.Vttablet, db string) mysql.ConnParams { 564 connParams := mysql.ConnParams{ 565 Uname: "vt_dba", 566 UnixSocket: path.Join(os.Getenv("VTDATAROOT"), fmt.Sprintf("/vt_%010d/mysql.sock", tablet.TabletUID)), 567 } 568 if db != "" { 569 connParams.DbName = db 570 } 571 return connParams 572 } 573 574 // RunSQL is used to run a SQL statement on the given tablet 575 func RunSQL(t *testing.T, sql string, tablet *cluster.Vttablet, db string) (*sqltypes.Result, error) { 576 // Get Connection 577 tabletParams := getMysqlConnParam(tablet, db) 578 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 579 defer cancel() 580 conn, err := mysql.Connect(ctx, &tabletParams) 581 require.Nil(t, err) 582 defer conn.Close() 583 584 // RunSQL 585 return execute(t, conn, sql) 586 } 587 588 func execute(t *testing.T, conn *mysql.Conn, query string) (*sqltypes.Result, error) { 589 t.Helper() 590 return conn.ExecuteFetch(query, 1000, true) 591 } 592 593 // StartVttablet is used to start a vttablet from the given cell and type 594 func StartVttablet(t *testing.T, clusterInfo *VTOrcClusterInfo, cell string, isRdonly bool) *cluster.Vttablet { 595 596 var tablet *cluster.Vttablet 597 for _, cellInfo := range clusterInfo.CellInfos { 598 if cellInfo.CellName == cell { 599 tabletsToUse := cellInfo.ReplicaTablets 600 if isRdonly { 601 tabletsToUse = cellInfo.RdonlyTablets 602 } 603 for _, vttablet := range tabletsToUse { 604 if isVttabletInUse(clusterInfo, vttablet) { 605 continue 606 } 607 tablet = vttablet 608 cleanAndStartVttablet(t, clusterInfo, vttablet) 609 break 610 } 611 break 612 } 613 } 614 615 require.NotNil(t, tablet, "Could not start requested tablet") 616 // wait for the tablets to come up properly 617 err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) 618 require.NoError(t, err) 619 err = tablet.VttabletProcess.WaitForTabletTypes([]string{"replica", "rdonly"}) 620 require.NoError(t, err) 621 return tablet 622 } 623 624 func isVttabletInUse(clusterInfo *VTOrcClusterInfo, tablet *cluster.Vttablet) bool { 625 for _, vttablet := range clusterInfo.ClusterInstance.Keyspaces[0].Shards[0].Vttablets { 626 if tablet == vttablet { 627 return true 628 } 629 } 630 return false 631 } 632 633 // PermanentlyRemoveVttablet removes the tablet specified from the cluster. It makes it so that 634 // this vttablet or mysql instance are not reused for any other test. 635 func PermanentlyRemoveVttablet(clusterInfo *VTOrcClusterInfo, tablet *cluster.Vttablet) { 636 // remove the tablet from our global list 637 for _, cellInfo := range clusterInfo.CellInfos { 638 for i, vttablet := range cellInfo.ReplicaTablets { 639 if vttablet == tablet { 640 // remove this tablet since its mysql has stopped 641 cellInfo.ReplicaTablets = append(cellInfo.ReplicaTablets[:i], cellInfo.ReplicaTablets[i+1:]...) 642 KillTablets([]*cluster.Vttablet{tablet}) 643 return 644 } 645 } 646 for i, vttablet := range cellInfo.RdonlyTablets { 647 if vttablet == tablet { 648 // remove this tablet since its mysql has stopped 649 cellInfo.RdonlyTablets = append(cellInfo.RdonlyTablets[:i], cellInfo.RdonlyTablets[i+1:]...) 650 KillTablets([]*cluster.Vttablet{tablet}) 651 return 652 } 653 } 654 } 655 } 656 657 // ChangePrivileges is used to change the privileges of the given user. These commands are executed such that they are not replicated 658 func ChangePrivileges(t *testing.T, sql string, tablet *cluster.Vttablet, user string) { 659 _, err := RunSQL(t, "SET sql_log_bin = OFF;"+sql+";SET sql_log_bin = ON;", tablet, "") 660 require.NoError(t, err) 661 662 res, err := RunSQL(t, fmt.Sprintf("SELECT id FROM INFORMATION_SCHEMA.PROCESSLIST WHERE user = '%s'", user), tablet, "") 663 require.NoError(t, err) 664 for _, row := range res.Rows { 665 id, err := row[0].ToInt64() 666 require.NoError(t, err) 667 _, err = RunSQL(t, fmt.Sprintf("kill %d", id), tablet, "") 668 require.NoError(t, err) 669 } 670 } 671 672 // ResetPrimaryLogs is used reset the binary logs 673 func ResetPrimaryLogs(t *testing.T, curPrimary *cluster.Vttablet) { 674 _, err := RunSQL(t, "FLUSH BINARY LOGS", curPrimary, "") 675 require.NoError(t, err) 676 677 binLogsOutput, err := RunSQL(t, "SHOW BINARY LOGS", curPrimary, "") 678 require.NoError(t, err) 679 require.True(t, len(binLogsOutput.Rows) >= 2, "there should be atlease 2 binlog files") 680 681 lastLogFile := binLogsOutput.Rows[len(binLogsOutput.Rows)-1][0].ToString() 682 683 _, err = RunSQL(t, "PURGE BINARY LOGS TO '"+lastLogFile+"'", curPrimary, "") 684 require.NoError(t, err) 685 } 686 687 // CheckSourcePort is used to check that the replica has the given source port set in its MySQL instance 688 func CheckSourcePort(t *testing.T, replica *cluster.Vttablet, source *cluster.Vttablet, timeToWait time.Duration) { 689 timeout := time.After(timeToWait) 690 for { 691 select { 692 case <-timeout: 693 t.Fatal("timedout waiting for correct primary to be setup") 694 return 695 default: 696 res, err := RunSQL(t, "SHOW SLAVE STATUS", replica, "") 697 require.NoError(t, err) 698 699 if len(res.Rows) != 1 { 700 log.Warningf("no replication status yet, will retry") 701 break 702 } 703 704 for idx, field := range res.Fields { 705 if strings.EqualFold(field.Name, "MASTER_PORT") || strings.EqualFold(field.Name, "SOURCE_PORT") { 706 port, err := res.Rows[0][idx].ToInt64() 707 require.NoError(t, err) 708 if port == int64(source.MySQLPort) { 709 return 710 } 711 } 712 } 713 log.Warningf("source port not set correctly yet, will retry") 714 } 715 time.Sleep(300 * time.Millisecond) 716 } 717 } 718 719 // MakeAPICall is used make an API call given the url. It returns the status and the body of the response received 720 func MakeAPICall(t *testing.T, vtorc *cluster.VTOrcProcess, url string) (status int, response string) { 721 t.Helper() 722 var err error 723 status, response, err = vtorc.MakeAPICall(url) 724 require.NoError(t, err) 725 return status, response 726 } 727 728 // MakeAPICallRetry is used to make an API call and retry on the given condition. 729 // The function provided takes in the status and response and returns if we should continue to retry or not 730 func MakeAPICallRetry(t *testing.T, vtorc *cluster.VTOrcProcess, url string, retry func(int, string) bool) (status int, response string) { 731 t.Helper() 732 timeout := time.After(10 * time.Second) 733 for { 734 select { 735 case <-timeout: 736 t.Fatal("timed out waiting for api to work") 737 return 738 default: 739 status, response = MakeAPICall(t, vtorc, url) 740 if retry(status, response) { 741 time.Sleep(1 * time.Second) 742 break 743 } 744 return status, response 745 } 746 } 747 } 748 749 // SetupNewClusterSemiSync is used to setup a new cluster with semi-sync set. 750 // It creates a cluster with 4 tablets, one of which is a Replica 751 func SetupNewClusterSemiSync(t *testing.T) *VTOrcClusterInfo { 752 var tablets []*cluster.Vttablet 753 clusterInstance := cluster.NewCluster(Cell1, Hostname) 754 keyspace := &cluster.Keyspace{Name: keyspaceName} 755 // Start topo server 756 err := clusterInstance.StartTopo() 757 require.NoError(t, err, "Error starting topo: %v", err) 758 759 err = clusterInstance.TopoProcess.ManageTopoDir("mkdir", "/vitess/"+Cell1) 760 require.NoError(t, err, "Error managing topo: %v", err) 761 762 for i := 0; i < 3; i++ { 763 tablet := clusterInstance.NewVttabletInstance("replica", 100+i, Cell1) 764 tablets = append(tablets, tablet) 765 } 766 tablet := clusterInstance.NewVttabletInstance("rdonly", 103, Cell1) 767 tablets = append(tablets, tablet) 768 769 shard := &cluster.Shard{Name: shardName} 770 shard.Vttablets = tablets 771 772 clusterInstance.VtTabletExtraArgs = []string{ 773 "--lock_tables_timeout", "5s", 774 "--disable_active_reparents", 775 } 776 777 // Initialize Cluster 778 err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard}) 779 require.NoError(t, err, "Cannot launch cluster: %v", err) 780 781 //Start MySql 782 var mysqlCtlProcessList []*exec.Cmd 783 for _, shard := range clusterInstance.Keyspaces[0].Shards { 784 for _, tablet := range shard.Vttablets { 785 log.Infof("Starting MySql for tablet %v", tablet.Alias) 786 proc, err := tablet.MysqlctlProcess.StartProcess() 787 if err != nil { 788 require.NoError(t, err, "Error starting start mysql: %v", err) 789 } 790 mysqlCtlProcessList = append(mysqlCtlProcessList, proc) 791 } 792 } 793 794 // Wait for mysql processes to start 795 for _, proc := range mysqlCtlProcessList { 796 if err := proc.Wait(); err != nil { 797 require.NoError(t, err, "Error starting mysql: %v", err) 798 } 799 } 800 801 for _, tablet := range tablets { 802 require.NoError(t, err) 803 // Start the tablet 804 err = tablet.VttabletProcess.Setup() 805 require.NoError(t, err) 806 } 807 808 for _, tablet := range tablets { 809 err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) 810 require.NoError(t, err) 811 } 812 813 vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInstance.VtctldProcess.GrpcPort, clusterInstance.TmpDirectory) 814 815 out, err := vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, "--durability-policy=semi_sync") 816 require.NoError(t, err, out) 817 818 // create topo server connection 819 ts, err := topo.OpenServer(*clusterInstance.TopoFlavorString(), clusterInstance.VtctlProcess.TopoGlobalAddress, clusterInstance.VtctlProcess.TopoGlobalRoot) 820 require.NoError(t, err) 821 clusterInfo := &VTOrcClusterInfo{ 822 ClusterInstance: clusterInstance, 823 Ts: ts, 824 CellInfos: nil, 825 lastUsedValue: 100, 826 VtctldClientProcess: vtctldClientProcess, 827 } 828 return clusterInfo 829 } 830 831 // AddSemiSyncKeyspace is used to setup a new keyspace with semi-sync. 832 // It creates a keyspace with 3 tablets 833 func AddSemiSyncKeyspace(t *testing.T, clusterInfo *VTOrcClusterInfo) { 834 var tablets []*cluster.Vttablet 835 keyspaceSemiSyncName := "ks2" 836 keyspace := &cluster.Keyspace{Name: keyspaceSemiSyncName} 837 838 for i := 0; i < 3; i++ { 839 tablet := clusterInfo.ClusterInstance.NewVttabletInstance("replica", 300+i, Cell1) 840 tablets = append(tablets, tablet) 841 } 842 843 shard := &cluster.Shard{Name: shardName} 844 shard.Vttablets = tablets 845 846 oldVttabletArgs := clusterInfo.ClusterInstance.VtTabletExtraArgs 847 defer func() { 848 clusterInfo.ClusterInstance.VtTabletExtraArgs = oldVttabletArgs 849 }() 850 clusterInfo.ClusterInstance.VtTabletExtraArgs = []string{ 851 "--lock_tables_timeout", "5s", 852 "--disable_active_reparents", 853 } 854 855 // Initialize Cluster 856 err := clusterInfo.ClusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard}) 857 require.NoError(t, err, "Cannot launch cluster: %v", err) 858 859 //Start MySql 860 var mysqlCtlProcessList []*exec.Cmd 861 for _, shard := range clusterInfo.ClusterInstance.Keyspaces[1].Shards { 862 for _, tablet := range shard.Vttablets { 863 log.Infof("Starting MySql for tablet %v", tablet.Alias) 864 proc, err := tablet.MysqlctlProcess.StartProcess() 865 if err != nil { 866 require.NoError(t, err, "Error starting start mysql: %v", err) 867 } 868 mysqlCtlProcessList = append(mysqlCtlProcessList, proc) 869 } 870 } 871 872 // Wait for mysql processes to start 873 for _, proc := range mysqlCtlProcessList { 874 if err := proc.Wait(); err != nil { 875 require.NoError(t, err, "Error starting mysql: %v", err) 876 } 877 } 878 879 for _, tablet := range tablets { 880 require.NoError(t, err) 881 // Start the tablet 882 err = tablet.VttabletProcess.Setup() 883 require.NoError(t, err) 884 } 885 886 for _, tablet := range tablets { 887 err := tablet.VttabletProcess.WaitForTabletStatuses([]string{"SERVING", "NOT_SERVING"}) 888 require.NoError(t, err) 889 } 890 891 vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInfo.ClusterInstance.VtctldProcess.GrpcPort, clusterInfo.ClusterInstance.TmpDirectory) 892 out, err := vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceSemiSyncName, "--durability-policy=semi_sync") 893 require.NoError(t, err, out) 894 } 895 896 // IsSemiSyncSetupCorrectly checks that the semi-sync is setup correctly on the given vttablet 897 func IsSemiSyncSetupCorrectly(t *testing.T, tablet *cluster.Vttablet, semiSyncVal string) bool { 898 dbVar, err := tablet.VttabletProcess.GetDBVar("rpl_semi_sync_slave_enabled", "") 899 require.NoError(t, err) 900 return semiSyncVal == dbVar 901 } 902 903 // IsPrimarySemiSyncSetupCorrectly checks that the priamry side semi-sync is setup correctly on the given vttablet 904 func IsPrimarySemiSyncSetupCorrectly(t *testing.T, tablet *cluster.Vttablet, semiSyncVal string) bool { 905 dbVar, err := tablet.VttabletProcess.GetDBVar("rpl_semi_sync_master_enabled", "") 906 require.NoError(t, err) 907 return semiSyncVal == dbVar 908 } 909 910 // WaitForReadOnlyValue waits for the read_only global variable to reach the provided value 911 func WaitForReadOnlyValue(t *testing.T, curPrimary *cluster.Vttablet, expectValue int64) (match bool) { 912 timeout := 15 * time.Second 913 startTime := time.Now() 914 for time.Since(startTime) < timeout { 915 qr, err := RunSQL(t, "select @@global.read_only as read_only", curPrimary, "") 916 require.NoError(t, err) 917 require.NotNil(t, qr) 918 row := qr.Named().Row() 919 require.NotNil(t, row) 920 readOnly, err := row.ToInt64("read_only") 921 require.NoError(t, err) 922 if readOnly == expectValue { 923 return true 924 } 925 time.Sleep(time.Second) 926 } 927 return false 928 } 929 930 // WaitForSuccessfulRecoveryCount waits until the given recovery name's count of successful runs matches the count expected 931 func WaitForSuccessfulRecoveryCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, recoveryName string, countExpected int) { 932 t.Helper() 933 timeout := 15 * time.Second 934 startTime := time.Now() 935 for time.Since(startTime) < timeout { 936 vars := vtorcInstance.GetVars() 937 successfulRecoveriesMap := vars["SuccessfulRecoveries"].(map[string]interface{}) 938 successCount := successfulRecoveriesMap[recoveryName] 939 if successCount == countExpected { 940 return 941 } 942 time.Sleep(time.Second) 943 } 944 vars := vtorcInstance.GetVars() 945 successfulRecoveriesMap := vars["SuccessfulRecoveries"].(map[string]interface{}) 946 successCount := successfulRecoveriesMap[recoveryName] 947 assert.EqualValues(t, countExpected, successCount) 948 }