vitess.io/vitess@v0.16.2/go/vt/schemamanager/tablet_executor.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package schemamanager 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/sync2" 26 "vitess.io/vitess/go/timer" 27 "vitess.io/vitess/go/vt/logutil" 28 "vitess.io/vitess/go/vt/schema" 29 "vitess.io/vitess/go/vt/sqlparser" 30 "vitess.io/vitess/go/vt/topo" 31 "vitess.io/vitess/go/vt/vtctl/schematools" 32 "vitess.io/vitess/go/vt/vterrors" 33 "vitess.io/vitess/go/vt/vttablet/tmclient" 34 35 querypb "vitess.io/vitess/go/vt/proto/query" 36 tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" 37 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 38 ) 39 40 // TabletExecutor applies schema changes to all tablets. 41 type TabletExecutor struct { 42 migrationContext string 43 ts *topo.Server 44 tmc tmclient.TabletManagerClient 45 logger logutil.Logger 46 tablets []*topodatapb.Tablet 47 isClosed bool 48 allowBigSchemaChange bool 49 keyspace string 50 waitReplicasTimeout time.Duration 51 ddlStrategySetting *schema.DDLStrategySetting 52 uuids []string 53 skipPreflight bool 54 } 55 56 // NewTabletExecutor creates a new TabletExecutor instance 57 func NewTabletExecutor(migrationContext string, ts *topo.Server, tmc tmclient.TabletManagerClient, logger logutil.Logger, waitReplicasTimeout time.Duration) *TabletExecutor { 58 return &TabletExecutor{ 59 ts: ts, 60 tmc: tmc, 61 logger: logger, 62 isClosed: true, 63 allowBigSchemaChange: false, 64 waitReplicasTimeout: waitReplicasTimeout, 65 migrationContext: migrationContext, 66 } 67 } 68 69 // AllowBigSchemaChange changes TabletExecutor such that big schema changes 70 // will no longer be rejected. 71 func (exec *TabletExecutor) AllowBigSchemaChange() { 72 exec.allowBigSchemaChange = true 73 } 74 75 // DisallowBigSchemaChange enables the check for big schema changes such that 76 // TabletExecutor will reject these. 77 func (exec *TabletExecutor) DisallowBigSchemaChange() { 78 exec.allowBigSchemaChange = false 79 } 80 81 // SetDDLStrategy applies ddl_strategy from command line flags 82 func (exec *TabletExecutor) SetDDLStrategy(ddlStrategy string) error { 83 ddlStrategySetting, err := schema.ParseDDLStrategy(ddlStrategy) 84 if err != nil { 85 return err 86 } 87 exec.ddlStrategySetting = ddlStrategySetting 88 return nil 89 } 90 91 // SetUUIDList sets a (possibly empty) list of provided UUIDs for schema migrations 92 func (exec *TabletExecutor) SetUUIDList(uuids []string) error { 93 uuidsMap := map[string]bool{} 94 for _, uuid := range uuids { 95 if !schema.IsOnlineDDLUUID(uuid) { 96 return fmt.Errorf("Not a valid UUID: %s", uuid) 97 } 98 uuidsMap[uuid] = true 99 } 100 if len(uuidsMap) != len(uuids) { 101 return fmt.Errorf("UUID values must be unique") 102 } 103 exec.uuids = uuids 104 return nil 105 } 106 107 // hasProvidedUUIDs returns true when UUIDs were provided 108 func (exec *TabletExecutor) hasProvidedUUIDs() bool { 109 return len(exec.uuids) != 0 110 } 111 112 // SkipPreflight disables preflight checks 113 func (exec *TabletExecutor) SkipPreflight() { 114 exec.skipPreflight = true 115 } 116 117 // Open opens a connection to the primary for every shard. 118 func (exec *TabletExecutor) Open(ctx context.Context, keyspace string) error { 119 if !exec.isClosed { 120 return nil 121 } 122 exec.keyspace = keyspace 123 shardNames, err := exec.ts.GetShardNames(ctx, keyspace) 124 if err != nil { 125 return fmt.Errorf("unable to get shard names for keyspace: %s, error: %v", keyspace, err) 126 } 127 exec.tablets = make([]*topodatapb.Tablet, len(shardNames)) 128 for i, shardName := range shardNames { 129 shardInfo, err := exec.ts.GetShard(ctx, keyspace, shardName) 130 if err != nil { 131 return fmt.Errorf("unable to get shard info, keyspace: %s, shard: %s, error: %v", keyspace, shardName, err) 132 } 133 if !shardInfo.HasPrimary() { 134 return fmt.Errorf("shard: %s does not have a primary", shardName) 135 } 136 tabletInfo, err := exec.ts.GetTablet(ctx, shardInfo.PrimaryAlias) 137 if err != nil { 138 return fmt.Errorf("unable to get primary tablet info, keyspace: %s, shard: %s, error: %v", keyspace, shardName, err) 139 } 140 exec.tablets[i] = tabletInfo.Tablet 141 } 142 143 if len(exec.tablets) == 0 { 144 return fmt.Errorf("keyspace: %s does not contain any primary tablets", keyspace) 145 } 146 exec.isClosed = false 147 return nil 148 } 149 150 // Validate validates a list of sql statements. 151 func (exec *TabletExecutor) Validate(ctx context.Context, sqls []string) error { 152 if exec.isClosed { 153 return fmt.Errorf("executor is closed") 154 } 155 156 // We ignore DATABASE-level DDLs here because detectBigSchemaChanges doesn't 157 // look at them anyway. 158 parsedDDLs, _, _, _, err := exec.parseDDLs(sqls) 159 if err != nil { 160 return err 161 } 162 163 bigSchemaChange, err := exec.detectBigSchemaChanges(ctx, parsedDDLs) 164 if bigSchemaChange && exec.allowBigSchemaChange { 165 exec.logger.Warningf("Processing big schema change. This may cause visible MySQL downtime.") 166 return nil 167 } 168 return err 169 } 170 171 func (exec *TabletExecutor) parseDDLs(sqls []string) ([]sqlparser.DDLStatement, []sqlparser.DBDDLStatement, [](*sqlparser.RevertMigration), [](*sqlparser.AlterMigration), error) { 172 parsedDDLs := make([]sqlparser.DDLStatement, 0) 173 parsedDBDDLs := make([]sqlparser.DBDDLStatement, 0) 174 revertStatements := make([](*sqlparser.RevertMigration), 0) 175 alterMigrationStatements := make([](*sqlparser.AlterMigration), 0) 176 for _, sql := range sqls { 177 stmt, err := sqlparser.Parse(sql) 178 if err != nil { 179 return nil, nil, nil, nil, fmt.Errorf("failed to parse sql: %s, got error: %v", sql, err) 180 } 181 switch stmt := stmt.(type) { 182 case sqlparser.DDLStatement: 183 parsedDDLs = append(parsedDDLs, stmt) 184 case sqlparser.DBDDLStatement: 185 parsedDBDDLs = append(parsedDBDDLs, stmt) 186 case *sqlparser.RevertMigration: 187 revertStatements = append(revertStatements, stmt) 188 case *sqlparser.AlterMigration: 189 alterMigrationStatements = append(alterMigrationStatements, stmt) 190 default: 191 if len(exec.tablets) != 1 { 192 return nil, nil, nil, nil, fmt.Errorf("non-ddl statements can only be executed for single shard keyspaces: %s", sql) 193 } 194 } 195 } 196 return parsedDDLs, parsedDBDDLs, revertStatements, alterMigrationStatements, nil 197 } 198 199 // IsOnlineSchemaDDL returns true if we expect to run a online schema change DDL 200 func (exec *TabletExecutor) isOnlineSchemaDDL(stmt sqlparser.Statement) (isOnline bool) { 201 switch stmt := stmt.(type) { 202 case sqlparser.DDLStatement: 203 if exec.ddlStrategySetting == nil { 204 return false 205 } 206 if exec.ddlStrategySetting.Strategy.IsDirect() { 207 return false 208 } 209 switch stmt.GetAction() { 210 case sqlparser.CreateDDLAction, sqlparser.DropDDLAction, sqlparser.AlterDDLAction: 211 return true 212 } 213 case *sqlparser.RevertMigration: 214 return true 215 } 216 return false 217 } 218 219 // a schema change that satisfies any following condition is considered 220 // to be a big schema change and will be rejected. 221 // 1. Alter more than 100,000 rows. 222 // 2. Change a table with more than 2,000,000 rows (Drops are fine). 223 func (exec *TabletExecutor) detectBigSchemaChanges(ctx context.Context, parsedDDLs []sqlparser.DDLStatement) (bool, error) { 224 // We want to avoid any overhead if possible. If all DDLs are online schema changes, then we want to 225 // skip GetSchema altogether. 226 foundAnyNonOnlineDDL := false 227 for _, ddl := range parsedDDLs { 228 if !exec.isOnlineSchemaDDL(ddl) { 229 foundAnyNonOnlineDDL = true 230 } 231 } 232 if !foundAnyNonOnlineDDL { 233 return false, nil 234 } 235 // exec.tablets is guaranteed to have at least one element; 236 // Otherwise, Open should fail and executor should fail. 237 primaryTabletInfo := exec.tablets[0] 238 // get database schema, excluding views. 239 req := &tabletmanagerdatapb.GetSchemaRequest{Tables: []string{}, ExcludeTables: []string{}, TableSchemaOnly: true} 240 dbSchema, err := exec.tmc.GetSchema(ctx, primaryTabletInfo, req) 241 if err != nil { 242 return false, fmt.Errorf("unable to get database schema, error: %v", err) 243 } 244 tableWithCount := make(map[string]uint64, len(dbSchema.TableDefinitions)) 245 for _, tableSchema := range dbSchema.TableDefinitions { 246 tableWithCount[tableSchema.Name] = tableSchema.RowCount 247 } 248 for _, ddl := range parsedDDLs { 249 if exec.isOnlineSchemaDDL(ddl) { 250 // Since this is an online schema change, there is no need to worry about big changes 251 continue 252 } 253 switch ddl.GetAction() { 254 case sqlparser.DropDDLAction, sqlparser.CreateDDLAction, sqlparser.TruncateDDLAction, sqlparser.RenameDDLAction: 255 continue 256 } 257 tableName := ddl.GetTable().Name.String() 258 if rowCount, ok := tableWithCount[tableName]; ok { 259 if rowCount > 100000 && ddl.GetAction() == sqlparser.AlterDDLAction { 260 return true, fmt.Errorf( 261 "big schema change detected. Disable check with -allow_long_unavailability. ddl: %s alters a table with more than 100 thousand rows", sqlparser.String(ddl)) 262 } 263 if rowCount > 2000000 { 264 return true, fmt.Errorf( 265 "big schema change detected. Disable check with -allow_long_unavailability. ddl: %s changes a table with more than 2 million rows", sqlparser.String(ddl)) 266 } 267 } 268 } 269 return false, nil 270 } 271 272 func (exec *TabletExecutor) preflightSchemaChanges(ctx context.Context, sqls []string) error { 273 if exec.skipPreflight { 274 return nil 275 } 276 _, err := exec.tmc.PreflightSchema(ctx, exec.tablets[0], sqls) 277 return err 278 } 279 280 // executeSQL executes a single SQL statement either as online DDL or synchronously on all tablets. 281 // In online DDL case, the query may be exploded into multiple queries during 282 func (exec *TabletExecutor) executeSQL(ctx context.Context, sql string, providedUUID string, execResult *ExecuteResult) (executedAsynchronously bool, err error) { 283 stmt, err := sqlparser.Parse(sql) 284 if err != nil { 285 return false, err 286 } 287 switch stmt := stmt.(type) { 288 case sqlparser.DDLStatement: 289 if exec.isOnlineSchemaDDL(stmt) { 290 onlineDDLs, err := schema.NewOnlineDDLs(exec.keyspace, sql, stmt, exec.ddlStrategySetting, exec.migrationContext, providedUUID) 291 if err != nil { 292 execResult.ExecutorErr = err.Error() 293 return false, err 294 } 295 for _, onlineDDL := range onlineDDLs { 296 exec.executeOnAllTablets(ctx, execResult, onlineDDL.SQL, true) 297 if len(execResult.SuccessShards) > 0 { 298 execResult.UUIDs = append(execResult.UUIDs, onlineDDL.UUID) 299 exec.logger.Printf("%s\n", onlineDDL.UUID) 300 } 301 } 302 return true, nil 303 } 304 case *sqlparser.RevertMigration: 305 strategySetting := schema.NewDDLStrategySetting(schema.DDLStrategyOnline, exec.ddlStrategySetting.Options) 306 onlineDDL, err := schema.NewOnlineDDL(exec.keyspace, "", sqlparser.String(stmt), strategySetting, exec.migrationContext, providedUUID) 307 if err != nil { 308 execResult.ExecutorErr = err.Error() 309 return false, err 310 } 311 exec.executeOnAllTablets(ctx, execResult, onlineDDL.SQL, true) 312 execResult.UUIDs = append(execResult.UUIDs, onlineDDL.UUID) 313 exec.logger.Printf("%s\n", onlineDDL.UUID) 314 return true, nil 315 case *sqlparser.AlterMigration: 316 exec.executeOnAllTablets(ctx, execResult, sql, true) 317 return true, nil 318 } 319 exec.executeOnAllTablets(ctx, execResult, sql, false) 320 return false, nil 321 } 322 323 // Execute applies schema changes 324 func (exec *TabletExecutor) Execute(ctx context.Context, sqls []string) *ExecuteResult { 325 execResult := ExecuteResult{} 326 execResult.Sqls = sqls 327 if exec.isClosed { 328 execResult.ExecutorErr = "executor is closed" 329 return &execResult 330 } 331 startTime := time.Now() 332 defer func() { execResult.TotalTimeSpent = time.Since(startTime) }() 333 334 // Lock the keyspace so our schema change doesn't overlap with other 335 // keyspace-wide operations like resharding migrations. 336 ctx, unlock, lockErr := exec.ts.LockKeyspace(ctx, exec.keyspace, "ApplySchemaKeyspace") 337 if lockErr != nil { 338 execResult.ExecutorErr = vterrors.Wrapf(lockErr, "lockErr in ApplySchemaKeyspace %v", exec.keyspace).Error() 339 return &execResult 340 } 341 defer func() { 342 // This is complicated because execResult.ExecutorErr 343 // is not of type error. 344 var unlockErr error 345 unlock(&unlockErr) 346 if execResult.ExecutorErr == "" && unlockErr != nil { 347 execResult.ExecutorErr = vterrors.Wrapf(unlockErr, "unlockErr in ApplySchemaKeyspace %v", exec.keyspace).Error() 348 } 349 }() 350 351 // Make sure the schema changes introduce a table definition change. 352 if err := exec.preflightSchemaChanges(ctx, sqls); err != nil { 353 execResult.ExecutorErr = err.Error() 354 return &execResult 355 } 356 357 if exec.hasProvidedUUIDs() && len(exec.uuids) != len(sqls) { 358 execResult.ExecutorErr = fmt.Sprintf("provided %v UUIDs do not match number of DDLs %v", len(exec.uuids), len(sqls)) 359 return &execResult 360 } 361 providedUUID := "" 362 363 rl := timer.NewRateLimiter(topo.RemoteOperationTimeout / 4) 364 defer rl.Stop() 365 366 syncOperationExecuted := false 367 368 // ReloadSchema once. Do it even if we do an early return on error 369 defer func() { 370 if !syncOperationExecuted { 371 exec.logger.Infof("Skipped ReloadSchema since all SQLs executed asynchronously") 372 return 373 } 374 // same shards will appear multiple times in execResult.SuccessShards when there are 375 // multiple SQLs 376 uniqueShards := map[string]*ShardResult{} 377 for i := range execResult.SuccessShards { 378 // Please do not change the above iteration to "for result := range ...". 379 // This is because we want to end up grabbing a pointer to the result. But golang's "for" 380 // implementation reuses the iteration parameter, and we end up reusing the same pointer. 381 result := &execResult.SuccessShards[i] 382 uniqueShards[result.Shard] = result 383 } 384 var wg sync.WaitGroup 385 // If all shards succeeded, wait (up to waitReplicasTimeout) for replicas to 386 // execute the schema change via replication. This is best-effort, meaning 387 // we still return overall success if the timeout expires. 388 concurrency := sync2.NewSemaphore(10, 0) 389 reloadCtx, cancel := context.WithTimeout(ctx, exec.waitReplicasTimeout) 390 defer cancel() 391 for _, result := range uniqueShards { 392 wg.Add(1) 393 go func(result *ShardResult) { 394 defer wg.Done() 395 exec.logger.Infof("ReloadSchema on shard: %s", result.Shard) 396 schematools.ReloadShard( 397 reloadCtx, 398 exec.ts, 399 exec.tmc, 400 exec.logger, 401 exec.keyspace, 402 result.Shard, 403 result.Position, 404 concurrency, 405 true, /* includePrimary */ 406 ) 407 }(result) 408 } 409 wg.Wait() 410 }() 411 412 for index, sql := range sqls { 413 // Attempt to renew lease: 414 if err := rl.Do(func() error { return topo.CheckKeyspaceLockedAndRenew(ctx, exec.keyspace) }); err != nil { 415 execResult.ExecutorErr = vterrors.Wrapf(err, "CheckKeyspaceLocked in ApplySchemaKeyspace %v", exec.keyspace).Error() 416 return &execResult 417 } 418 execResult.CurSQLIndex = index 419 if exec.hasProvidedUUIDs() { 420 providedUUID = exec.uuids[index] 421 } 422 executedAsynchronously, err := exec.executeSQL(ctx, sql, providedUUID, &execResult) 423 if err != nil { 424 execResult.ExecutorErr = err.Error() 425 return &execResult 426 } 427 if !executedAsynchronously { 428 syncOperationExecuted = true 429 } 430 if len(execResult.FailedShards) > 0 { 431 break 432 } 433 } 434 435 return &execResult 436 } 437 438 // executeOnAllTablets runs a query on all tablets, synchronously. This can be a long running operation. 439 func (exec *TabletExecutor) executeOnAllTablets(ctx context.Context, execResult *ExecuteResult, sql string, viaQueryService bool) { 440 var wg sync.WaitGroup 441 numOfPrimaryTablets := len(exec.tablets) 442 wg.Add(numOfPrimaryTablets) 443 errChan := make(chan ShardWithError, numOfPrimaryTablets) 444 successChan := make(chan ShardResult, numOfPrimaryTablets) 445 for _, tablet := range exec.tablets { 446 go func(tablet *topodatapb.Tablet) { 447 defer wg.Done() 448 exec.executeOneTablet(ctx, tablet, sql, viaQueryService, errChan, successChan) 449 }(tablet) 450 } 451 wg.Wait() 452 close(errChan) 453 close(successChan) 454 execResult.FailedShards = make([]ShardWithError, 0, len(errChan)) 455 execResult.SuccessShards = make([]ShardResult, 0, len(successChan)) 456 for e := range errChan { 457 execResult.FailedShards = append(execResult.FailedShards, e) 458 } 459 for r := range successChan { 460 execResult.SuccessShards = append(execResult.SuccessShards, r) 461 } 462 463 if len(execResult.FailedShards) > 0 { 464 return 465 } 466 } 467 468 func (exec *TabletExecutor) executeOneTablet( 469 ctx context.Context, 470 tablet *topodatapb.Tablet, 471 sql string, 472 viaQueryService bool, 473 errChan chan ShardWithError, 474 successChan chan ShardResult) { 475 476 var result *querypb.QueryResult 477 var err error 478 if viaQueryService { 479 result, err = exec.tmc.ExecuteQuery(ctx, tablet, &tabletmanagerdatapb.ExecuteQueryRequest{ 480 Query: []byte(sql), 481 MaxRows: 10, 482 }) 483 } else { 484 if exec.ddlStrategySetting != nil && exec.ddlStrategySetting.IsAllowZeroInDateFlag() { 485 // --allow-zero-in-date Applies to DDLs 486 stmt, err := sqlparser.Parse(string(sql)) 487 if err != nil { 488 errChan <- ShardWithError{Shard: tablet.Shard, Err: err.Error()} 489 return 490 } 491 if ddlStmt, ok := stmt.(sqlparser.DDLStatement); ok { 492 // Add comments directive to allow zero in date 493 const directive = `/*vt+ allowZeroInDate=true */` 494 ddlStmt.SetComments(ddlStmt.GetParsedComments().Prepend(directive)) 495 sql = sqlparser.String(ddlStmt) 496 } 497 } 498 result, err = exec.tmc.ExecuteFetchAsDba(ctx, tablet, false, &tabletmanagerdatapb.ExecuteFetchAsDbaRequest{ 499 Query: []byte(sql), 500 MaxRows: 10, 501 }) 502 } 503 if err != nil { 504 errChan <- ShardWithError{Shard: tablet.Shard, Err: err.Error()} 505 return 506 } 507 // Get a replication position that's guaranteed to be after the schema change 508 // was applied on the primary. 509 pos, err := exec.tmc.PrimaryPosition(ctx, tablet) 510 if err != nil { 511 errChan <- ShardWithError{ 512 Shard: tablet.Shard, 513 Err: fmt.Sprintf("couldn't get replication position after applying schema change on primary: %v", err), 514 } 515 return 516 } 517 successChan <- ShardResult{ 518 Shard: tablet.Shard, 519 Result: result, 520 Position: pos, 521 } 522 } 523 524 // Close clears tablet executor states 525 func (exec *TabletExecutor) Close() { 526 if !exec.isClosed { 527 exec.tablets = nil 528 exec.isClosed = true 529 } 530 } 531 532 var _ Executor = (*TabletExecutor)(nil)