vitess.io/vitess@v0.16.2/go/vt/wrangler/vexec.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package wrangler 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "math" 24 "strings" 25 "sync" 26 "time" 27 28 workflow2 "vitess.io/vitess/go/vt/vtctl/workflow" 29 30 "google.golang.org/protobuf/encoding/prototext" 31 32 "k8s.io/apimachinery/pkg/util/sets" 33 34 "vitess.io/vitess/go/mysql" 35 "vitess.io/vitess/go/sqltypes" 36 "vitess.io/vitess/go/vt/binlog/binlogplayer" 37 "vitess.io/vitess/go/vt/concurrency" 38 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 39 querypb "vitess.io/vitess/go/vt/proto/query" 40 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 41 "vitess.io/vitess/go/vt/sqlparser" 42 "vitess.io/vitess/go/vt/topo" 43 vtctldvexec "vitess.io/vitess/go/vt/vtctl/workflow/vexec" // renamed to avoid a collision with the vexec struct in this package 44 "vitess.io/vitess/go/vt/vterrors" 45 ) 46 47 const ( 48 vexecTableQualifier = "_vt" 49 vreplicationTableName = "vreplication" 50 sqlVReplicationDelete = "delete from _vt.vreplication" 51 ) 52 53 // vexec is the construct by which we run a query against backend shards. vexec is created by user-facing 54 // interface, like vtctl or vtgate. 55 // vexec parses, analyzes and plans the query, and maintains state of each such step's result. 
56 type vexec struct { 57 ctx context.Context 58 workflow string 59 keyspace string 60 // query is vexec's input 61 query string 62 // stmt is parsed from the query 63 stmt sqlparser.Statement 64 // tableName is extracted from the query, and used to determine the plan 65 tableName string 66 // planner will plan and execute a (possibly rewritten) query on backend shards 67 planner vexecPlanner 68 // plannedQuery is the result of supplementing original query with extra conditionals 69 plannedQuery string 70 71 wr *Wrangler 72 73 primaries []*topo.TabletInfo 74 } 75 76 func newVExec(ctx context.Context, workflow, keyspace, query string, wr *Wrangler) *vexec { 77 return &vexec{ 78 ctx: ctx, 79 workflow: workflow, 80 keyspace: keyspace, 81 query: query, 82 wr: wr, 83 } 84 } 85 86 // QueryResultForRowsAffected aggregates results into row-type results (fields + values) 87 func (wr *Wrangler) QueryResultForRowsAffected(results map[*topo.TabletInfo]*sqltypes.Result) *sqltypes.Result { 88 var qr = &sqltypes.Result{} 89 qr.Fields = []*querypb.Field{{ 90 Name: "Tablet", 91 Type: sqltypes.VarBinary, 92 }, { 93 Name: "RowsAffected", 94 Type: sqltypes.Uint64, 95 }} 96 var row2 []sqltypes.Value 97 for tablet, result := range results { 98 row2 = nil 99 row2 = append(row2, sqltypes.NewVarBinary(tablet.AliasString())) 100 row2 = append(row2, sqltypes.NewUint64(result.RowsAffected)) 101 qr.Rows = append(qr.Rows, row2) 102 } 103 return qr 104 } 105 106 // QueryResultForTabletResults aggregates given results into a "rows-affected" type result (no row data) 107 func (wr *Wrangler) QueryResultForTabletResults(results map[*topo.TabletInfo]*sqltypes.Result) *sqltypes.Result { 108 var qr = &sqltypes.Result{} 109 defaultFields := []*querypb.Field{{ 110 Name: "Tablet", 111 Type: sqltypes.VarBinary, 112 }} 113 var row2 []sqltypes.Value 114 for tablet, result := range results { 115 if qr.Fields == nil { 116 qr.Fields = append(qr.Fields, defaultFields...) 
117 qr.Fields = append(qr.Fields, result.Fields...) 118 } 119 for _, row := range result.Rows { 120 row2 = nil 121 row2 = append(row2, sqltypes.NewVarBinary(tablet.AliasString())) 122 row2 = append(row2, row...) 123 qr.Rows = append(qr.Rows, row2) 124 } 125 } 126 return qr 127 } 128 129 // VExecResult runs VExec and the naggregates the results into a single *sqltypes.Result 130 func (wr *Wrangler) VExecResult(ctx context.Context, workflow, keyspace, query string, dryRun bool) (qr *sqltypes.Result, err error) { 131 132 results, err := wr.VExec(ctx, workflow, keyspace, query, dryRun) 133 if err != nil { 134 return nil, err 135 } 136 if dryRun { 137 return nil, nil 138 } 139 var numFields int 140 for _, result := range results { 141 numFields = len(result.Fields) 142 break 143 } 144 if numFields != 0 { 145 qr = wr.QueryResultForTabletResults(results) 146 } else { 147 qr = wr.QueryResultForRowsAffected(results) 148 } 149 return qr, nil 150 } 151 152 // VExec executes queries on a table on all primaries in the target keyspace of the workflow 153 func (wr *Wrangler) VExec(ctx context.Context, workflow, keyspace, query string, dryRun bool) (map[*topo.TabletInfo]*sqltypes.Result, error) { 154 if wr.VExecFunc != nil { 155 return wr.VExecFunc(ctx, workflow, keyspace, query, dryRun) 156 } 157 results, err := wr.runVexec(ctx, workflow, keyspace, query, dryRun) 158 retResults := make(map[*topo.TabletInfo]*sqltypes.Result) 159 for tablet, result := range results { 160 retResults[tablet] = sqltypes.Proto3ToResult(result) 161 } 162 return retResults, err 163 } 164 165 // runVexec is the main function that runs a dry or wet execution of 'query' on backend shards. 
166 func (wr *Wrangler) runVexec(ctx context.Context, workflow, keyspace, query string, dryRun bool) (map[*topo.TabletInfo]*querypb.QueryResult, error) { 167 vx := newVExec(ctx, workflow, keyspace, query, wr) 168 169 if err := vx.getPrimaries(); err != nil { 170 return nil, err 171 } 172 plan, err := vx.parseAndPlan(ctx) 173 if err != nil { 174 return nil, err 175 } 176 vx.plannedQuery = plan.parsedQuery.Query 177 if dryRun { 178 return nil, vx.outputDryRunInfo(ctx) 179 } 180 return vx.exec() 181 } 182 183 // parseAndPlan parses and analyses the query, then generates a plan 184 func (vx *vexec) parseAndPlan(ctx context.Context) (plan *vexecPlan, err error) { 185 if err := vx.parseQuery(); err != nil { 186 return nil, err 187 } 188 if err := vx.getPlanner(ctx); err != nil { 189 return nil, err 190 } 191 plan, err = vx.buildPlan(ctx) 192 if err != nil { 193 return nil, err 194 } 195 return plan, nil 196 } 197 198 func (vx *vexec) outputDryRunInfo(ctx context.Context) error { 199 return vx.planner.dryRun(ctx) 200 } 201 202 // exec runs our planned query on backend shard primaries. It collects query results from all 203 // shards and returns an aggregate (UNION ALL -like) result. 204 func (vx *vexec) exec() (map[*topo.TabletInfo]*querypb.QueryResult, error) { 205 var wg sync.WaitGroup 206 allErrors := &concurrency.AllErrorRecorder{} 207 results := make(map[*topo.TabletInfo]*querypb.QueryResult) 208 var mu sync.Mutex 209 ctx, cancel := context.WithTimeout(vx.ctx, 10*time.Second) 210 defer cancel() 211 for _, primary := range vx.primaries { 212 wg.Add(1) 213 go func(ctx context.Context, primary *topo.TabletInfo) { 214 defer wg.Done() 215 qr, err := vx.planner.exec(ctx, primary.Alias, vx.plannedQuery) 216 if err != nil { 217 allErrors.RecordError(err) 218 } else { 219 // If we deleted a workflow then let's make a best effort attempt to clean 220 // up any related data. 
221 if vx.query == sqlVReplicationDelete { 222 vx.wr.deleteWorkflowVDiffData(ctx, primary.Tablet, vx.workflow) 223 vx.wr.optimizeCopyStateTable(primary.Tablet) 224 } 225 mu.Lock() 226 results[primary] = qr 227 mu.Unlock() 228 } 229 }(ctx, primary) 230 } 231 wg.Wait() 232 return results, allErrors.AggrError(vterrors.Aggregate) 233 } 234 235 // parseQuery parses the input query 236 func (vx *vexec) parseQuery() (err error) { 237 if vx.stmt, err = sqlparser.Parse(vx.query); err != nil { 238 return err 239 } 240 if vx.tableName, err = extractTableName(vx.stmt); err != nil { 241 return err 242 } 243 return nil 244 } 245 246 // getPrimaries identifies primary tablet for all shards relevant to our keyspace 247 func (vx *vexec) getPrimaries() error { 248 var err error 249 shards, err := vx.wr.ts.GetShardNames(vx.ctx, vx.keyspace) 250 if err != nil { 251 return err 252 } 253 if len(shards) == 0 { 254 return fmt.Errorf("no shards found in keyspace %s", vx.keyspace) 255 } 256 var allPrimaries []*topo.TabletInfo 257 var primary *topo.TabletInfo 258 for _, shard := range shards { 259 if primary, err = vx.getPrimaryForShard(shard); err != nil { 260 return err 261 } 262 if primary == nil { 263 return fmt.Errorf("no primary found for shard %s", shard) 264 } 265 allPrimaries = append(allPrimaries, primary) 266 } 267 vx.primaries = allPrimaries 268 return nil 269 } 270 271 func (vx *vexec) getPrimaryForShard(shard string) (*topo.TabletInfo, error) { 272 si, err := vx.wr.ts.GetShard(vx.ctx, vx.keyspace, shard) 273 if err != nil { 274 return nil, err 275 } 276 if si.PrimaryAlias == nil { 277 return nil, fmt.Errorf("no primary found for shard %s", shard) 278 } 279 primary, err := vx.wr.ts.GetTablet(vx.ctx, si.PrimaryAlias) 280 if err != nil { 281 return nil, err 282 } 283 if primary == nil { 284 return nil, fmt.Errorf("could not get tablet for %s:%s", vx.keyspace, si.PrimaryAlias) 285 } 286 return primary, nil 287 } 288 289 func (wr *Wrangler) convertQueryResultToSQLTypesResult(results 
map[*topo.TabletInfo]*querypb.QueryResult) map[*topo.TabletInfo]*sqltypes.Result { 290 retResults := make(map[*topo.TabletInfo]*sqltypes.Result) 291 for tablet, result := range results { 292 retResults[tablet] = sqltypes.Proto3ToResult(result) 293 } 294 return retResults 295 } 296 297 // WorkflowAction can start/stop/delete or list streams in _vt.vreplication on all primaries in the target keyspace of the workflow. 298 func (wr *Wrangler) WorkflowAction(ctx context.Context, workflow, keyspace, action string, dryRun bool) (map[*topo.TabletInfo]*sqltypes.Result, error) { 299 300 if action == "show" { 301 replStatus, err := wr.ShowWorkflow(ctx, workflow, keyspace) 302 if err != nil { 303 return nil, err 304 } 305 err = dumpStreamListAsJSON(replStatus, wr) 306 return nil, err 307 } else if action == "listall" { 308 workflows, err := wr.ListAllWorkflows(ctx, keyspace, false) 309 if err != nil { 310 return nil, err 311 } 312 wr.printWorkflowList(keyspace, workflows) 313 return nil, err 314 } 315 results, err := wr.execWorkflowAction(ctx, workflow, keyspace, action, dryRun) 316 return wr.convertQueryResultToSQLTypesResult(results), err 317 } 318 319 func (wr *Wrangler) getWorkflowActionQuery(action string) (string, error) { 320 var query string 321 updateSQL := "update _vt.vreplication set state = %s" 322 switch action { 323 case "stop": 324 query = fmt.Sprintf(updateSQL, encodeString("Stopped")) 325 case "start": 326 query = fmt.Sprintf(updateSQL, encodeString("Running")) 327 case "delete": 328 query = sqlVReplicationDelete 329 default: 330 return "", fmt.Errorf("invalid action found: %s", action) 331 } 332 return query, nil 333 } 334 335 func (wr *Wrangler) execWorkflowAction(ctx context.Context, workflow, keyspace, action string, dryRun bool) (map[*topo.TabletInfo]*querypb.QueryResult, error) { 336 query, err := wr.getWorkflowActionQuery(action) 337 if err != nil { 338 return nil, err 339 } 340 return wr.runVexec(ctx, workflow, keyspace, query, dryRun) 341 } 342 343 // 
WorkflowTagAction sets or clears the tags for a workflow in a keyspace 344 func (wr *Wrangler) WorkflowTagAction(ctx context.Context, keyspace string, workflow string, tags string) (map[*topo.TabletInfo]*sqltypes.Result, error) { 345 query := fmt.Sprintf("update _vt.vreplication set tags = %s", encodeString(tags)) 346 results, err := wr.runVexec(ctx, workflow, keyspace, query, false) 347 return wr.convertQueryResultToSQLTypesResult(results), err 348 } 349 350 // ReplicationStatusResult represents the result of trying to get the replication status for a given workflow. 351 type ReplicationStatusResult struct { 352 // Workflow represents the name of the workflow relevant to the related replication statuses. 353 Workflow string 354 // SourceLocation represents the keyspace and shards that we are vreplicating from. 355 SourceLocation ReplicationLocation 356 // TargetLocation represents the keyspace and shards that we are vreplicating into. 357 TargetLocation ReplicationLocation 358 // MaxVReplicationLag represents the lag between the current time and the last time an event was seen from the 359 // source shards. This defines the "liveness" of the source streams. This will be high only if one of the source streams 360 // is no longer running (say, due to a network partition , primary not being available, or a vstreamer failure) 361 // MaxVReplicationTransactionLag (see below) represents the "mysql" replication lag, i.e. how far behind we are in 362 // terms of data replication from the source to the target. 363 MaxVReplicationLag int64 364 // MaxVReplicationTransactionLag represents the lag across all shards, between the current time and the timestamp 365 // of the last transaction OR heartbeat timestamp (if there have been no writes to replicate from the source). 
366 MaxVReplicationTransactionLag int64 367 // Frozen is true if this workflow has been deemed complete and is in a limbo "frozen" state (Message=="FROZEN") 368 Frozen bool 369 // Statuses is a map of <shard>/<primary tablet alias> : ShardReplicationStatus (for the given shard). 370 ShardStatuses map[string]*ShardReplicationStatus 371 // SourceTimeZone represents the time zone provided to the workflow, only set if not UTC 372 SourceTimeZone string 373 // TargetTimeZone is set to the original SourceTimeZone, in reverse streams, if it was provided to the workflow 374 TargetTimeZone string 375 // OnDDL specifies the action to be taken when a DDL is encountered. 376 OnDDL string `json:"OnDDL,omitempty"` 377 // DeferSecondaryKeys specifies whether to defer the creation of secondary keys. 378 DeferSecondaryKeys bool `json:"DeferSecondaryKeys,omitempty"` 379 } 380 381 // ReplicationLocation represents a location that data is either replicating from, or replicating into. 382 type ReplicationLocation struct { 383 Keyspace string 384 Shards []string 385 } 386 387 // ShardReplicationStatus holds relevant vreplication related info for the given shard. 388 type ShardReplicationStatus struct { 389 // PrimaryReplicationStatuses represents all of the replication statuses for the primary tablets in the given shard. 390 PrimaryReplicationStatuses []*ReplicationStatus 391 // TabletControls represents the tablet controls for the tablets in the shard. 392 TabletControls []*topodatapb.Shard_TabletControl 393 // PrimaryIsServing indicates whether the primary tablet of the given shard is currently serving write traffic. 394 PrimaryIsServing bool 395 } 396 397 type copyState struct { 398 Table string 399 LastPK string 400 } 401 402 // ReplicationStatus includes data from the _vt.vreplication table, along with other useful relevant data. 403 type ReplicationStatus struct { 404 // Shard represents the relevant shard name. 
405 Shard string 406 // Tablet is the tablet alias that the ReplicationStatus came from. 407 Tablet string 408 // ID represents the id column from the _vt.vreplication table. 409 ID int64 410 // Bls represents the BinlogSource. 411 Bls *binlogdatapb.BinlogSource 412 // Pos represents the pos column from the _vt.vreplication table. 413 Pos string 414 // StopPos represents the stop_pos column from the _vt.vreplication table. 415 StopPos string 416 // State represents the state column from the _vt.vreplication table. 417 State string 418 // DbName represents the db_name column from the _vt.vreplication table. 419 DBName string 420 // TransactionTimestamp represents the transaction_timestamp column from the _vt.vreplication table. 421 TransactionTimestamp int64 422 // TimeUpdated represents the time_updated column from the _vt.vreplication table. 423 TimeUpdated int64 424 // TimeHeartbeat represents the time_heartbeat column from the _vt.vreplication table. 425 TimeHeartbeat int64 426 // TimeThrottled represents the time_throttled column from the _vt.vreplication table. 427 TimeThrottled int64 428 // ComponentThrottled represents the component_throttled column from the _vt.vreplication table. 429 ComponentThrottled string 430 // Message represents the message column from the _vt.vreplication table. 431 Message string 432 // Tags contain the tags specified for this stream 433 Tags string 434 WorkflowType string 435 WorkflowSubType string 436 // CopyState represents the rows from the _vt.copy_state table. 
437 CopyState []copyState 438 // sourceTimeZone represents the time zone of each stream, only set if not UTC 439 sourceTimeZone string 440 // targetTimeZone is set to the sourceTimeZone of the forward stream, if it was provided in the workflow 441 targetTimeZone string 442 deferSecondaryKeys bool 443 } 444 445 func (wr *Wrangler) getReplicationStatusFromRow(ctx context.Context, row sqltypes.RowNamedValues, primary *topo.TabletInfo) (*ReplicationStatus, string, error) { 446 var err error 447 var id, timeUpdated, transactionTimestamp, timeHeartbeat, timeThrottled int64 448 var state, dbName, pos, stopPos, message, tags, componentThrottled string 449 var workflowType, workflowSubType int64 450 var deferSecondaryKeys bool 451 var bls binlogdatapb.BinlogSource 452 var mpos mysql.Position 453 454 id, err = row.ToInt64("id") 455 if err != nil { 456 return nil, "", err 457 } 458 rowBytes, err := row.ToBytes("source") 459 if err != nil { 460 return nil, "", err 461 } 462 if err := prototext.Unmarshal(rowBytes, &bls); err != nil { 463 return nil, "", err 464 } 465 466 // gtid in the pos column can be compressed, so check and possibly uncompress 467 pos, err = row.ToString("pos") 468 if err != nil { 469 return nil, "", err 470 } 471 if pos != "" { 472 mpos, err = binlogplayer.DecodePosition(pos) 473 if err != nil { 474 return nil, "", err 475 } 476 pos = mpos.String() 477 } 478 stopPos, err = row.ToString("stop_pos") 479 if err != nil { 480 return nil, "", err 481 } 482 state, err = row.ToString("state") 483 if err != nil { 484 return nil, "", err 485 } 486 dbName, err = row.ToString("db_name") 487 if err != nil { 488 return nil, "", err 489 } 490 timeUpdated, err = row.ToInt64("time_updated") 491 if err != nil { 492 return nil, "", err 493 } 494 transactionTimestamp, err = row.ToInt64("transaction_timestamp") 495 if err != nil { 496 return nil, "", err 497 } 498 timeHeartbeat, err = row.ToInt64("time_heartbeat") 499 if err != nil { 500 return nil, "", err 501 } 502 
timeThrottled, err = row.ToInt64("time_throttled") 503 if err != nil { 504 return nil, "", err 505 } 506 componentThrottled, err = row.ToString("component_throttled") 507 if err != nil { 508 return nil, "", err 509 } 510 message, err = row.ToString("message") 511 if err != nil { 512 return nil, "", err 513 } 514 tags, err = row.ToString("tags") 515 if err != nil { 516 return nil, "", err 517 } 518 workflowType, _ = row.ToInt64("workflow_type") 519 workflowSubType, _ = row.ToInt64("workflow_sub_type") 520 deferSecondaryKeys, _ = row.ToBool("defer_secondary_keys") 521 522 status := &ReplicationStatus{ 523 Shard: primary.Shard, 524 Tablet: primary.AliasString(), 525 ID: id, 526 Bls: &bls, 527 Pos: pos, 528 StopPos: stopPos, 529 State: state, 530 DBName: dbName, 531 TransactionTimestamp: transactionTimestamp, 532 TimeUpdated: timeUpdated, 533 TimeHeartbeat: timeHeartbeat, 534 TimeThrottled: timeThrottled, 535 ComponentThrottled: componentThrottled, 536 Message: message, 537 Tags: tags, 538 sourceTimeZone: bls.SourceTimeZone, 539 targetTimeZone: bls.TargetTimeZone, 540 WorkflowType: binlogdatapb.VReplicationWorkflowType_name[int32(workflowType)], 541 WorkflowSubType: binlogdatapb.VReplicationWorkflowSubType_name[int32(workflowSubType)], 542 deferSecondaryKeys: deferSecondaryKeys, 543 } 544 status.CopyState, err = wr.getCopyState(ctx, primary, id) 545 if err != nil { 546 return nil, "", err 547 } 548 549 status.State = updateState(message, status.State, status.CopyState, timeUpdated) 550 return status, bls.Keyspace, nil 551 } 552 553 func (wr *Wrangler) getStreams(ctx context.Context, workflow, keyspace string) (*ReplicationStatusResult, error) { 554 var rsr ReplicationStatusResult 555 rsr.ShardStatuses = make(map[string]*ShardReplicationStatus) 556 rsr.Workflow = workflow 557 var results map[*topo.TabletInfo]*querypb.QueryResult 558 query := `select 559 id, 560 source, 561 pos, 562 stop_pos, 563 max_replication_lag, 564 state, 565 db_name, 566 time_updated, 567 
transaction_timestamp, 568 time_heartbeat, 569 time_throttled, 570 component_throttled, 571 message, 572 tags, 573 workflow_type, 574 workflow_sub_type, 575 defer_secondary_keys 576 from _vt.vreplication` 577 results, err := wr.runVexec(ctx, workflow, keyspace, query, false) 578 if err != nil { 579 return nil, err 580 } 581 582 // We set a topo timeout since we contact topo for the shard record. 583 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 584 defer cancel() 585 var sourceKeyspace string 586 sourceShards := sets.New[string]() 587 targetShards := sets.New[string]() 588 for primary, result := range results { 589 var rsrStatus []*ReplicationStatus 590 nqr := sqltypes.Proto3ToResult(result).Named() 591 if len(nqr.Rows) == 0 { 592 continue 593 } 594 for _, row := range nqr.Rows { 595 status, sk, err := wr.getReplicationStatusFromRow(ctx, row, primary) 596 if err != nil { 597 return nil, err 598 } 599 rsr.SourceTimeZone = status.sourceTimeZone 600 rsr.TargetTimeZone = status.targetTimeZone 601 sourceKeyspace = sk 602 sourceShards.Insert(status.Bls.Shard) 603 rsrStatus = append(rsrStatus, status) 604 605 // Only show the OnDDL setting if it's not the default of 0/IGNORE. 606 if status.Bls.OnDdl != binlogdatapb.OnDDLAction_IGNORE { 607 rsr.OnDDL = binlogdatapb.OnDDLAction_name[int32(status.Bls.OnDdl)] 608 // Unset it in the proto so that we do not show the 609 // low-level enum int in the JSON marshalled output 610 // as e.g. `"on_ddl": 1` is not meaningful or helpful 611 // for the end user and we instead show the mapped 612 // string value using the top-level "OnDDL" json key. 
613 // Note: this is done here only because golang does 614 // not currently support setting json tags in proto 615 // declarations so that I could request it always be 616 // ommitted from marshalled JSON output: 617 // https://github.com/golang/protobuf/issues/52 618 status.Bls.OnDdl = 0 619 } 620 621 rsr.DeferSecondaryKeys = status.deferSecondaryKeys 622 623 if status.Message == workflow2.Frozen { 624 rsr.Frozen = true 625 } 626 627 // MaxVReplicationLag is the time since the last event was processed from the source 628 // The last event can be an actual binlog event or a heartbeat in case no binlog events occur within (default) 1 second 629 timeUpdated := time.Unix(status.TimeUpdated, 0) 630 replicationLag := time.Since(timeUpdated) 631 if replicationLag.Seconds() > float64(rsr.MaxVReplicationLag) { 632 rsr.MaxVReplicationLag = int64(replicationLag.Seconds()) 633 } 634 635 // MaxVReplicationTransactionLag estimates the actual lag between the source and the target 636 // If we are still processing source events it is the difference b/w current time and the timestamp of the last event 637 // If heartbeats are more recent than the last event, then the lag is the time since the last heartbeat as 638 // there can be an actual event immediately after the heartbeat, but which has not yet 639 // been processed on the target 640 // We don't allow switching during the copy phase, so in that case we just return a large lag. 
641 // All timestamps are in seconds since epoch 642 lastTransactionTimestamp := status.TransactionTimestamp 643 lastHeartbeatTime := status.TimeHeartbeat 644 if status.State == "Copying" { 645 rsr.MaxVReplicationTransactionLag = math.MaxInt64 646 } else { 647 if lastTransactionTimestamp == 0 /* no new events after copy */ || 648 lastHeartbeatTime > lastTransactionTimestamp /* no recent transactions, so all caught up */ { 649 650 lastTransactionTimestamp = lastHeartbeatTime 651 } 652 now := time.Now().Unix() /*seconds since epoch*/ 653 transactionReplicationLag := now - lastTransactionTimestamp 654 if transactionReplicationLag > rsr.MaxVReplicationTransactionLag { 655 rsr.MaxVReplicationTransactionLag = transactionReplicationLag 656 } 657 } 658 } 659 si, err := wr.ts.GetShard(ctx, keyspace, primary.Shard) 660 if err != nil { 661 return nil, err 662 } 663 targetShards.Insert(si.ShardName()) 664 rsr.ShardStatuses[fmt.Sprintf("%s/%s", primary.Shard, primary.AliasString())] = &ShardReplicationStatus{ 665 PrimaryReplicationStatuses: rsrStatus, 666 TabletControls: si.TabletControls, 667 PrimaryIsServing: si.IsPrimaryServing, 668 } 669 } 670 rsr.SourceLocation = ReplicationLocation{ 671 Keyspace: sourceKeyspace, 672 Shards: sets.List(sourceShards), 673 } 674 rsr.TargetLocation = ReplicationLocation{ 675 Keyspace: keyspace, 676 Shards: sets.List(targetShards), 677 } 678 679 return &rsr, nil 680 } 681 682 // ListActiveWorkflows will return a list of all active workflows for the given keyspace. 683 func (wr *Wrangler) ListActiveWorkflows(ctx context.Context, keyspace string) ([]string, error) { 684 return wr.ListAllWorkflows(ctx, keyspace, true) 685 } 686 687 // ListAllWorkflows will return a list of all workflows (Running and Stopped) for the given keyspace. 
688 func (wr *Wrangler) ListAllWorkflows(ctx context.Context, keyspace string, active bool) ([]string, error) { 689 where := "" 690 if active { 691 where = " where state <> 'Stopped'" 692 } 693 query := "select distinct workflow from _vt.vreplication" + where 694 vx := vtctldvexec.NewVExec(keyspace, "", wr.ts, wr.tmc) 695 results, err := vx.QueryContext(ctx, query) 696 if err != nil { 697 return nil, err 698 } 699 workflowsSet := sets.New[string]() 700 for _, result := range results { 701 if len(result.Rows) == 0 { 702 continue 703 } 704 qr := sqltypes.Proto3ToResult(result) 705 for _, row := range qr.Rows { 706 for _, value := range row { 707 // Even though we query for distinct, we must de-dup because we query per primary tablet. 708 workflowsSet.Insert(value.ToString()) 709 } 710 } 711 } 712 workflows := sets.List(workflowsSet) 713 return workflows, nil 714 } 715 716 // ShowWorkflow will return all of the relevant replication related information for the given workflow. 717 func (wr *Wrangler) ShowWorkflow(ctx context.Context, workflow, keyspace string) (*ReplicationStatusResult, error) { 718 replStatus, err := wr.getStreams(ctx, workflow, keyspace) 719 if err != nil { 720 return nil, err 721 } 722 if len(replStatus.ShardStatuses) == 0 { 723 return nil, fmt.Errorf("no streams found for workflow %s in keyspace %s", workflow, keyspace) 724 } 725 726 return replStatus, nil 727 } 728 729 func updateState(message, state string, cs []copyState, timeUpdated int64) string { 730 if strings.Contains(strings.ToLower(message), "error") { 731 state = "Error" 732 } else if state == "Running" && len(cs) > 0 { 733 state = "Copying" 734 } else if state == "Running" && int64(time.Now().Second())-timeUpdated > 10 /* seconds */ { 735 state = "Lagging" 736 } 737 return state 738 } 739 740 func dumpStreamListAsJSON(replStatus *ReplicationStatusResult, wr *Wrangler) error { 741 text, err := json.MarshalIndent(replStatus, "", "\t") 742 if err != nil { 743 return err 744 } 745 
wr.Logger().Printf("%s\n", text) 746 return nil 747 } 748 749 func (wr *Wrangler) printWorkflowList(keyspace string, workflows []string) { 750 list := strings.Join(workflows, ", ") 751 if list == "" { 752 wr.Logger().Printf("No workflows found in keyspace %s\n", keyspace) 753 return 754 } 755 wr.Logger().Printf("Following workflow(s) found in keyspace %s: %v\n", keyspace, list) 756 } 757 758 func (wr *Wrangler) getCopyState(ctx context.Context, tablet *topo.TabletInfo, id int64) ([]copyState, error) { 759 var cs []copyState 760 query := fmt.Sprintf("select table_name, lastpk from _vt.copy_state where vrepl_id = %d and id in (select max(id) from _vt.copy_state where vrepl_id = %d group by vrepl_id, table_name)", 761 id, id) 762 qr, err := wr.VReplicationExec(ctx, tablet.Alias, query) 763 if err != nil { 764 return nil, err 765 } 766 767 result := sqltypes.Proto3ToResult(qr) 768 if result != nil { 769 for _, row := range result.Rows { 770 // These fields are varbinary, but close enough 771 table := row[0].ToString() 772 lastPK := row[1].ToString() 773 copyState := copyState{ 774 Table: table, 775 LastPK: lastPK, 776 } 777 cs = append(cs, copyState) 778 } 779 } 780 781 return cs, nil 782 }