vitess.io/vitess@v0.16.2/go/vt/vtctl/workflow/server.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package workflow 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "sort" 24 "strings" 25 "sync" 26 "time" 27 28 "google.golang.org/protobuf/encoding/prototext" 29 "k8s.io/apimachinery/pkg/util/sets" 30 31 "vitess.io/vitess/go/sqltypes" 32 "vitess.io/vitess/go/trace" 33 "vitess.io/vitess/go/vt/concurrency" 34 "vitess.io/vitess/go/vt/key" 35 "vitess.io/vitess/go/vt/log" 36 "vitess.io/vitess/go/vt/topo" 37 "vitess.io/vitess/go/vt/vtctl/workflow/vexec" 38 "vitess.io/vitess/go/vt/vtgate/evalengine" 39 "vitess.io/vitess/go/vt/vttablet/tmclient" 40 41 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 42 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 43 vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" 44 "vitess.io/vitess/go/vt/proto/vttime" 45 ) 46 47 var ( 48 // ErrInvalidWorkflow is a catchall error type for conditions that should be 49 // impossible when operating on a workflow. 50 ErrInvalidWorkflow = errors.New("invalid workflow") 51 // ErrMultipleSourceKeyspaces occurs when a workflow somehow has multiple 52 // source keyspaces across different shard primaries. This should be 53 // impossible. 54 ErrMultipleSourceKeyspaces = errors.New("multiple source keyspaces for a single workflow") 55 // ErrMultipleTargetKeyspaces occurs when a workflow somehow has multiple 56 // target keyspaces across different shard primaries. This should be 57 // impossible. 58 ErrMultipleTargetKeyspaces = errors.New("multiple target keyspaces for a single workflow") 59 ) 60 61 // Server provides an API to work with Vitess workflows, like vreplication 62 // workflows (MoveTables, Reshard, etc) and schema migration workflows. 63 // 64 // NB: This is in alpha, and you probably don't want to depend on it (yet!). 65 // Currently, it provides only a read-only API to vreplication workflows. Write 66 // actions on vreplication workflows, and schema migration workflows entirely, 67 // are not yet supported, but planned. 68 type Server struct { 69 ts *topo.Server 70 tmc tmclient.TabletManagerClient 71 } 72 73 // NewServer returns a new server instance with the given topo.Server and 74 // TabletManagerClient. 75 func NewServer(ts *topo.Server, tmc tmclient.TabletManagerClient) *Server { 76 return &Server{ 77 ts: ts, 78 tmc: tmc, 79 } 80 } 81 82 // CheckReshardingJournalExistsOnTablet returns the journal (or an empty 83 // journal) and a boolean to indicate if the resharding_journal table exists on 84 // the given tablet. 85 // 86 // (TODO:@ajm188) This should not be part of the final public API, and should 87 // be un-exported after all places in package wrangler that call this have been 88 // migrated over. 89 func (s *Server) CheckReshardingJournalExistsOnTablet(ctx context.Context, tablet *topodatapb.Tablet, migrationID int64) (*binlogdatapb.Journal, bool, error) { 90 var ( 91 journal binlogdatapb.Journal 92 exists bool 93 ) 94 95 query := fmt.Sprintf("select val from _vt.resharding_journal where id=%v", migrationID) 96 p3qr, err := s.tmc.VReplicationExec(ctx, tablet, query) 97 if err != nil { 98 return nil, false, err 99 } 100 101 if len(p3qr.Rows) != 0 { 102 qr := sqltypes.Proto3ToResult(p3qr) 103 qrBytes, err := qr.Rows[0][0].ToBytes() 104 if err != nil { 105 return nil, false, err 106 } 107 if err := prototext.Unmarshal(qrBytes, &journal); err != nil { 108 return nil, false, err 109 } 110 111 exists = true 112 } 113 114 return &journal, exists, nil 115 } 116 117 // GetCellsWithShardReadsSwitched returns the topo cells partitioned into two 118 // slices: one with the cells where shard reads have been switched for the given 119 // tablet type and one with the cells where shard reads have not been switched 120 // for the given tablet type. 121 // 122 // This function is for use in Reshard, and "switched reads" is defined as if 123 // any one of the source shards has the query service disabled in its tablet 124 // control record. 125 func (s *Server) GetCellsWithShardReadsSwitched( 126 ctx context.Context, 127 keyspace string, 128 si *topo.ShardInfo, 129 tabletType topodatapb.TabletType, 130 ) (cellsSwitched []string, cellsNotSwitched []string, err error) { 131 cells, err := s.ts.GetCellInfoNames(ctx) 132 if err != nil { 133 return nil, nil, err 134 } 135 136 for _, cell := range cells { 137 srvks, err := s.ts.GetSrvKeyspace(ctx, cell, keyspace) 138 if err != nil { 139 return nil, nil, err 140 } 141 142 // Checking one shard is enough. 143 var ( 144 shardServedTypes []string 145 found bool 146 noControls bool 147 ) 148 149 for _, partition := range srvks.GetPartitions() { 150 if tabletType != partition.GetServedType() { 151 continue 152 } 153 154 // If reads and writes are both switched it is possible that the 155 // shard is not in the partition table. 156 for _, shardReference := range partition.GetShardReferences() { 157 if key.KeyRangeEqual(shardReference.GetKeyRange(), si.GetKeyRange()) { 158 found = true 159 break 160 } 161 } 162 163 // It is possible that there are no tablet controls if the target 164 // shards are not yet serving, or once reads and writes are both 165 // switched. 166 if len(partition.GetShardTabletControls()) == 0 { 167 noControls = true 168 break 169 } 170 171 for _, tabletControl := range partition.GetShardTabletControls() { 172 if key.KeyRangeEqual(tabletControl.GetKeyRange(), si.GetKeyRange()) { 173 if !tabletControl.GetQueryServiceDisabled() { 174 shardServedTypes = append(shardServedTypes, si.ShardName()) 175 } 176 177 break 178 } 179 } 180 } 181 182 if found && (len(shardServedTypes) > 0 || noControls) { 183 cellsNotSwitched = append(cellsNotSwitched, cell) 184 } else { 185 cellsSwitched = append(cellsSwitched, cell) 186 } 187 } 188 189 return cellsSwitched, cellsNotSwitched, nil 190 } 191 192 // GetCellsWithTableReadsSwitched returns the topo cells partitioned into two 193 // slices: one with the cells where table reads have been switched for the given 194 // tablet type and one with the cells where table reads have not been switched 195 // for the given tablet type. 196 // 197 // This function is for use in MoveTables, and "switched reads" is defined as if 198 // the routing rule for a (table, tablet_type) is pointing to the target 199 // keyspace. 200 func (s *Server) GetCellsWithTableReadsSwitched( 201 ctx context.Context, 202 keyspace string, 203 table string, 204 tabletType topodatapb.TabletType, 205 ) (cellsSwitched []string, cellsNotSwitched []string, err error) { 206 cells, err := s.ts.GetCellInfoNames(ctx) 207 if err != nil { 208 return nil, nil, err 209 } 210 211 getKeyspace := func(ruleTarget string) (string, error) { 212 arr := strings.Split(ruleTarget, ".") 213 if len(arr) != 2 { 214 return "", fmt.Errorf("rule target is not correctly formatted: %s", ruleTarget) 215 } 216 217 return arr[0], nil 218 } 219 220 for _, cell := range cells { 221 srvVSchema, err := s.ts.GetSrvVSchema(ctx, cell) 222 if err != nil { 223 return nil, nil, err 224 } 225 226 var ( 227 found bool 228 switched bool 229 ) 230 231 for _, rule := range srvVSchema.RoutingRules.Rules { 232 ruleName := fmt.Sprintf("%s.%s@%s", keyspace, table, strings.ToLower(tabletType.String())) 233 if rule.FromTable == ruleName { 234 found = true 235 236 for _, to := range rule.ToTables { 237 ks, err := getKeyspace(to) 238 if err != nil { 239 log.Errorf(err.Error()) 240 return nil, nil, err 241 } 242 243 if ks == keyspace { 244 switched = true 245 break // if one table in the workflow switched, we are done. 246 } 247 } 248 } 249 250 if found { 251 break 252 } 253 } 254 255 if switched { 256 cellsSwitched = append(cellsSwitched, cell) 257 } else { 258 cellsNotSwitched = append(cellsNotSwitched, cell) 259 } 260 } 261 262 return cellsSwitched, cellsNotSwitched, nil 263 } 264 265 // GetWorkflows returns a list of all workflows that exist in a given keyspace, 266 // with some additional filtering depending on the request parameters (for 267 // example, ActiveOnly=true restricts the search to only workflows that are 268 // currently running). 269 // 270 // It has the same signature as the vtctlservicepb.VtctldServer's GetWorkflows 271 // rpc, and grpcvtctldserver delegates to this function. 272 func (s *Server) GetWorkflows(ctx context.Context, req *vtctldatapb.GetWorkflowsRequest) (*vtctldatapb.GetWorkflowsResponse, error) { 273 span, ctx := trace.NewSpan(ctx, "workflow.Server.GetWorkflows") 274 defer span.Finish() 275 276 span.Annotate("keyspace", req.Keyspace) 277 span.Annotate("active_only", req.ActiveOnly) 278 279 where := "" 280 if req.ActiveOnly { 281 where = "WHERE state <> 'Stopped'" 282 } 283 284 query := fmt.Sprintf(` 285 SELECT 286 id, 287 workflow, 288 source, 289 pos, 290 stop_pos, 291 max_replication_lag, 292 state, 293 db_name, 294 time_updated, 295 transaction_timestamp, 296 message, 297 tags, 298 workflow_type, 299 workflow_sub_type 300 FROM 301 _vt.vreplication 302 %s`, 303 where, 304 ) 305 306 vx := vexec.NewVExec(req.Keyspace, "", s.ts, s.tmc) 307 results, err := vx.QueryContext(ctx, query) 308 if err != nil { 309 return nil, err 310 } 311 312 m := sync.Mutex{} // guards access to the following maps during concurrent calls to scanWorkflow 313 workflowsMap := make(map[string]*vtctldatapb.Workflow, len(results)) 314 sourceKeyspaceByWorkflow := make(map[string]string, len(results)) 315 sourceShardsByWorkflow := make(map[string]sets.Set[string], len(results)) 316 targetKeyspaceByWorkflow := make(map[string]string, len(results)) 317 targetShardsByWorkflow := make(map[string]sets.Set[string], len(results)) 318 maxVReplicationLagByWorkflow := make(map[string]float64, len(results)) 319 320 // We guarantee the following invariants when this function is called for a 321 // given workflow: 322 // - workflow.Name != "" (more precisely, ".Name is set 'properly'") 323 // - workflowsMap[workflow.Name] == workflow 324 // - sourceShardsByWorkflow[workflow.Name] != nil 325 // - targetShardsByWorkflow[workflow.Name] != nil 326 // - workflow.ShardStatuses != nil 327 scanWorkflow := func(ctx context.Context, workflow *vtctldatapb.Workflow, row sqltypes.RowNamedValues, tablet *topo.TabletInfo) error { 328 span, ctx := trace.NewSpan(ctx, "workflow.Server.scanWorkflow") 329 defer span.Finish() 330 331 span.Annotate("keyspace", req.Keyspace) 332 span.Annotate("shard", tablet.Shard) 333 span.Annotate("active_only", req.ActiveOnly) 334 span.Annotate("workflow", workflow.Name) 335 span.Annotate("tablet_alias", tablet.AliasString()) 336 337 id, err := evalengine.ToInt64(row["id"]) 338 if err != nil { 339 return err 340 } 341 342 var bls binlogdatapb.BinlogSource 343 rowBytes, err := row["source"].ToBytes() 344 if err != nil { 345 return err 346 } 347 if err := prototext.Unmarshal(rowBytes, &bls); err != nil { 348 return err 349 } 350 351 pos := row["pos"].ToString() 352 stopPos := row["stop_pos"].ToString() 353 state := row["state"].ToString() 354 dbName := row["db_name"].ToString() 355 356 timeUpdatedSeconds, err := evalengine.ToInt64(row["time_updated"]) 357 if err != nil { 358 return err 359 } 360 361 transactionTimeSeconds, err := evalengine.ToInt64(row["transaction_timestamp"]) 362 if err != nil { 363 return err 364 } 365 366 message := row["message"].ToString() 367 368 tags := row["tags"].ToString() 369 var tagArray []string 370 if tags != "" { 371 tagArray = strings.Split(tags, ",") 372 } 373 workflowType, _ := row["workflow_type"].ToInt64() 374 workflowSubType, _ := row["workflow_sub_type"].ToInt64() 375 stream := &vtctldatapb.Workflow_Stream{ 376 Id: id, 377 Shard: tablet.Shard, 378 Tablet: tablet.Alias, 379 BinlogSource: &bls, 380 Position: pos, 381 StopPosition: stopPos, 382 State: state, 383 DbName: dbName, 384 TransactionTimestamp: &vttime.Time{ 385 Seconds: transactionTimeSeconds, 386 }, 387 TimeUpdated: &vttime.Time{ 388 Seconds: timeUpdatedSeconds, 389 }, 390 Message: message, 391 Tags: tagArray, 392 } 393 workflow.WorkflowType = binlogdatapb.VReplicationWorkflowType_name[int32(workflowType)] 394 workflow.WorkflowSubType = binlogdatapb.VReplicationWorkflowSubType_name[int32(workflowSubType)] 395 stream.CopyStates, err = s.getWorkflowCopyStates(ctx, tablet, id) 396 if err != nil { 397 return err 398 } 399 400 span.Annotate("num_copy_states", len(stream.CopyStates)) 401 402 switch { 403 case strings.Contains(strings.ToLower(stream.Message), "error"): 404 stream.State = "Error" 405 case stream.State == "Running" && len(stream.CopyStates) > 0: 406 stream.State = "Copying" 407 case stream.State == "Running" && int64(time.Now().Second())-timeUpdatedSeconds > 10: 408 stream.State = "Lagging" 409 } 410 411 // At this point, we're going to start modifying the maps defined 412 // outside this function, as well as fields on the passed-in Workflow 413 // pointer. Since we're running concurrently, take the lock. 414 // 415 // We've already made the remote call to getCopyStates, so synchronizing 416 // here shouldn't hurt too badly, performance-wise. 417 m.Lock() 418 defer m.Unlock() 419 420 shardStreamKey := fmt.Sprintf("%s/%s", tablet.Shard, tablet.AliasString()) 421 shardStream, ok := workflow.ShardStreams[shardStreamKey] 422 if !ok { 423 ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout) 424 defer cancel() 425 426 si, err := s.ts.GetShard(ctx, req.Keyspace, tablet.Shard) 427 if err != nil { 428 return err 429 } 430 431 shardStream = &vtctldatapb.Workflow_ShardStream{ 432 Streams: nil, 433 TabletControls: si.TabletControls, 434 IsPrimaryServing: si.IsPrimaryServing, 435 } 436 437 workflow.ShardStreams[shardStreamKey] = shardStream 438 } 439 440 shardStream.Streams = append(shardStream.Streams, stream) 441 sourceShardsByWorkflow[workflow.Name].Insert(stream.BinlogSource.Shard) 442 targetShardsByWorkflow[workflow.Name].Insert(tablet.Shard) 443 444 if ks, ok := sourceKeyspaceByWorkflow[workflow.Name]; ok && ks != stream.BinlogSource.Keyspace { 445 return fmt.Errorf("%w: workflow = %v, ks1 = %v, ks2 = %v", ErrMultipleSourceKeyspaces, workflow.Name, ks, stream.BinlogSource.Keyspace) 446 } 447 448 sourceKeyspaceByWorkflow[workflow.Name] = stream.BinlogSource.Keyspace 449 450 if ks, ok := targetKeyspaceByWorkflow[workflow.Name]; ok && ks != tablet.Keyspace { 451 return fmt.Errorf("%w: workflow = %v, ks1 = %v, ks2 = %v", ErrMultipleTargetKeyspaces, workflow.Name, ks, tablet.Keyspace) 452 } 453 454 targetKeyspaceByWorkflow[workflow.Name] = tablet.Keyspace 455 456 timeUpdated := time.Unix(timeUpdatedSeconds, 0) 457 vreplicationLag := time.Since(timeUpdated) 458 459 if currentMaxLag, ok := maxVReplicationLagByWorkflow[workflow.Name]; ok { 460 if vreplicationLag.Seconds() > currentMaxLag { 461 maxVReplicationLagByWorkflow[workflow.Name] = vreplicationLag.Seconds() 462 } 463 } else { 464 maxVReplicationLagByWorkflow[workflow.Name] = vreplicationLag.Seconds() 465 } 466 467 return nil 468 } 469 470 var ( 471 scanWorkflowWg sync.WaitGroup 472 scanWorkflowErrors concurrency.FirstErrorRecorder 473 ) 474 475 for tablet, result := range results { 476 qr := sqltypes.Proto3ToResult(result) 477 478 // In the old implementation, we knew we had at most one (0 <= N <= 1) 479 // workflow for each shard primary we queried. There might be multiple 480 // rows (streams) comprising that workflow, so we would aggregate the 481 // rows for a given primary into a single value ("the workflow", 482 // ReplicationStatusResult in the old types). 483 // 484 // In this version, we have many (N >= 0) workflows for each shard 485 // primary we queried, so we need to determine if each row corresponds 486 // to a workflow we're already aggregating, or if it's a workflow we 487 // haven't seen yet for that shard primary. We use the workflow name to 488 // dedupe for this. 489 for _, row := range qr.Named().Rows { 490 workflowName := row["workflow"].ToString() 491 workflow, ok := workflowsMap[workflowName] 492 if !ok { 493 workflow = &vtctldatapb.Workflow{ 494 Name: workflowName, 495 ShardStreams: map[string]*vtctldatapb.Workflow_ShardStream{}, 496 } 497 498 workflowsMap[workflowName] = workflow 499 sourceShardsByWorkflow[workflowName] = sets.New[string]() 500 targetShardsByWorkflow[workflowName] = sets.New[string]() 501 } 502 503 scanWorkflowWg.Add(1) 504 go func(ctx context.Context, workflow *vtctldatapb.Workflow, row sqltypes.RowNamedValues, tablet *topo.TabletInfo) { 505 defer scanWorkflowWg.Done() 506 if err := scanWorkflow(ctx, workflow, row, tablet); err != nil { 507 scanWorkflowErrors.RecordError(err) 508 } 509 }(ctx, workflow, row, tablet) 510 } 511 } 512 513 scanWorkflowWg.Wait() 514 if scanWorkflowErrors.HasErrors() { 515 return nil, scanWorkflowErrors.Error() 516 } 517 518 var ( 519 fetchLogsWG sync.WaitGroup 520 vrepLogQuery = strings.TrimSpace(` 521 SELECT 522 id, 523 vrepl_id, 524 type, 525 state, 526 message, 527 created_at, 528 updated_at, 529 count 530 FROM 531 _vt.vreplication_log 532 ORDER BY 533 vrepl_id ASC, 534 id ASC 535 `) 536 ) 537 538 fetchStreamLogs := func(ctx context.Context, workflow *vtctldatapb.Workflow) { 539 span, ctx := trace.NewSpan(ctx, "workflow.Server.scanWorkflow") 540 defer span.Finish() 541 542 span.Annotate("keyspace", req.Keyspace) 543 span.Annotate("workflow", workflow.Name) 544 545 results, err := vx.WithWorkflow(workflow.Name).QueryContext(ctx, vrepLogQuery) 546 if err != nil { 547 // Note that we do not return here. If there are any query results 548 // in the map (i.e. some tablets returned successfully), we will 549 // still try to read log rows from them on a best-effort basis. But, 550 // we will also pre-emptively record the top-level fetch error on 551 // every stream in every shard in the workflow. Further processing 552 // below may override the error message for certain streams. 553 for _, streams := range workflow.ShardStreams { 554 for _, stream := range streams.Streams { 555 stream.LogFetchError = err.Error() 556 } 557 } 558 } 559 560 for target, p3qr := range results { 561 qr := sqltypes.Proto3ToResult(p3qr) 562 shardStreamKey := fmt.Sprintf("%s/%s", target.Shard, target.AliasString()) 563 564 ss, ok := workflow.ShardStreams[shardStreamKey] 565 if !ok || ss == nil { 566 continue 567 } 568 569 streams := ss.Streams 570 streamIdx := 0 571 markErrors := func(err error) { 572 if streamIdx >= len(streams) { 573 return 574 } 575 576 streams[streamIdx].LogFetchError = err.Error() 577 } 578 579 for _, row := range qr.Rows { 580 id, err := evalengine.ToInt64(row[0]) 581 if err != nil { 582 markErrors(err) 583 continue 584 } 585 586 streamID, err := evalengine.ToInt64(row[1]) 587 if err != nil { 588 markErrors(err) 589 continue 590 } 591 592 typ := row[2].ToString() 593 state := row[3].ToString() 594 message := row[4].ToString() 595 596 createdAt, err := time.Parse("2006-01-02 15:04:05", row[5].ToString()) 597 if err != nil { 598 markErrors(err) 599 continue 600 } 601 602 updatedAt, err := time.Parse("2006-01-02 15:04:05", row[6].ToString()) 603 if err != nil { 604 markErrors(err) 605 continue 606 } 607 608 count, err := evalengine.ToInt64(row[7]) 609 if err != nil { 610 markErrors(err) 611 continue 612 } 613 614 streamLog := &vtctldatapb.Workflow_Stream_Log{ 615 Id: id, 616 StreamId: streamID, 617 Type: typ, 618 State: state, 619 CreatedAt: &vttime.Time{ 620 Seconds: createdAt.Unix(), 621 }, 622 UpdatedAt: &vttime.Time{ 623 Seconds: updatedAt.Unix(), 624 }, 625 Message: message, 626 Count: count, 627 } 628 629 // Earlier, in the main loop where we called scanWorkflow for 630 // each _vt.vreplication row, we also sorted each ShardStreams 631 // slice by ascending id, and our _vt.vreplication_log query 632 // ordered by (stream_id ASC, id ASC), so we can walk the 633 // streams in index order in O(n) amortized over all the rows 634 // for this tablet. 635 for streamIdx < len(streams) { 636 stream := streams[streamIdx] 637 if stream.Id < streamLog.StreamId { 638 streamIdx++ 639 continue 640 } 641 642 if stream.Id > streamLog.StreamId { 643 log.Warningf("Found stream log for nonexistent stream: %+v", streamLog) 644 break 645 } 646 647 // stream.Id == streamLog.StreamId 648 stream.Logs = append(stream.Logs, streamLog) 649 break 650 } 651 } 652 } 653 } 654 655 workflows := make([]*vtctldatapb.Workflow, 0, len(workflowsMap)) 656 657 for name, workflow := range workflowsMap { 658 sourceShards, ok := sourceShardsByWorkflow[name] 659 if !ok { 660 return nil, fmt.Errorf("%w: %s has no source shards", ErrInvalidWorkflow, name) 661 } 662 663 sourceKeyspace, ok := sourceKeyspaceByWorkflow[name] 664 if !ok { 665 return nil, fmt.Errorf("%w: %s has no source keyspace", ErrInvalidWorkflow, name) 666 } 667 668 targetShards, ok := targetShardsByWorkflow[name] 669 if !ok { 670 return nil, fmt.Errorf("%w: %s has no target shards", ErrInvalidWorkflow, name) 671 } 672 673 targetKeyspace, ok := targetKeyspaceByWorkflow[name] 674 if !ok { 675 return nil, fmt.Errorf("%w: %s has no target keyspace", ErrInvalidWorkflow, name) 676 } 677 678 maxVReplicationLag, ok := maxVReplicationLagByWorkflow[name] 679 if !ok { 680 return nil, fmt.Errorf("%w: %s has no tracked vreplication lag", ErrInvalidWorkflow, name) 681 } 682 683 workflow.Source = &vtctldatapb.Workflow_ReplicationLocation{ 684 Keyspace: sourceKeyspace, 685 Shards: sets.List(sourceShards), 686 } 687 688 workflow.Target = &vtctldatapb.Workflow_ReplicationLocation{ 689 Keyspace: targetKeyspace, 690 Shards: sets.List(targetShards), 691 } 692 693 workflow.MaxVReplicationLag = int64(maxVReplicationLag) 694 695 // Sort shard streams by stream_id ASC, to support an optimization 696 // in fetchStreamLogs below. 697 for _, shardStreams := range workflow.ShardStreams { 698 sort.Slice(shardStreams.Streams, func(i, j int) bool { 699 return shardStreams.Streams[i].Id < shardStreams.Streams[j].Id 700 }) 701 } 702 703 workflows = append(workflows, workflow) 704 705 // Fetch logs for all streams associated with this workflow in the background. 706 fetchLogsWG.Add(1) 707 go func(ctx context.Context, workflow *vtctldatapb.Workflow) { 708 defer fetchLogsWG.Done() 709 fetchStreamLogs(ctx, workflow) 710 }(ctx, workflow) 711 } 712 713 // Wait for all the log fetchers to finish. 714 fetchLogsWG.Wait() 715 716 return &vtctldatapb.GetWorkflowsResponse{ 717 Workflows: workflows, 718 }, nil 719 } 720 721 func (s *Server) getWorkflowCopyStates(ctx context.Context, tablet *topo.TabletInfo, id int64) ([]*vtctldatapb.Workflow_Stream_CopyState, error) { 722 span, ctx := trace.NewSpan(ctx, "workflow.Server.getWorkflowCopyStates") 723 defer span.Finish() 724 725 span.Annotate("keyspace", tablet.Keyspace) 726 span.Annotate("shard", tablet.Shard) 727 span.Annotate("tablet_alias", tablet.AliasString()) 728 span.Annotate("vrepl_id", id) 729 730 query := fmt.Sprintf("select table_name, lastpk from _vt.copy_state where vrepl_id = %d and id in (select max(id) from _vt.copy_state where vrepl_id = %d group by vrepl_id, table_name)", id, id) 731 qr, err := s.tmc.VReplicationExec(ctx, tablet.Tablet, query) 732 if err != nil { 733 return nil, err 734 } 735 736 result := sqltypes.Proto3ToResult(qr) 737 if result == nil { 738 return nil, nil 739 } 740 741 copyStates := make([]*vtctldatapb.Workflow_Stream_CopyState, len(result.Rows)) 742 for i, row := range result.Rows { 743 // These fields are technically varbinary, but this is close enough. 744 copyStates[i] = &vtctldatapb.Workflow_Stream_CopyState{ 745 Table: row[0].ToString(), 746 LastPk: row[1].ToString(), 747 } 748 } 749 750 return copyStates, nil 751 }