vitess.io/vitess@v0.16.2/go/vt/wrangler/vexec.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package wrangler
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"math"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	workflow2 "vitess.io/vitess/go/vt/vtctl/workflow"
    29  
    30  	"google.golang.org/protobuf/encoding/prototext"
    31  
    32  	"k8s.io/apimachinery/pkg/util/sets"
    33  
    34  	"vitess.io/vitess/go/mysql"
    35  	"vitess.io/vitess/go/sqltypes"
    36  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    37  	"vitess.io/vitess/go/vt/concurrency"
    38  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    39  	querypb "vitess.io/vitess/go/vt/proto/query"
    40  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    41  	"vitess.io/vitess/go/vt/sqlparser"
    42  	"vitess.io/vitess/go/vt/topo"
    43  	vtctldvexec "vitess.io/vitess/go/vt/vtctl/workflow/vexec" // renamed to avoid a collision with the vexec struct in this package
    44  	"vitess.io/vitess/go/vt/vterrors"
    45  )
    46  
// Identifiers and statements for the _vt.vreplication table that vexec works
// against:
//   - vexecTableQualifier is the "_vt" qualifier; presumably the schema the
//     targeted tables are expected to live in (its use is outside this view).
//   - vreplicationTableName is the unqualified name "vreplication".
//   - sqlVReplicationDelete is the statement produced for the "delete"
//     workflow action; exec also compares the incoming query against it to
//     decide whether to run post-delete cleanup.
const (
	vexecTableQualifier   = "_vt"
	vreplicationTableName = "vreplication"
	sqlVReplicationDelete = "delete from _vt.vreplication"
)
    52  
// vexec is the construct by which we run a query against backend shards. vexec is created by a user-facing
// interface, like vtctl or vtgate.
// vexec parses, analyzes and plans the query, and maintains the state of each such step's result.
type vexec struct {
	// ctx is the context captured at construction; it is used for topo lookups
	// and as the parent of the execution timeout in exec.
	ctx context.Context
	// workflow is the name of the workflow the query is run for.
	workflow string
	// keyspace is the keyspace whose shard primaries are queried.
	keyspace string
	// query is vexec's input
	query string
	// stmt is parsed from the query
	stmt sqlparser.Statement
	// tableName is extracted from the query, and used to determine the plan
	tableName string
	// planner will plan and execute a (possibly rewritten) query on backend shards
	planner vexecPlanner
	// plannedQuery is the result of supplementing original query with extra conditionals
	plannedQuery string

	// wr is the Wrangler that created this vexec; it provides topo access and helpers.
	wr *Wrangler

	// primaries holds the primary tablet of every shard in keyspace, as resolved by getPrimaries.
	primaries []*topo.TabletInfo
}
    75  
    76  func newVExec(ctx context.Context, workflow, keyspace, query string, wr *Wrangler) *vexec {
    77  	return &vexec{
    78  		ctx:      ctx,
    79  		workflow: workflow,
    80  		keyspace: keyspace,
    81  		query:    query,
    82  		wr:       wr,
    83  	}
    84  }
    85  
    86  // QueryResultForRowsAffected aggregates results into row-type results (fields + values)
    87  func (wr *Wrangler) QueryResultForRowsAffected(results map[*topo.TabletInfo]*sqltypes.Result) *sqltypes.Result {
    88  	var qr = &sqltypes.Result{}
    89  	qr.Fields = []*querypb.Field{{
    90  		Name: "Tablet",
    91  		Type: sqltypes.VarBinary,
    92  	}, {
    93  		Name: "RowsAffected",
    94  		Type: sqltypes.Uint64,
    95  	}}
    96  	var row2 []sqltypes.Value
    97  	for tablet, result := range results {
    98  		row2 = nil
    99  		row2 = append(row2, sqltypes.NewVarBinary(tablet.AliasString()))
   100  		row2 = append(row2, sqltypes.NewUint64(result.RowsAffected))
   101  		qr.Rows = append(qr.Rows, row2)
   102  	}
   103  	return qr
   104  }
   105  
   106  // QueryResultForTabletResults aggregates given results into a "rows-affected" type result (no row data)
   107  func (wr *Wrangler) QueryResultForTabletResults(results map[*topo.TabletInfo]*sqltypes.Result) *sqltypes.Result {
   108  	var qr = &sqltypes.Result{}
   109  	defaultFields := []*querypb.Field{{
   110  		Name: "Tablet",
   111  		Type: sqltypes.VarBinary,
   112  	}}
   113  	var row2 []sqltypes.Value
   114  	for tablet, result := range results {
   115  		if qr.Fields == nil {
   116  			qr.Fields = append(qr.Fields, defaultFields...)
   117  			qr.Fields = append(qr.Fields, result.Fields...)
   118  		}
   119  		for _, row := range result.Rows {
   120  			row2 = nil
   121  			row2 = append(row2, sqltypes.NewVarBinary(tablet.AliasString()))
   122  			row2 = append(row2, row...)
   123  			qr.Rows = append(qr.Rows, row2)
   124  		}
   125  	}
   126  	return qr
   127  }
   128  
   129  // VExecResult runs VExec and the naggregates the results into a single *sqltypes.Result
   130  func (wr *Wrangler) VExecResult(ctx context.Context, workflow, keyspace, query string, dryRun bool) (qr *sqltypes.Result, err error) {
   131  
   132  	results, err := wr.VExec(ctx, workflow, keyspace, query, dryRun)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  	if dryRun {
   137  		return nil, nil
   138  	}
   139  	var numFields int
   140  	for _, result := range results {
   141  		numFields = len(result.Fields)
   142  		break
   143  	}
   144  	if numFields != 0 {
   145  		qr = wr.QueryResultForTabletResults(results)
   146  	} else {
   147  		qr = wr.QueryResultForRowsAffected(results)
   148  	}
   149  	return qr, nil
   150  }
   151  
   152  // VExec executes queries on a table on all primaries in the target keyspace of the workflow
   153  func (wr *Wrangler) VExec(ctx context.Context, workflow, keyspace, query string, dryRun bool) (map[*topo.TabletInfo]*sqltypes.Result, error) {
   154  	if wr.VExecFunc != nil {
   155  		return wr.VExecFunc(ctx, workflow, keyspace, query, dryRun)
   156  	}
   157  	results, err := wr.runVexec(ctx, workflow, keyspace, query, dryRun)
   158  	retResults := make(map[*topo.TabletInfo]*sqltypes.Result)
   159  	for tablet, result := range results {
   160  		retResults[tablet] = sqltypes.Proto3ToResult(result)
   161  	}
   162  	return retResults, err
   163  }
   164  
   165  // runVexec is the main function that runs a dry or wet execution of 'query' on backend shards.
   166  func (wr *Wrangler) runVexec(ctx context.Context, workflow, keyspace, query string, dryRun bool) (map[*topo.TabletInfo]*querypb.QueryResult, error) {
   167  	vx := newVExec(ctx, workflow, keyspace, query, wr)
   168  
   169  	if err := vx.getPrimaries(); err != nil {
   170  		return nil, err
   171  	}
   172  	plan, err := vx.parseAndPlan(ctx)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	vx.plannedQuery = plan.parsedQuery.Query
   177  	if dryRun {
   178  		return nil, vx.outputDryRunInfo(ctx)
   179  	}
   180  	return vx.exec()
   181  }
   182  
   183  // parseAndPlan parses and analyses the query, then generates a plan
   184  func (vx *vexec) parseAndPlan(ctx context.Context) (plan *vexecPlan, err error) {
   185  	if err := vx.parseQuery(); err != nil {
   186  		return nil, err
   187  	}
   188  	if err := vx.getPlanner(ctx); err != nil {
   189  		return nil, err
   190  	}
   191  	plan, err = vx.buildPlan(ctx)
   192  	if err != nil {
   193  		return nil, err
   194  	}
   195  	return plan, nil
   196  }
   197  
   198  func (vx *vexec) outputDryRunInfo(ctx context.Context) error {
   199  	return vx.planner.dryRun(ctx)
   200  }
   201  
   202  // exec runs our planned query on backend shard primaries. It collects query results from all
   203  // shards and returns an aggregate (UNION ALL -like) result.
   204  func (vx *vexec) exec() (map[*topo.TabletInfo]*querypb.QueryResult, error) {
   205  	var wg sync.WaitGroup
   206  	allErrors := &concurrency.AllErrorRecorder{}
   207  	results := make(map[*topo.TabletInfo]*querypb.QueryResult)
   208  	var mu sync.Mutex
   209  	ctx, cancel := context.WithTimeout(vx.ctx, 10*time.Second)
   210  	defer cancel()
   211  	for _, primary := range vx.primaries {
   212  		wg.Add(1)
   213  		go func(ctx context.Context, primary *topo.TabletInfo) {
   214  			defer wg.Done()
   215  			qr, err := vx.planner.exec(ctx, primary.Alias, vx.plannedQuery)
   216  			if err != nil {
   217  				allErrors.RecordError(err)
   218  			} else {
   219  				// If we deleted a workflow then let's make a best effort attempt to clean
   220  				// up any related data.
   221  				if vx.query == sqlVReplicationDelete {
   222  					vx.wr.deleteWorkflowVDiffData(ctx, primary.Tablet, vx.workflow)
   223  					vx.wr.optimizeCopyStateTable(primary.Tablet)
   224  				}
   225  				mu.Lock()
   226  				results[primary] = qr
   227  				mu.Unlock()
   228  			}
   229  		}(ctx, primary)
   230  	}
   231  	wg.Wait()
   232  	return results, allErrors.AggrError(vterrors.Aggregate)
   233  }
   234  
   235  // parseQuery parses the input query
   236  func (vx *vexec) parseQuery() (err error) {
   237  	if vx.stmt, err = sqlparser.Parse(vx.query); err != nil {
   238  		return err
   239  	}
   240  	if vx.tableName, err = extractTableName(vx.stmt); err != nil {
   241  		return err
   242  	}
   243  	return nil
   244  }
   245  
   246  // getPrimaries identifies primary tablet for all shards relevant to our keyspace
   247  func (vx *vexec) getPrimaries() error {
   248  	var err error
   249  	shards, err := vx.wr.ts.GetShardNames(vx.ctx, vx.keyspace)
   250  	if err != nil {
   251  		return err
   252  	}
   253  	if len(shards) == 0 {
   254  		return fmt.Errorf("no shards found in keyspace %s", vx.keyspace)
   255  	}
   256  	var allPrimaries []*topo.TabletInfo
   257  	var primary *topo.TabletInfo
   258  	for _, shard := range shards {
   259  		if primary, err = vx.getPrimaryForShard(shard); err != nil {
   260  			return err
   261  		}
   262  		if primary == nil {
   263  			return fmt.Errorf("no primary found for shard %s", shard)
   264  		}
   265  		allPrimaries = append(allPrimaries, primary)
   266  	}
   267  	vx.primaries = allPrimaries
   268  	return nil
   269  }
   270  
   271  func (vx *vexec) getPrimaryForShard(shard string) (*topo.TabletInfo, error) {
   272  	si, err := vx.wr.ts.GetShard(vx.ctx, vx.keyspace, shard)
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	if si.PrimaryAlias == nil {
   277  		return nil, fmt.Errorf("no primary found for shard %s", shard)
   278  	}
   279  	primary, err := vx.wr.ts.GetTablet(vx.ctx, si.PrimaryAlias)
   280  	if err != nil {
   281  		return nil, err
   282  	}
   283  	if primary == nil {
   284  		return nil, fmt.Errorf("could not get tablet for %s:%s", vx.keyspace, si.PrimaryAlias)
   285  	}
   286  	return primary, nil
   287  }
   288  
   289  func (wr *Wrangler) convertQueryResultToSQLTypesResult(results map[*topo.TabletInfo]*querypb.QueryResult) map[*topo.TabletInfo]*sqltypes.Result {
   290  	retResults := make(map[*topo.TabletInfo]*sqltypes.Result)
   291  	for tablet, result := range results {
   292  		retResults[tablet] = sqltypes.Proto3ToResult(result)
   293  	}
   294  	return retResults
   295  }
   296  
   297  // WorkflowAction can start/stop/delete or list streams in _vt.vreplication on all primaries in the target keyspace of the workflow.
   298  func (wr *Wrangler) WorkflowAction(ctx context.Context, workflow, keyspace, action string, dryRun bool) (map[*topo.TabletInfo]*sqltypes.Result, error) {
   299  
   300  	if action == "show" {
   301  		replStatus, err := wr.ShowWorkflow(ctx, workflow, keyspace)
   302  		if err != nil {
   303  			return nil, err
   304  		}
   305  		err = dumpStreamListAsJSON(replStatus, wr)
   306  		return nil, err
   307  	} else if action == "listall" {
   308  		workflows, err := wr.ListAllWorkflows(ctx, keyspace, false)
   309  		if err != nil {
   310  			return nil, err
   311  		}
   312  		wr.printWorkflowList(keyspace, workflows)
   313  		return nil, err
   314  	}
   315  	results, err := wr.execWorkflowAction(ctx, workflow, keyspace, action, dryRun)
   316  	return wr.convertQueryResultToSQLTypesResult(results), err
   317  }
   318  
   319  func (wr *Wrangler) getWorkflowActionQuery(action string) (string, error) {
   320  	var query string
   321  	updateSQL := "update _vt.vreplication set state = %s"
   322  	switch action {
   323  	case "stop":
   324  		query = fmt.Sprintf(updateSQL, encodeString("Stopped"))
   325  	case "start":
   326  		query = fmt.Sprintf(updateSQL, encodeString("Running"))
   327  	case "delete":
   328  		query = sqlVReplicationDelete
   329  	default:
   330  		return "", fmt.Errorf("invalid action found: %s", action)
   331  	}
   332  	return query, nil
   333  }
   334  
   335  func (wr *Wrangler) execWorkflowAction(ctx context.Context, workflow, keyspace, action string, dryRun bool) (map[*topo.TabletInfo]*querypb.QueryResult, error) {
   336  	query, err := wr.getWorkflowActionQuery(action)
   337  	if err != nil {
   338  		return nil, err
   339  	}
   340  	return wr.runVexec(ctx, workflow, keyspace, query, dryRun)
   341  }
   342  
   343  // WorkflowTagAction sets or clears the tags for a workflow in a keyspace
   344  func (wr *Wrangler) WorkflowTagAction(ctx context.Context, keyspace string, workflow string, tags string) (map[*topo.TabletInfo]*sqltypes.Result, error) {
   345  	query := fmt.Sprintf("update _vt.vreplication set tags = %s", encodeString(tags))
   346  	results, err := wr.runVexec(ctx, workflow, keyspace, query, false)
   347  	return wr.convertQueryResultToSQLTypesResult(results), err
   348  }
   349  
// ReplicationStatusResult represents the result of trying to get the replication status for a given workflow.
type ReplicationStatusResult struct {
	// Workflow represents the name of the workflow relevant to the related replication statuses.
	Workflow string
	// SourceLocation represents the keyspace and shards that we are vreplicating from.
	SourceLocation ReplicationLocation
	// TargetLocation represents the keyspace and shards that we are vreplicating into.
	TargetLocation ReplicationLocation
	// MaxVReplicationLag represents the lag, in seconds, between the current time and the last time an event was
	// seen from the source shards. This defines the "liveness" of the source streams. This will be high only if one
	// of the source streams is no longer running (say, due to a network partition, primary not being available, or
	// a vstreamer failure).
	// MaxVReplicationTransactionLag (see below) represents the "mysql" replication lag, i.e. how far behind we are in
	// terms of data replication from the source to the target.
	MaxVReplicationLag int64
	// MaxVReplicationTransactionLag represents the lag, in seconds, across all shards, between the current time and
	// the timestamp of the last transaction OR heartbeat timestamp (if there have been no writes to replicate from
	// the source). It is set to math.MaxInt64 when any stream is still in the copy phase.
	MaxVReplicationTransactionLag int64
	// Frozen is true if this workflow has been deemed complete and is in a limbo "frozen" state (Message=="FROZEN")
	Frozen bool
	// ShardStatuses is a map of <shard>/<primary tablet alias> : ShardReplicationStatus (for the given shard).
	ShardStatuses map[string]*ShardReplicationStatus
	// SourceTimeZone represents the time zone provided to the workflow, only set if not UTC
	SourceTimeZone string
	// TargetTimeZone is set to the original SourceTimeZone, in reverse streams, if it was provided to the workflow
	TargetTimeZone string
	// OnDDL specifies the action to be taken when a DDL is encountered. It is only populated when the action is
	// not the default (IGNORE) and is omitted from the JSON output otherwise.
	OnDDL string `json:"OnDDL,omitempty"`
	// DeferSecondaryKeys specifies whether to defer the creation of secondary keys.
	DeferSecondaryKeys bool `json:"DeferSecondaryKeys,omitempty"`
}
   380  
// ReplicationLocation represents a location that data is either replicating from, or replicating into.
type ReplicationLocation struct {
	// Keyspace is the name of the keyspace at this end of the replication.
	Keyspace string
	// Shards lists the names of the shards involved at this end of the replication.
	Shards []string
}
   386  
// ShardReplicationStatus holds relevant vreplication related info for the given shard.
type ShardReplicationStatus struct {
	// PrimaryReplicationStatuses represents all of the replication statuses (one per stream) found on the
	// primary tablet of the given shard.
	PrimaryReplicationStatuses []*ReplicationStatus
	// TabletControls represents the tablet controls for the tablets in the shard, as recorded in the shard record.
	TabletControls []*topodatapb.Shard_TabletControl
	// PrimaryIsServing indicates whether the primary tablet of the given shard is currently serving write traffic.
	PrimaryIsServing bool
}
   396  
// copyState describes the copy progress of one table of a stream, as read
// from the _vt.copy_state table.
type copyState struct {
	// Table is the value of the table_name column.
	Table string
	// LastPK is the value of the lastpk column, converted to a string.
	LastPK string
}
   401  
// ReplicationStatus includes data from the _vt.vreplication table, along with other useful relevant data.
type ReplicationStatus struct {
	// Shard represents the relevant shard name.
	Shard string
	// Tablet is the tablet alias that the ReplicationStatus came from.
	Tablet string
	// ID represents the id column from the _vt.vreplication table.
	ID int64
	// Bls represents the BinlogSource.
	Bls *binlogdatapb.BinlogSource
	// Pos represents the pos column from the _vt.vreplication table, decoded (uncompressed) if necessary.
	Pos string
	// StopPos represents the stop_pos column from the _vt.vreplication table.
	StopPos string
	// State represents the state column from the _vt.vreplication table. It may be overridden with a derived
	// state ("Error", "Copying" or "Lagging"); see updateState.
	State string
	// DBName represents the db_name column from the _vt.vreplication table.
	DBName string
	// TransactionTimestamp represents the transaction_timestamp column from the _vt.vreplication table.
	TransactionTimestamp int64
	// TimeUpdated represents the time_updated column from the _vt.vreplication table.
	TimeUpdated int64
	// TimeHeartbeat represents the time_heartbeat column from the _vt.vreplication table.
	TimeHeartbeat int64
	// TimeThrottled represents the time_throttled column from the _vt.vreplication table.
	TimeThrottled int64
	// ComponentThrottled represents the component_throttled column from the _vt.vreplication table.
	ComponentThrottled string
	// Message represents the message column from the _vt.vreplication table.
	Message string
	// Tags contain the tags specified for this stream
	Tags string
	// WorkflowType is the enum name corresponding to the workflow_type column from the _vt.vreplication table.
	WorkflowType string
	// WorkflowSubType is the enum name corresponding to the workflow_sub_type column from the _vt.vreplication table.
	WorkflowSubType string
	// CopyState represents the rows from the _vt.copy_state table.
	CopyState []copyState
	// sourceTimeZone represents the time zone of each stream, only set if not UTC
	sourceTimeZone string
	// targetTimeZone is set to the sourceTimeZone of the forward stream, if it was provided in the workflow
	targetTimeZone string
	// deferSecondaryKeys represents the defer_secondary_keys column from the _vt.vreplication table.
	deferSecondaryKeys bool
}
   444  
   445  func (wr *Wrangler) getReplicationStatusFromRow(ctx context.Context, row sqltypes.RowNamedValues, primary *topo.TabletInfo) (*ReplicationStatus, string, error) {
   446  	var err error
   447  	var id, timeUpdated, transactionTimestamp, timeHeartbeat, timeThrottled int64
   448  	var state, dbName, pos, stopPos, message, tags, componentThrottled string
   449  	var workflowType, workflowSubType int64
   450  	var deferSecondaryKeys bool
   451  	var bls binlogdatapb.BinlogSource
   452  	var mpos mysql.Position
   453  
   454  	id, err = row.ToInt64("id")
   455  	if err != nil {
   456  		return nil, "", err
   457  	}
   458  	rowBytes, err := row.ToBytes("source")
   459  	if err != nil {
   460  		return nil, "", err
   461  	}
   462  	if err := prototext.Unmarshal(rowBytes, &bls); err != nil {
   463  		return nil, "", err
   464  	}
   465  
   466  	// gtid in the pos column can be compressed, so check and possibly uncompress
   467  	pos, err = row.ToString("pos")
   468  	if err != nil {
   469  		return nil, "", err
   470  	}
   471  	if pos != "" {
   472  		mpos, err = binlogplayer.DecodePosition(pos)
   473  		if err != nil {
   474  			return nil, "", err
   475  		}
   476  		pos = mpos.String()
   477  	}
   478  	stopPos, err = row.ToString("stop_pos")
   479  	if err != nil {
   480  		return nil, "", err
   481  	}
   482  	state, err = row.ToString("state")
   483  	if err != nil {
   484  		return nil, "", err
   485  	}
   486  	dbName, err = row.ToString("db_name")
   487  	if err != nil {
   488  		return nil, "", err
   489  	}
   490  	timeUpdated, err = row.ToInt64("time_updated")
   491  	if err != nil {
   492  		return nil, "", err
   493  	}
   494  	transactionTimestamp, err = row.ToInt64("transaction_timestamp")
   495  	if err != nil {
   496  		return nil, "", err
   497  	}
   498  	timeHeartbeat, err = row.ToInt64("time_heartbeat")
   499  	if err != nil {
   500  		return nil, "", err
   501  	}
   502  	timeThrottled, err = row.ToInt64("time_throttled")
   503  	if err != nil {
   504  		return nil, "", err
   505  	}
   506  	componentThrottled, err = row.ToString("component_throttled")
   507  	if err != nil {
   508  		return nil, "", err
   509  	}
   510  	message, err = row.ToString("message")
   511  	if err != nil {
   512  		return nil, "", err
   513  	}
   514  	tags, err = row.ToString("tags")
   515  	if err != nil {
   516  		return nil, "", err
   517  	}
   518  	workflowType, _ = row.ToInt64("workflow_type")
   519  	workflowSubType, _ = row.ToInt64("workflow_sub_type")
   520  	deferSecondaryKeys, _ = row.ToBool("defer_secondary_keys")
   521  
   522  	status := &ReplicationStatus{
   523  		Shard:                primary.Shard,
   524  		Tablet:               primary.AliasString(),
   525  		ID:                   id,
   526  		Bls:                  &bls,
   527  		Pos:                  pos,
   528  		StopPos:              stopPos,
   529  		State:                state,
   530  		DBName:               dbName,
   531  		TransactionTimestamp: transactionTimestamp,
   532  		TimeUpdated:          timeUpdated,
   533  		TimeHeartbeat:        timeHeartbeat,
   534  		TimeThrottled:        timeThrottled,
   535  		ComponentThrottled:   componentThrottled,
   536  		Message:              message,
   537  		Tags:                 tags,
   538  		sourceTimeZone:       bls.SourceTimeZone,
   539  		targetTimeZone:       bls.TargetTimeZone,
   540  		WorkflowType:         binlogdatapb.VReplicationWorkflowType_name[int32(workflowType)],
   541  		WorkflowSubType:      binlogdatapb.VReplicationWorkflowSubType_name[int32(workflowSubType)],
   542  		deferSecondaryKeys:   deferSecondaryKeys,
   543  	}
   544  	status.CopyState, err = wr.getCopyState(ctx, primary, id)
   545  	if err != nil {
   546  		return nil, "", err
   547  	}
   548  
   549  	status.State = updateState(message, status.State, status.CopyState, timeUpdated)
   550  	return status, bls.Keyspace, nil
   551  }
   552  
// getStreams collects the replication status of every stream of the given
// workflow from the primaries of the (target) keyspace and aggregates it into
// a ReplicationStatusResult.
//
// For each primary that returned rows it converts every row into a
// ReplicationStatus, fetches the shard record from the topo for the tablet
// controls and serving state, and stores the result in ShardStatuses under the
// key "<shard>/<primary tablet alias>". Primaries without rows are skipped, so
// ShardStatuses may be empty. Along the way it tracks the maximum lag values,
// the frozen flag and the sets of source and target shards.
//
// SourceTimeZone, TargetTimeZone, DeferSecondaryKeys and the source keyspace
// are overwritten for every row, so they reflect whichever row was processed
// last.
func (wr *Wrangler) getStreams(ctx context.Context, workflow, keyspace string) (*ReplicationStatusResult, error) {
	var rsr ReplicationStatusResult
	rsr.ShardStatuses = make(map[string]*ShardReplicationStatus)
	rsr.Workflow = workflow
	var results map[*topo.TabletInfo]*querypb.QueryResult
	// Note: max_replication_lag is selected but not read by getReplicationStatusFromRow.
	query := `select 
		id,
		source,
		pos,
		stop_pos,
		max_replication_lag,
		state,
		db_name,
		time_updated,
		transaction_timestamp,
		time_heartbeat,
		time_throttled,
		component_throttled,
		message,
		tags,
		workflow_type, 
		workflow_sub_type,
		defer_secondary_keys
	from _vt.vreplication`
	results, err := wr.runVexec(ctx, workflow, keyspace, query, false)
	if err != nil {
		return nil, err
	}

	// We set a topo timeout since we contact topo for the shard record.
	ctx, cancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
	defer cancel()
	var sourceKeyspace string
	sourceShards := sets.New[string]()
	targetShards := sets.New[string]()
	for primary, result := range results {
		var rsrStatus []*ReplicationStatus
		nqr := sqltypes.Proto3ToResult(result).Named()
		// A primary with no streams for this workflow contributes nothing.
		if len(nqr.Rows) == 0 {
			continue
		}
		for _, row := range nqr.Rows {
			status, sk, err := wr.getReplicationStatusFromRow(ctx, row, primary)
			if err != nil {
				return nil, err
			}
			rsr.SourceTimeZone = status.sourceTimeZone
			rsr.TargetTimeZone = status.targetTimeZone
			sourceKeyspace = sk
			sourceShards.Insert(status.Bls.Shard)
			rsrStatus = append(rsrStatus, status)

			// Only show the OnDDL setting if it's not the default of 0/IGNORE.
			if status.Bls.OnDdl != binlogdatapb.OnDDLAction_IGNORE {
				rsr.OnDDL = binlogdatapb.OnDDLAction_name[int32(status.Bls.OnDdl)]
				// Unset it in the proto so that we do not show the
				// low-level enum int in the JSON marshalled output
				// as e.g. `"on_ddl": 1` is not meaningful or helpful
				// for the end user and we instead show the mapped
				// string value using the top-level "OnDDL" json key.
				// Note: this is done here only because golang does
				// not currently support setting json tags in proto
				// declarations so that I could request it always be
				// omitted from marshalled JSON output:
				// https://github.com/golang/protobuf/issues/52
				status.Bls.OnDdl = 0
			}

			rsr.DeferSecondaryKeys = status.deferSecondaryKeys

			// A single frozen stream marks the whole workflow as frozen.
			if status.Message == workflow2.Frozen {
				rsr.Frozen = true
			}

			// MaxVReplicationLag is the time since the last event was processed from the source
			// The last event can be an actual binlog event or a heartbeat in case no binlog events occur within (default) 1 second
			timeUpdated := time.Unix(status.TimeUpdated, 0)
			replicationLag := time.Since(timeUpdated)
			if replicationLag.Seconds() > float64(rsr.MaxVReplicationLag) {
				rsr.MaxVReplicationLag = int64(replicationLag.Seconds())
			}

			// MaxVReplicationTransactionLag estimates the actual lag between the source and the target
			// If we are still processing source events it is the difference b/w current time and the timestamp of the last event
			// If heartbeats are more recent than the last event, then the lag is the time since the last heartbeat as
			// there can be an actual event immediately after the heartbeat, but which has not yet
			// been processed on the target
			// We don't allow switching during the copy phase, so in that case we just return a large lag.
			// All timestamps are in seconds since epoch
			lastTransactionTimestamp := status.TransactionTimestamp
			lastHeartbeatTime := status.TimeHeartbeat
			if status.State == "Copying" {
				// Once set to the maximum, no later stream can lower it: the
				// comparison below only ever raises the value.
				rsr.MaxVReplicationTransactionLag = math.MaxInt64
			} else {
				if lastTransactionTimestamp == 0 /* no new events after copy */ ||
					lastHeartbeatTime > lastTransactionTimestamp /* no recent transactions, so all caught up */ {

					lastTransactionTimestamp = lastHeartbeatTime
				}
				now := time.Now().Unix() /*seconds since epoch*/
				transactionReplicationLag := now - lastTransactionTimestamp
				if transactionReplicationLag > rsr.MaxVReplicationTransactionLag {
					rsr.MaxVReplicationTransactionLag = transactionReplicationLag
				}
			}
		}
		// The shard record supplies the tablet controls and the primary serving flag.
		si, err := wr.ts.GetShard(ctx, keyspace, primary.Shard)
		if err != nil {
			return nil, err
		}
		targetShards.Insert(si.ShardName())
		rsr.ShardStatuses[fmt.Sprintf("%s/%s", primary.Shard, primary.AliasString())] = &ShardReplicationStatus{
			PrimaryReplicationStatuses: rsrStatus,
			TabletControls:             si.TabletControls,
			PrimaryIsServing:           si.IsPrimaryServing,
		}
	}
	rsr.SourceLocation = ReplicationLocation{
		Keyspace: sourceKeyspace,
		Shards:   sets.List(sourceShards),
	}
	rsr.TargetLocation = ReplicationLocation{
		Keyspace: keyspace,
		Shards:   sets.List(targetShards),
	}

	return &rsr, nil
}
   681  
   682  // ListActiveWorkflows will return a list of all active workflows for the given keyspace.
   683  func (wr *Wrangler) ListActiveWorkflows(ctx context.Context, keyspace string) ([]string, error) {
   684  	return wr.ListAllWorkflows(ctx, keyspace, true)
   685  }
   686  
   687  // ListAllWorkflows will return a list of all workflows (Running and Stopped) for the given keyspace.
   688  func (wr *Wrangler) ListAllWorkflows(ctx context.Context, keyspace string, active bool) ([]string, error) {
   689  	where := ""
   690  	if active {
   691  		where = " where state <> 'Stopped'"
   692  	}
   693  	query := "select distinct workflow from _vt.vreplication" + where
   694  	vx := vtctldvexec.NewVExec(keyspace, "", wr.ts, wr.tmc)
   695  	results, err := vx.QueryContext(ctx, query)
   696  	if err != nil {
   697  		return nil, err
   698  	}
   699  	workflowsSet := sets.New[string]()
   700  	for _, result := range results {
   701  		if len(result.Rows) == 0 {
   702  			continue
   703  		}
   704  		qr := sqltypes.Proto3ToResult(result)
   705  		for _, row := range qr.Rows {
   706  			for _, value := range row {
   707  				// Even though we query for distinct, we must de-dup because we query per primary tablet.
   708  				workflowsSet.Insert(value.ToString())
   709  			}
   710  		}
   711  	}
   712  	workflows := sets.List(workflowsSet)
   713  	return workflows, nil
   714  }
   715  
   716  // ShowWorkflow will return all of the relevant replication related information for the given workflow.
   717  func (wr *Wrangler) ShowWorkflow(ctx context.Context, workflow, keyspace string) (*ReplicationStatusResult, error) {
   718  	replStatus, err := wr.getStreams(ctx, workflow, keyspace)
   719  	if err != nil {
   720  		return nil, err
   721  	}
   722  	if len(replStatus.ShardStatuses) == 0 {
   723  		return nil, fmt.Errorf("no streams found for workflow %s in keyspace %s", workflow, keyspace)
   724  	}
   725  
   726  	return replStatus, nil
   727  }
   728  
   729  func updateState(message, state string, cs []copyState, timeUpdated int64) string {
   730  	if strings.Contains(strings.ToLower(message), "error") {
   731  		state = "Error"
   732  	} else if state == "Running" && len(cs) > 0 {
   733  		state = "Copying"
   734  	} else if state == "Running" && int64(time.Now().Second())-timeUpdated > 10 /* seconds */ {
   735  		state = "Lagging"
   736  	}
   737  	return state
   738  }
   739  
   740  func dumpStreamListAsJSON(replStatus *ReplicationStatusResult, wr *Wrangler) error {
   741  	text, err := json.MarshalIndent(replStatus, "", "\t")
   742  	if err != nil {
   743  		return err
   744  	}
   745  	wr.Logger().Printf("%s\n", text)
   746  	return nil
   747  }
   748  
   749  func (wr *Wrangler) printWorkflowList(keyspace string, workflows []string) {
   750  	list := strings.Join(workflows, ", ")
   751  	if list == "" {
   752  		wr.Logger().Printf("No workflows found in keyspace %s\n", keyspace)
   753  		return
   754  	}
   755  	wr.Logger().Printf("Following workflow(s) found in keyspace %s: %v\n", keyspace, list)
   756  }
   757  
   758  func (wr *Wrangler) getCopyState(ctx context.Context, tablet *topo.TabletInfo, id int64) ([]copyState, error) {
   759  	var cs []copyState
   760  	query := fmt.Sprintf("select table_name, lastpk from _vt.copy_state where vrepl_id = %d and id in (select max(id) from _vt.copy_state where vrepl_id = %d group by vrepl_id, table_name)",
   761  		id, id)
   762  	qr, err := wr.VReplicationExec(ctx, tablet.Alias, query)
   763  	if err != nil {
   764  		return nil, err
   765  	}
   766  
   767  	result := sqltypes.Proto3ToResult(qr)
   768  	if result != nil {
   769  		for _, row := range result.Rows {
   770  			// These fields are varbinary, but close enough
   771  			table := row[0].ToString()
   772  			lastPK := row[1].ToString()
   773  			copyState := copyState{
   774  				Table:  table,
   775  				LastPK: lastPK,
   776  			}
   777  			cs = append(cs, copyState)
   778  		}
   779  	}
   780  
   781  	return cs, nil
   782  }