vitess.io/vitess@v0.16.2/go/vt/vtctl/workflow/traffic_switcher.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package workflow
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"errors"
    23  	"fmt"
    24  	"hash/fnv"
    25  	"math"
    26  	"sort"
    27  	"strings"
    28  
    29  	"google.golang.org/protobuf/encoding/prototext"
    30  	"k8s.io/apimachinery/pkg/util/sets"
    31  
    32  	"vitess.io/vitess/go/sqltypes"
    33  	"vitess.io/vitess/go/vt/concurrency"
    34  	"vitess.io/vitess/go/vt/logutil"
    35  	"vitess.io/vitess/go/vt/topo"
    36  	"vitess.io/vitess/go/vt/vtgate/vindexes"
    37  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    38  
    39  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    40  	querypb "vitess.io/vitess/go/vt/proto/query"
    41  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    42  )
    43  
    44  const (
    45  	// Frozen is the message value of frozen vreplication streams.
    46  	Frozen = "FROZEN"
    47  )
    48  
    49  var (
    50  	// ErrNoStreams occurs when no target streams are found for a workflow in a
    51  	// target keyspace.
    52  	ErrNoStreams = errors.New("no streams found")
    53  )
    54  
    55  // TrafficSwitchDirection specifies the switching direction.
    56  type TrafficSwitchDirection int
    57  
    58  // The following constants define the switching direction.
    59  const (
    60  	DirectionForward = TrafficSwitchDirection(iota)
    61  	DirectionBackward
    62  )
    63  
    64  // TableRemovalType specifies the way the a table will be removed during a
    65  // DropSource for a MoveTables workflow.
    66  type TableRemovalType int
    67  
    68  // The following consts define if DropSource will drop or rename the table.
    69  const (
    70  	DropTable = TableRemovalType(iota)
    71  	RenameTable
    72  )
    73  
    74  var tableRemovalTypeStrs = [...]string{
    75  	"DROP TABLE",
    76  	"RENAME TABLE",
    77  }
    78  
    79  // String returns a string representation of a TableRemovalType
    80  func (trt TableRemovalType) String() string {
    81  	if trt < DropTable || trt > RenameTable {
    82  		return "Unknown"
    83  	}
    84  
    85  	return tableRemovalTypeStrs[trt]
    86  }
    87  
// ITrafficSwitcher is a temporary hack to allow us to move streamMigrater out
// of package wrangler without also needing to move trafficSwitcher in the same
// changeset.
//
// After moving TrafficSwitcher to this package, this type should be removed,
// and StreamMigrator should be updated to contain a field of type
// *TrafficSwitcher instead of ITrafficSwitcher.
//
// The methods are grouped into three sections, each introduced by a block
// comment naming where the exposed behavior or data lives in package wrangler.
type ITrafficSwitcher interface {
	/* Functions that expose types and behavior contained in *wrangler.Wrangler */

	TopoServer() *topo.Server
	TabletManagerClient() tmclient.TabletManagerClient
	Logger() logutil.Logger
	// VReplicationExec here is used when we want the (*wrangler.Wrangler)
	// implementation, which does a topo lookup on the tablet alias before
	// calling the underlying TabletManagerClient RPC.
	VReplicationExec(ctx context.Context, alias *topodatapb.TabletAlias, query string) (*querypb.QueryResult, error)

	/* Functions that expose fields on the *wrangler.trafficSwitcher */

	ExternalTopo() *topo.Server
	MigrationType() binlogdatapb.MigrationType
	ReverseWorkflowName() string
	SourceKeyspaceName() string
	SourceKeyspaceSchema() *vindexes.KeyspaceSchema
	// NOTE(review): the Sources and Targets maps are presumably keyed by shard
	// name, like TargetInfo.Targets as built by BuildTargets — confirm against
	// the wrangler implementation.
	Sources() map[string]*MigrationSource
	Tables() []string
	TargetKeyspaceName() string
	Targets() map[string]*MigrationTarget
	WorkflowName() string
	SourceTimeZone() string

	/* Functions that *wrangler.trafficSwitcher implements */

	ForAllSources(f func(source *MigrationSource) error) error
	ForAllTargets(f func(target *MigrationTarget) error) error
	// NOTE(review): uid has the same type as the keys of
	// MigrationTarget.Sources, so it is presumably a vreplication stream id —
	// confirm against the wrangler implementation.
	ForAllUIDs(f func(target *MigrationTarget, uid uint32) error) error
	SourceShards() []*topo.ShardInfo
	TargetShards() []*topo.ShardInfo
}
   128  
// TargetInfo contains the metadata for a set of targets involved in a workflow.
//
// As populated by BuildTargets in this file:
//   - Targets maps the name of each target shard that has at least one stream
//     row for the workflow to the MigrationTarget built for that shard.
//   - Frozen is true if the message column of any stream row equals Frozen.
//   - OptCells and OptTabletTypes hold the "cell" and "tablet_types" column
//     values of the last stream row that was processed.
//   - WorkflowType and WorkflowSubType are converted from the "workflow_type"
//     and "workflow_sub_type" columns of the last stream row that was
//     processed.
type TargetInfo struct {
	Targets         map[string]*MigrationTarget
	Frozen          bool
	OptCells        string
	OptTabletTypes  string
	WorkflowType    binlogdatapb.VReplicationWorkflowType
	WorkflowSubType binlogdatapb.VReplicationWorkflowSubType
}
   138  
   139  // MigrationSource contains the metadata for each migration source.
   140  type MigrationSource struct {
   141  	si        *topo.ShardInfo
   142  	primary   *topo.TabletInfo
   143  	Position  string
   144  	Journaled bool
   145  }
   146  
   147  // NewMigrationSource returns a MigrationSource for the given shard and primary.
   148  //
   149  // (TODO|@ajm188): do we always want to start with (position:"", journaled:false)?
   150  func NewMigrationSource(si *topo.ShardInfo, primary *topo.TabletInfo) *MigrationSource {
   151  	return &MigrationSource{
   152  		si:      si,
   153  		primary: primary,
   154  	}
   155  }
   156  
   157  // GetShard returns the *topo.ShardInfo for the migration source.
   158  func (source *MigrationSource) GetShard() *topo.ShardInfo {
   159  	return source.si
   160  }
   161  
   162  // GetPrimary returns the *topo.TabletInfo for the primary tablet of the
   163  // migration source.
   164  func (source *MigrationSource) GetPrimary() *topo.TabletInfo {
   165  	return source.primary
   166  }
   167  
// MigrationTarget contains the metadata for each migration target.
//
// si and primary hold the target's shard record and primary tablet and are
// read through GetShard and GetPrimary. Sources maps a stream id to the
// BinlogSource of that stream; BuildTargets in this file fills it from the
// "id" and "source" columns of _vt.vreplication. Position is not assigned by
// any code in this file (presumably a replication position set by callers —
// TODO confirm).
type MigrationTarget struct {
	si       *topo.ShardInfo
	primary  *topo.TabletInfo
	Sources  map[uint32]*binlogdatapb.BinlogSource
	Position string
}

// GetShard returns the *topo.ShardInfo for the migration target.
func (target *MigrationTarget) GetShard() *topo.ShardInfo {
	return target.si
}

// GetPrimary returns the *topo.TabletInfo for the primary tablet of the
// migration target.
func (target *MigrationTarget) GetPrimary() *topo.TabletInfo {
	return target.primary
}
   186  
   187  // BuildTargets collects MigrationTargets and other metadata (see TargetInfo)
   188  // from a workflow in the target keyspace.
   189  //
   190  // It returns ErrNoStreams if there are no targets found for the workflow.
   191  func BuildTargets(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, targetKeyspace string, workflow string) (*TargetInfo, error) {
   192  	targetShards, err := ts.GetShardNames(ctx, targetKeyspace)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	var (
   198  		frozen          bool
   199  		optCells        string
   200  		optTabletTypes  string
   201  		targets         = make(map[string]*MigrationTarget, len(targetShards))
   202  		workflowType    binlogdatapb.VReplicationWorkflowType
   203  		workflowSubType binlogdatapb.VReplicationWorkflowSubType
   204  	)
   205  
   206  	// We check all shards in the target keyspace. Not all of them may have a
   207  	// stream. For example, if we're splitting -80 to [-40,40-80], only those
   208  	// two target shards will have vreplication streams, and the other shards in
   209  	// the target keyspace will not.
   210  	for _, targetShard := range targetShards {
   211  		si, err := ts.GetShard(ctx, targetKeyspace, targetShard)
   212  		if err != nil {
   213  			return nil, err
   214  		}
   215  
   216  		if si.PrimaryAlias == nil {
   217  			// This can happen if bad inputs are given.
   218  			return nil, fmt.Errorf("shard %v/%v doesn't have a primary set", targetKeyspace, targetShard)
   219  		}
   220  
   221  		primary, err := ts.GetTablet(ctx, si.PrimaryAlias)
   222  		if err != nil {
   223  			return nil, err
   224  		}
   225  
   226  		// NB: changing the whitespace of this query breaks tests for now.
   227  		// (TODO:@ajm188) extend FakeDBClient to be less whitespace-sensitive on
   228  		// expected queries.
   229  		query := fmt.Sprintf("select id, source, message, cell, tablet_types, workflow_type, workflow_sub_type, defer_secondary_keys from _vt.vreplication where workflow=%s and db_name=%s", encodeString(workflow), encodeString(primary.DbName()))
   230  		p3qr, err := tmc.VReplicationExec(ctx, primary.Tablet, query)
   231  		if err != nil {
   232  			return nil, err
   233  		}
   234  
   235  		if len(p3qr.Rows) < 1 {
   236  			continue
   237  		}
   238  
   239  		target := &MigrationTarget{
   240  			si:      si,
   241  			primary: primary,
   242  			Sources: make(map[uint32]*binlogdatapb.BinlogSource),
   243  		}
   244  
   245  		qr := sqltypes.Proto3ToResult(p3qr)
   246  		for _, row := range qr.Named().Rows {
   247  			id, err := row["id"].ToInt64()
   248  			if err != nil {
   249  				return nil, err
   250  			}
   251  
   252  			var bls binlogdatapb.BinlogSource
   253  			rowBytes, err := row["source"].ToBytes()
   254  			if err != nil {
   255  				return nil, err
   256  			}
   257  			if err := prototext.Unmarshal(rowBytes, &bls); err != nil {
   258  				return nil, err
   259  			}
   260  
   261  			if row["message"].ToString() == Frozen {
   262  				frozen = true
   263  			}
   264  
   265  			target.Sources[uint32(id)] = &bls
   266  			optCells = row["cell"].ToString()
   267  			optTabletTypes = row["tablet_types"].ToString()
   268  
   269  			workflowType = getVReplicationWorkflowType(row)
   270  			workflowSubType = getVReplicationWorkflowSubType(row)
   271  
   272  		}
   273  
   274  		targets[targetShard] = target
   275  	}
   276  
   277  	if len(targets) == 0 {
   278  		return nil, fmt.Errorf("%w in keyspace %s for %s", ErrNoStreams, targetKeyspace, workflow)
   279  	}
   280  
   281  	return &TargetInfo{
   282  		Targets:         targets,
   283  		Frozen:          frozen,
   284  		OptCells:        optCells,
   285  		OptTabletTypes:  optTabletTypes,
   286  		WorkflowType:    workflowType,
   287  		WorkflowSubType: workflowSubType,
   288  	}, nil
   289  }
   290  
   291  func getVReplicationWorkflowType(row sqltypes.RowNamedValues) binlogdatapb.VReplicationWorkflowType {
   292  	i, _ := row["workflow_type"].ToInt64()
   293  	return binlogdatapb.VReplicationWorkflowType(i)
   294  }
   295  
   296  func getVReplicationWorkflowSubType(row sqltypes.RowNamedValues) binlogdatapb.VReplicationWorkflowSubType {
   297  	i, _ := row["workflow_sub_type"].ToInt64()
   298  	return binlogdatapb.VReplicationWorkflowSubType(i)
   299  }
   300  
   301  // CompareShards compares the list of shards in a workflow with the shards in
   302  // that keyspace according to the topo. It returns an error if they do not match.
   303  //
   304  // This function is used to validate MoveTables workflows.
   305  //
   306  // (TODO|@ajm188): This function is temporarily-exported until *wrangler.trafficSwitcher
   307  // has been fully moved over to this package. Once that refactor is finished,
   308  // this function should be unexported. Consequently, YOU SHOULD NOT DEPEND ON
   309  // THIS FUNCTION EXTERNALLY.
   310  func CompareShards(ctx context.Context, keyspace string, shards []*topo.ShardInfo, ts *topo.Server) error {
   311  	shardSet := sets.New[string]()
   312  	for _, si := range shards {
   313  		shardSet.Insert(si.ShardName())
   314  	}
   315  
   316  	topoShards, err := ts.GetShardNames(ctx, keyspace)
   317  	if err != nil {
   318  		return err
   319  	}
   320  
   321  	topoShardSet := sets.New[string](topoShards...)
   322  	if !shardSet.Equal(topoShardSet) {
   323  		wfExtra := shardSet.Difference(topoShardSet)
   324  		topoExtra := topoShardSet.Difference(shardSet)
   325  
   326  		var rec concurrency.AllErrorRecorder
   327  		if wfExtra.Len() > 0 {
   328  			wfExtraSorted := sets.List(wfExtra)
   329  			rec.RecordError(fmt.Errorf("switch command shards not in topo: %v", wfExtraSorted))
   330  		}
   331  
   332  		if topoExtra.Len() > 0 {
   333  			topoExtraSorted := sets.List(topoExtra)
   334  			rec.RecordError(fmt.Errorf("topo shards not in switch command: %v", topoExtraSorted))
   335  		}
   336  
   337  		return fmt.Errorf("mismatched shards for keyspace %s: %s", keyspace, strings.Join(rec.ErrorStrings(), "; "))
   338  	}
   339  
   340  	return nil
   341  }
   342  
   343  // HashStreams produces a stable hash based on the target keyspace and migration
   344  // targets.
   345  func HashStreams(targetKeyspace string, targets map[string]*MigrationTarget) int64 {
   346  	var expanded []string
   347  	for shard, target := range targets {
   348  		for uid := range target.Sources {
   349  			expanded = append(expanded, fmt.Sprintf("%s:%d", shard, uid))
   350  		}
   351  	}
   352  
   353  	sort.Strings(expanded)
   354  
   355  	hasher := fnv.New64()
   356  	hasher.Write([]byte(targetKeyspace))
   357  
   358  	for _, s := range expanded {
   359  		hasher.Write([]byte(s))
   360  	}
   361  
   362  	// Convert to int64 after dropping the highest bit.
   363  	return int64(hasher.Sum64() & math.MaxInt64)
   364  }
   365  
   366  const reverseSuffix = "_reverse"
   367  
   368  // ReverseWorkflowName returns the "reversed" name of a workflow. For a
   369  // "forward" workflow, this is the workflow name with "_reversed" appended, and
   370  // for a "reversed" workflow, this is the workflow name with the "_reversed"
   371  // suffix removed.
   372  func ReverseWorkflowName(workflow string) string {
   373  	if strings.HasSuffix(workflow, reverseSuffix) {
   374  		return workflow[:len(workflow)-len(reverseSuffix)]
   375  	}
   376  
   377  	return workflow + reverseSuffix
   378  }
   379  
   380  // Straight copy-paste of encodeString from wrangler/keyspace.go. I want to make
   381  // this public, but it doesn't belong in package workflow. Maybe package sqltypes,
   382  // or maybe package sqlescape?
   383  func encodeString(in string) string {
   384  	buf := bytes.NewBuffer(nil)
   385  	sqltypes.NewVarChar(in).EncodeSQL(buf)
   386  	return buf.String()
   387  }