vitess.io/vitess@v0.16.2/go/vt/wrangler/resharder.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package wrangler
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"google.golang.org/protobuf/encoding/prototext"
    27  
    28  	"vitess.io/vitess/go/vt/log"
    29  	"vitess.io/vitess/go/vt/schema"
    30  	"vitess.io/vitess/go/vt/vtctl/workflow"
    31  
    32  	"vitess.io/vitess/go/sqltypes"
    33  	"vitess.io/vitess/go/vt/binlog/binlogplayer"
    34  	"vitess.io/vitess/go/vt/concurrency"
    35  	"vitess.io/vitess/go/vt/key"
    36  	binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
    37  	vschemapb "vitess.io/vitess/go/vt/proto/vschema"
    38  	"vitess.io/vitess/go/vt/topo"
    39  	"vitess.io/vitess/go/vt/topotools"
    40  	"vitess.io/vitess/go/vt/vterrors"
    41  	"vitess.io/vitess/go/vt/vtgate/vindexes"
    42  	"vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication"
    43  )
    44  
    45  type resharder struct {
    46  	wr                 *Wrangler
    47  	keyspace           string
    48  	workflow           string
    49  	sourceShards       []*topo.ShardInfo
    50  	sourcePrimaries    map[string]*topo.TabletInfo
    51  	targetShards       []*topo.ShardInfo
    52  	targetPrimaries    map[string]*topo.TabletInfo
    53  	vschema            *vschemapb.Keyspace
    54  	refStreams         map[string]*refStream
    55  	cell               string //single cell or cellsAlias or comma-separated list of cells/cellsAliases
    56  	tabletTypes        string
    57  	stopAfterCopy      bool
    58  	onDDL              string
    59  	deferSecondaryKeys bool
    60  }
    61  
    62  type refStream struct {
    63  	workflow    string
    64  	bls         *binlogdatapb.BinlogSource
    65  	cell        string
    66  	tabletTypes string
    67  }
    68  
    69  // Reshard initiates a resharding workflow.
    70  func (wr *Wrangler) Reshard(ctx context.Context, keyspace, workflow string, sources, targets []string,
    71  	skipSchemaCopy bool, cell, tabletTypes, onDDL string, autoStart, stopAfterCopy, deferSecondaryKeys bool) error {
    72  	if err := wr.validateNewWorkflow(ctx, keyspace, workflow); err != nil {
    73  		return err
    74  	}
    75  	if err := wr.ts.ValidateSrvKeyspace(ctx, keyspace, cell); err != nil {
    76  		err2 := vterrors.Wrapf(err, "SrvKeyspace for keyspace %s is corrupt in cell %s", keyspace, cell)
    77  		log.Errorf("%w", err2)
    78  		return err2
    79  	}
    80  
    81  	rs, err := wr.buildResharder(ctx, keyspace, workflow, sources, targets, cell, tabletTypes)
    82  	if err != nil {
    83  		return vterrors.Wrap(err, "buildResharder")
    84  	}
    85  
    86  	rs.onDDL = onDDL
    87  	rs.stopAfterCopy = stopAfterCopy
    88  	rs.deferSecondaryKeys = deferSecondaryKeys
    89  	if !skipSchemaCopy {
    90  		if err := rs.copySchema(ctx); err != nil {
    91  			return vterrors.Wrap(err, "copySchema")
    92  		}
    93  	}
    94  	if err := rs.createStreams(ctx); err != nil {
    95  		return vterrors.Wrap(err, "createStreams")
    96  	}
    97  
    98  	if autoStart {
    99  		if err := rs.startStreams(ctx); err != nil {
   100  			return vterrors.Wrap(err, "startStreams")
   101  		}
   102  	} else {
   103  		wr.Logger().Infof("Streams will not be started since -auto_start is set to false")
   104  	}
   105  	return nil
   106  }
   107  
   108  func (wr *Wrangler) buildResharder(ctx context.Context, keyspace, workflow string, sources, targets []string, cell, tabletTypes string) (*resharder, error) {
   109  	rs := &resharder{
   110  		wr:              wr,
   111  		keyspace:        keyspace,
   112  		workflow:        workflow,
   113  		sourcePrimaries: make(map[string]*topo.TabletInfo),
   114  		targetPrimaries: make(map[string]*topo.TabletInfo),
   115  		cell:            cell,
   116  		tabletTypes:     tabletTypes,
   117  	}
   118  	for _, shard := range sources {
   119  		si, err := wr.ts.GetShard(ctx, keyspace, shard)
   120  		if err != nil {
   121  			return nil, vterrors.Wrapf(err, "GetShard(%s) failed", shard)
   122  		}
   123  		if !si.IsPrimaryServing {
   124  			return nil, fmt.Errorf("source shard %v is not in serving state", shard)
   125  		}
   126  		rs.sourceShards = append(rs.sourceShards, si)
   127  		primary, err := wr.ts.GetTablet(ctx, si.PrimaryAlias)
   128  		if err != nil {
   129  			return nil, vterrors.Wrapf(err, "GetTablet(%s) failed", si.PrimaryAlias)
   130  		}
   131  		rs.sourcePrimaries[si.ShardName()] = primary
   132  	}
   133  	for _, shard := range targets {
   134  		si, err := wr.ts.GetShard(ctx, keyspace, shard)
   135  		if err != nil {
   136  			return nil, vterrors.Wrapf(err, "GetShard(%s) failed", shard)
   137  		}
   138  		if si.IsPrimaryServing {
   139  			return nil, fmt.Errorf("target shard %v is in serving state", shard)
   140  		}
   141  		rs.targetShards = append(rs.targetShards, si)
   142  		primary, err := wr.ts.GetTablet(ctx, si.PrimaryAlias)
   143  		if err != nil {
   144  			return nil, vterrors.Wrapf(err, "GetTablet(%s) failed", si.PrimaryAlias)
   145  		}
   146  		rs.targetPrimaries[si.ShardName()] = primary
   147  	}
   148  	if err := topotools.ValidateForReshard(rs.sourceShards, rs.targetShards); err != nil {
   149  		return nil, vterrors.Wrap(err, "ValidateForReshard")
   150  	}
   151  	if err := rs.validateTargets(ctx); err != nil {
   152  		return nil, vterrors.Wrap(err, "validateTargets")
   153  	}
   154  
   155  	vschema, err := wr.ts.GetVSchema(ctx, keyspace)
   156  	if err != nil {
   157  		return nil, vterrors.Wrap(err, "GetVSchema")
   158  	}
   159  	rs.vschema = vschema
   160  
   161  	if err := rs.readRefStreams(ctx); err != nil {
   162  		return nil, vterrors.Wrap(err, "readRefStreams")
   163  	}
   164  	return rs, nil
   165  }
   166  
   167  func (rs *resharder) validateTargets(ctx context.Context) error {
   168  	err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error {
   169  		targetPrimary := rs.targetPrimaries[target.ShardName()]
   170  		query := fmt.Sprintf("select 1 from _vt.vreplication where db_name=%s", encodeString(targetPrimary.DbName()))
   171  		p3qr, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query)
   172  		if err != nil {
   173  			return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query)
   174  		}
   175  		if len(p3qr.Rows) != 0 {
   176  			return errors.New("some streams already exist in the target shards, please clean them up and retry the command")
   177  		}
   178  		return nil
   179  	})
   180  	return err
   181  }
   182  
   183  func (rs *resharder) readRefStreams(ctx context.Context) error {
   184  	var mu sync.Mutex
   185  	err := rs.forAll(rs.sourceShards, func(source *topo.ShardInfo) error {
   186  		sourcePrimary := rs.sourcePrimaries[source.ShardName()]
   187  
   188  		query := fmt.Sprintf("select workflow, source, cell, tablet_types from _vt.vreplication where db_name=%s and message != 'FROZEN'", encodeString(sourcePrimary.DbName()))
   189  		p3qr, err := rs.wr.tmc.VReplicationExec(ctx, sourcePrimary.Tablet, query)
   190  		if err != nil {
   191  			return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", sourcePrimary.Tablet, query)
   192  		}
   193  		qr := sqltypes.Proto3ToResult(p3qr)
   194  
   195  		mu.Lock()
   196  		defer mu.Unlock()
   197  
   198  		mustCreate := false
   199  		var ref map[string]bool
   200  		if rs.refStreams == nil {
   201  			rs.refStreams = make(map[string]*refStream)
   202  			mustCreate = true
   203  		} else {
   204  			// Copy the ref streams for comparison.
   205  			ref = make(map[string]bool, len(rs.refStreams))
   206  			for k := range rs.refStreams {
   207  				ref[k] = true
   208  			}
   209  		}
   210  		for _, row := range qr.Rows {
   211  
   212  			workflow := row[0].ToString()
   213  			if workflow == "" {
   214  				return fmt.Errorf("VReplication streams must have named workflows for migration: shard: %s:%s", source.Keyspace(), source.ShardName())
   215  			}
   216  			var bls binlogdatapb.BinlogSource
   217  			rowBytes, err := row[1].ToBytes()
   218  			if err != nil {
   219  				return err
   220  			}
   221  			if err := prototext.Unmarshal(rowBytes, &bls); err != nil {
   222  				return vterrors.Wrapf(err, "prototext.Unmarshal: %v", row)
   223  			}
   224  			isReference, err := rs.blsIsReference(&bls)
   225  			if err != nil {
   226  				return vterrors.Wrap(err, "blsIsReference")
   227  			}
   228  			if !isReference {
   229  				continue
   230  			}
   231  			key := fmt.Sprintf("%s:%s:%s", workflow, bls.Keyspace, bls.Shard)
   232  			if mustCreate {
   233  				rs.refStreams[key] = &refStream{
   234  					workflow:    workflow,
   235  					bls:         &bls,
   236  					cell:        row[2].ToString(),
   237  					tabletTypes: row[3].ToString(),
   238  				}
   239  			} else {
   240  				if !ref[key] {
   241  					return fmt.Errorf("streams are mismatched across source shards for workflow: %s", workflow)
   242  				}
   243  				delete(ref, key)
   244  			}
   245  		}
   246  		if len(ref) != 0 {
   247  			return fmt.Errorf("streams are mismatched across source shards: %v", ref)
   248  		}
   249  		return nil
   250  	})
   251  	return err
   252  }
   253  
   254  // blsIsReference is partially copied from streamMigrater.templatize.
   255  // It reuses the constants from that function also.
   256  func (rs *resharder) blsIsReference(bls *binlogdatapb.BinlogSource) (bool, error) {
   257  	streamType := workflow.StreamTypeUnknown
   258  	for _, rule := range bls.Filter.Rules {
   259  		typ, err := rs.identifyRuleType(rule)
   260  		if err != nil {
   261  			return false, err
   262  		}
   263  
   264  		switch typ {
   265  		case workflow.StreamTypeSharded:
   266  			if streamType == workflow.StreamTypeReference {
   267  				return false, fmt.Errorf("cannot reshard streams with a mix of reference and sharded tables: %v", bls)
   268  			}
   269  			streamType = workflow.StreamTypeSharded
   270  		case workflow.StreamTypeReference:
   271  			if streamType == workflow.StreamTypeSharded {
   272  				return false, fmt.Errorf("cannot reshard streams with a mix of reference and sharded tables: %v", bls)
   273  			}
   274  			streamType = workflow.StreamTypeReference
   275  		}
   276  	}
   277  	return streamType == workflow.StreamTypeReference, nil
   278  }
   279  
   280  func (rs *resharder) identifyRuleType(rule *binlogdatapb.Rule) (workflow.StreamType, error) {
   281  	vtable, ok := rs.vschema.Tables[rule.Match]
   282  	if !ok && !schema.IsInternalOperationTableName(rule.Match) {
   283  		return 0, fmt.Errorf("table %v not found in vschema", rule.Match)
   284  	}
   285  	if vtable != nil && vtable.Type == vindexes.TypeReference {
   286  		return workflow.StreamTypeReference, nil
   287  	}
   288  	// In this case, 'sharded' means that it's not a reference
   289  	// table. We don't care about any other subtleties.
   290  	return workflow.StreamTypeSharded, nil
   291  }
   292  
   293  func (rs *resharder) copySchema(ctx context.Context) error {
   294  	oneSource := rs.sourceShards[0].PrimaryAlias
   295  	err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error {
   296  		return rs.wr.CopySchemaShard(ctx, oneSource, []string{"/.*"}, nil, false, rs.keyspace, target.ShardName(), 1*time.Second, false)
   297  	})
   298  	return err
   299  }
   300  
   301  func (rs *resharder) createStreams(ctx context.Context) error {
   302  	var excludeRules []*binlogdatapb.Rule
   303  	for tableName, table := range rs.vschema.Tables {
   304  		if table.Type == vindexes.TypeReference {
   305  			excludeRules = append(excludeRules, &binlogdatapb.Rule{
   306  				Match:  tableName,
   307  				Filter: "exclude",
   308  			})
   309  		}
   310  	}
   311  
   312  	err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error {
   313  		targetPrimary := rs.targetPrimaries[target.ShardName()]
   314  
   315  		ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, targetPrimary.DbName())
   316  
   317  		// copy excludeRules to prevent data race.
   318  		copyExcludeRules := append([]*binlogdatapb.Rule(nil), excludeRules...)
   319  		for _, source := range rs.sourceShards {
   320  			if !key.KeyRangesIntersect(target.KeyRange, source.KeyRange) {
   321  				continue
   322  			}
   323  			filter := &binlogdatapb.Filter{
   324  				Rules: append(copyExcludeRules, &binlogdatapb.Rule{
   325  					Match:  "/.*",
   326  					Filter: key.KeyRangeString(target.KeyRange),
   327  				}),
   328  			}
   329  			bls := &binlogdatapb.BinlogSource{
   330  				Keyspace:      rs.keyspace,
   331  				Shard:         source.ShardName(),
   332  				Filter:        filter,
   333  				StopAfterCopy: rs.stopAfterCopy,
   334  				OnDdl:         binlogdatapb.OnDDLAction(binlogdatapb.OnDDLAction_value[rs.onDDL]),
   335  			}
   336  			ig.AddRow(rs.workflow, bls, "", rs.cell, rs.tabletTypes,
   337  				int64(binlogdatapb.VReplicationWorkflowType_Reshard),
   338  				int64(binlogdatapb.VReplicationWorkflowSubType_None),
   339  				rs.deferSecondaryKeys)
   340  		}
   341  
   342  		for _, rstream := range rs.refStreams {
   343  			ig.AddRow(rstream.workflow, rstream.bls, "", rstream.cell, rstream.tabletTypes,
   344  				//todo: fix based on original stream
   345  				int64(binlogdatapb.VReplicationWorkflowType_Reshard),
   346  				int64(binlogdatapb.VReplicationWorkflowSubType_None),
   347  				rs.deferSecondaryKeys)
   348  		}
   349  		query := ig.String()
   350  		if _, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query); err != nil {
   351  			return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query)
   352  		}
   353  		return nil
   354  	})
   355  
   356  	return err
   357  }
   358  
   359  func (rs *resharder) startStreams(ctx context.Context) error {
   360  	err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error {
   361  		targetPrimary := rs.targetPrimaries[target.ShardName()]
   362  		query := fmt.Sprintf("update _vt.vreplication set state='Running' where db_name=%s", encodeString(targetPrimary.DbName()))
   363  		if _, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query); err != nil {
   364  			return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query)
   365  		}
   366  		return nil
   367  	})
   368  	return err
   369  }
   370  
   371  func (rs *resharder) forAll(shards []*topo.ShardInfo, f func(*topo.ShardInfo) error) error {
   372  	var wg sync.WaitGroup
   373  	allErrors := &concurrency.AllErrorRecorder{}
   374  	for _, shard := range shards {
   375  		wg.Add(1)
   376  		go func(shard *topo.ShardInfo) {
   377  			defer wg.Done()
   378  
   379  			if err := f(shard); err != nil {
   380  				allErrors.RecordError(err)
   381  			}
   382  		}(shard)
   383  	}
   384  	wg.Wait()
   385  	return allErrors.AggrError(vterrors.Aggregate)
   386  }