vitess.io/vitess@v0.16.2/go/vt/vtctl/reparentutil/replication.go

/*
Copyright 2021 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package reparentutil

import (
	"context"
	"sync"
	"time"

	"k8s.io/apimachinery/pkg/util/sets"

	"vitess.io/vitess/go/event"
	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/vt/concurrency"
	"vitess.io/vitess/go/vt/log"
	"vitess.io/vitess/go/vt/logutil"
	replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
	"vitess.io/vitess/go/vt/proto/vtrpc"
	"vitess.io/vitess/go/vt/topo"
	"vitess.io/vitess/go/vt/topo/topoproto"
	"vitess.io/vitess/go/vt/topotools"
	"vitess.io/vitess/go/vt/topotools/events"
	"vitess.io/vitess/go/vt/vterrors"
	"vitess.io/vitess/go/vt/vttablet/tmclient"
)

// FindValidEmergencyReparentCandidates will find candidates for an emergency
// reparent, and, if successful, return a mapping of those tablet aliases (as
// raw strings) to their replication positions for later comparison.
func FindValidEmergencyReparentCandidates(
	statusMap map[string]*replicationdatapb.StopReplicationStatus,
	primaryStatusMap map[string]*replicationdatapb.PrimaryStatus,
) (map[string]mysql.Position, error) {
	replicationStatusMap := make(map[string]*mysql.ReplicationStatus, len(statusMap))
	positionMap := make(map[string]mysql.Position)

	// Build out replication status list from proto types.
	for alias, statuspb := range statusMap {
		status := mysql.ProtoToReplicationStatus(statuspb.After)
		replicationStatusMap[alias] = &status
	}

	// Determine if we're GTID-based. If we are, we'll need to look for errant
	// GTIDs below.
	var (
		isGTIDBased                bool
		isNonGTIDBased             bool
		emptyRelayPosErrorRecorder concurrency.FirstErrorRecorder
	)

	for alias, status := range replicationStatusMap {
		if _, ok := status.RelayLogPosition.GTIDSet.(mysql.Mysql56GTIDSet); ok {
			isGTIDBased = true
		} else {
			isNonGTIDBased = true
		}

		if status.RelayLogPosition.IsZero() {
			// Potentially bail. If any other tablet is detected to have
			// GTID-based relay log positions, we will return the error recorded
			// here.
			emptyRelayPosErrorRecorder.RecordError(vterrors.Errorf(vtrpc.Code_UNAVAILABLE, "encountered tablet %v with no relay log position, when at least one other tablet in the status map has GTID based relay log positions", alias))
		}
	}

	if isGTIDBased && emptyRelayPosErrorRecorder.HasErrors() {
		return nil, emptyRelayPosErrorRecorder.Error()
	}

	if isGTIDBased && isNonGTIDBased {
		return nil, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "encountered mix of GTID-based and non-GTID-based relay logs")
	}

	// Create relevant position list of errant GTID-based positions for later
	// comparison.
	for alias, status := range replicationStatusMap {
		// If we're not GTID-based, no need to search for errant GTIDs, so just
		// add the position to the map and continue.
		if !isGTIDBased {
			positionMap[alias] = status.Position

			continue
		}

		// This condition should really never happen, since we did the same cast
		// in the earlier loop, but let's be doubly sure.
		relayLogGTIDSet, ok := status.RelayLogPosition.GTIDSet.(mysql.Mysql56GTIDSet)
		if !ok {
			return nil, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "we got a filled-in relay log position, but it's not of type Mysql56GTIDSet, even though we've determined we need to use GTID-based assessment")
		}

		// We need to remove this alias's status from the list, otherwise the
		// GTID diff will always be empty.
		statusList := make([]*mysql.ReplicationStatus, 0, len(replicationStatusMap)-1)

		for a, s := range replicationStatusMap {
			if a != alias {
				statusList = append(statusList, s)
			}
		}

		errantGTIDs, err := status.FindErrantGTIDs(statusList)
		switch {
		case err != nil:
			// Could not look up GTIDs to determine if we have any. It's not
			// safe to continue.
			return nil, err
		case len(errantGTIDs) != 0:
			// This tablet has errant GTIDs. It's not a valid candidate for
			// reparent, so don't insert it into the final mapping.
			log.Errorf("skipping %v because we detected errant GTIDs - %v", alias, errantGTIDs)
			continue
		}

		pos := mysql.Position{GTIDSet: relayLogGTIDSet}
		positionMap[alias] = pos
	}

	for alias, primaryStatus := range primaryStatusMap {
		executedPosition, err := mysql.DecodePosition(primaryStatus.Position)
		if err != nil {
			// vterrors.Wrapf already appends the underlying error, so we don't
			// pass err to the format string a second time.
			return nil, vterrors.Wrapf(err, "could not decode a primary status executed position for tablet %v", alias)
		}

		positionMap[alias] = executedPosition
	}

	return positionMap, nil
}
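
// The position map above is typically fed into a winner-selection step. As a
// minimal hypothetical sketch (exampleChooseCandidate and its selection rule
// are illustrative assumptions, not part of the Vitess API), a caller could
// pick any candidate whose position is at least as advanced as every other
// candidate's:
func exampleChooseCandidate(positionMap map[string]mysql.Position) (string, bool) {
	for alias, pos := range positionMap {
		isBest := true
		for otherAlias, otherPos := range positionMap {
			if otherAlias != alias && !pos.AtLeast(otherPos) {
				// pos is missing transactions that otherPos has, so this
				// tablet cannot be the winner.
				isBest = false
				break
			}
		}
		if isBest {
			return alias, true
		}
	}
	// No single candidate contains every other candidate's transactions.
	return "", false
}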

// ReplicaWasRunning returns true if a StopReplicationStatus indicates that the
// replica had running replication threads before being stopped. It returns an
// error if the Before state of replication is nil.
func ReplicaWasRunning(stopStatus *replicationdatapb.StopReplicationStatus) (bool, error) {
	if stopStatus == nil || stopStatus.Before == nil {
		return false, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "could not determine Before state of StopReplicationStatus %v", stopStatus)
	}

	replStatus := mysql.ProtoToReplicationStatus(stopStatus.Before)
	return (replStatus.IOState == mysql.ReplicationStateRunning) ||
		(replStatus.SQLState == mysql.ReplicationStateRunning), nil
}

// SQLThreadWasRunning returns true if a StopReplicationStatus indicates that
// the replica had a running SQL thread. It returns an error if the Before
// state of replication is nil.
func SQLThreadWasRunning(stopStatus *replicationdatapb.StopReplicationStatus) (bool, error) {
	if stopStatus == nil || stopStatus.Before == nil {
		return false, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "could not determine Before state of StopReplicationStatus %v", stopStatus)
	}

	replStatus := mysql.ProtoToReplicationStatus(stopStatus.Before)
	return replStatus.SQLState == mysql.ReplicationStateRunning, nil
}
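
// A hypothetical sketch (exampleCheckThreads is an illustrative assumption,
// not part of the Vitess API) contrasting the two helpers above:
// ReplicaWasRunning is true if either the IO or the SQL thread ran before the
// stop, while SQLThreadWasRunning requires the SQL thread specifically.
func exampleCheckThreads(stopStatus *replicationdatapb.StopReplicationStatus) {
	anyRunning, err := ReplicaWasRunning(stopStatus)
	if err != nil {
		// Both helpers fail the same way: a nil Before state.
		log.Errorf("cannot inspect Before state: %v", err)
		return
	}

	sqlRunning, _ := SQLThreadWasRunning(stopStatus)
	log.Infof("some thread was running: %v; sql thread was running: %v", anyRunning, sqlRunning)
}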

// SetReplicationSource is used to set the replication source on the specified
// tablet to the current shard primary (if available). It also figures out
// whether the tablet should be sending semi-sync ACKs and passes that to the
// tabletmanager RPC.
//
// It does not forcefully start replication. If we are unable to find the shard
// primary of the tablet in the topo server, we return without error.
func SetReplicationSource(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, tablet *topodatapb.Tablet) error {
	shardPrimary, err := topotools.GetShardPrimaryForTablet(ctx, ts, tablet)
	if err != nil {
		// If we didn't find the shard primary, we return without any error.
		return nil
	}

	durabilityName, err := ts.GetKeyspaceDurability(ctx, tablet.Keyspace)
	if err != nil {
		return err
	}
	log.Infof("Getting a new durability policy for %v", durabilityName)
	durability, err := GetDurabilityPolicy(durabilityName)
	if err != nil {
		return err
	}

	isSemiSync := IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet)
	return tmc.SetReplicationSource(ctx, tablet, shardPrimary.Alias, 0, "", false, isSemiSync)
}
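
// A hypothetical usage sketch (exampleRepointReplicas and its iteration are
// illustrative assumptions, not part of the Vitess API): after a
// reparent-style operation, a caller could use SetReplicationSource to repoint
// every non-primary tablet in a map at the current shard primary.
func exampleRepointReplicas(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, tabletMap map[string]*topo.TabletInfo) error {
	for alias, tabletInfo := range tabletMap {
		if tabletInfo.Tablet.Type == topodatapb.TabletType_PRIMARY {
			// Don't repoint the primary at itself.
			continue
		}
		if err := SetReplicationSource(ctx, ts, tmc, tabletInfo.Tablet); err != nil {
			return vterrors.Wrapf(err, "failed to set replication source on %v", alias)
		}
	}
	return nil
}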

// replicationSnapshot stores the status maps and the tablets that were
// reachable when running stopReplicationAndBuildStatusMaps.
type replicationSnapshot struct {
	statusMap        map[string]*replicationdatapb.StopReplicationStatus
	primaryStatusMap map[string]*replicationdatapb.PrimaryStatus
	reachableTablets []*topodatapb.Tablet
}

// stopReplicationAndBuildStatusMaps stops replication on all replicas, then
// collects and returns a mapping of TabletAlias (as string) to their stopped
// replication statuses, plus a similar mapping of primary statuses for any
// tablets that still think they are the primary. Apart from the status maps,
// it also returns the list of tablets that were reached.
func stopReplicationAndBuildStatusMaps(
	ctx context.Context,
	tmc tmclient.TabletManagerClient,
	ev *events.Reparent,
	tabletMap map[string]*topo.TabletInfo,
	stopReplicationTimeout time.Duration,
	ignoredTablets sets.Set[string],
	tabletToWaitFor *topodatapb.TabletAlias,
	durability Durabler,
	logger logutil.Logger,
) (*replicationSnapshot, error) {
	event.DispatchUpdate(ev, "stop replication on all replicas")

	var (
		m          sync.Mutex
		errChan    = make(chan concurrency.Error)
		allTablets []*topodatapb.Tablet
		res        = &replicationSnapshot{
			statusMap:        map[string]*replicationdatapb.StopReplicationStatus{},
			primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
			reachableTablets: []*topodatapb.Tablet{},
		}
	)

	groupCtx, groupCancel := context.WithTimeout(ctx, stopReplicationTimeout)
	defer groupCancel()

	fillStatus := func(alias string, tabletInfo *topo.TabletInfo, mustWaitForTablet bool) {
		var concurrencyErr concurrency.Error
		var err error
		defer func() {
			concurrencyErr.Err = err
			concurrencyErr.MustWaitFor = mustWaitForTablet
			errChan <- concurrencyErr
		}()

		logger.Infof("getting replication position from %v", alias)

		stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY)
		if err != nil {
			sqlErr, isSQLErr := mysql.NewSQLErrorFromError(err).(*mysql.SQLError)
			if isSQLErr && sqlErr != nil && sqlErr.Number() == mysql.ERNotReplica {
				var primaryStatus *replicationdatapb.PrimaryStatus

				primaryStatus, err = tmc.DemotePrimary(groupCtx, tabletInfo.Tablet)
				if err != nil {
					// Log the original error, then wrap it for the caller;
					// wrapping already appends the underlying error, so we
					// don't format it into the message a second time.
					logger.Warningf("replica %v thinks it's primary but we failed to demote it: %v", alias, err)
					err = vterrors.Wrapf(err, "replica %v thinks it's primary but we failed to demote it", alias)
					return
				}

				m.Lock()
				res.primaryStatusMap[alias] = primaryStatus
				res.reachableTablets = append(res.reachableTablets, tabletInfo.Tablet)
				m.Unlock()
			} else {
				logger.Warningf("failed to get replication status from %v: %v", alias, err)
				err = vterrors.Wrapf(err, "error when getting replication status for alias %v", alias)
			}
		} else {
			var sqlThreadRunning bool
			// Check if the sql thread was running for the tablet.
			sqlThreadRunning, err = SQLThreadWasRunning(stopReplicationStatus)
			if err == nil {
				// If the sql thread was running, then we will add the tablet to the status map and the list of
				// reachable tablets.
				if sqlThreadRunning {
					m.Lock()
					res.statusMap[alias] = stopReplicationStatus
					res.reachableTablets = append(res.reachableTablets, tabletInfo.Tablet)
					m.Unlock()
				} else {
					// If the sql thread was stopped, we do not consider the tablet as reachable.
					// The user must either explicitly ignore this tablet or start its replication.
					logger.Warningf("sql thread stopped on tablet - %v", alias)
					err = vterrors.New(vtrpc.Code_FAILED_PRECONDITION, "sql thread stopped on tablet - "+alias)
				}
			}
		}
	}

	tabletAliasToWaitFor := ""
	numErrorsToWaitFor := 0
	if tabletToWaitFor != nil {
		tabletAliasToWaitFor = topoproto.TabletAliasString(tabletToWaitFor)
	}
	for alias, tabletInfo := range tabletMap {
		allTablets = append(allTablets, tabletInfo.Tablet)
		if !ignoredTablets.Has(alias) {
			mustWaitFor := tabletAliasToWaitFor == alias
			if mustWaitFor {
				numErrorsToWaitFor++
			}
			go fillStatus(alias, tabletInfo, mustWaitFor)
		}
	}

	errgroup := concurrency.ErrorGroup{
		NumGoroutines:        len(tabletMap) - ignoredTablets.Len(),
		NumRequiredSuccesses: len(tabletMap) - ignoredTablets.Len() - 1,
		// We set the number of allowed errors to a very high value, because we
		// don't want to exit early even in case of multiple failures. We rely
		// on the revoke function below to determine if we have more failures
		// than we can tolerate.
		NumAllowedErrors:   len(tabletMap),
		NumErrorsToWaitFor: numErrorsToWaitFor,
	}

	errRecorder := errgroup.Wait(groupCancel, errChan)
	if len(errRecorder.Errors) <= 1 {
		return res, nil
	}
	// Check that the tablets we were able to reach are sufficient for us to
	// guarantee that no new write will be accepted by any tablet.
	revokeSuccessful := haveRevoked(durability, res.reachableTablets, allTablets)
	if !revokeSuccessful {
		return nil, vterrors.Wrapf(errRecorder.Error(), "could not reach sufficient tablets to guarantee safety")
	}

	return res, nil
}
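
// A hypothetical sketch (exampleSnapshotToCandidates is an illustrative
// assumption, not part of the Vitess API) of how the snapshot produced above
// feeds the next step of an emergency reparent: its two status maps are
// exactly the inputs that FindValidEmergencyReparentCandidates expects.
func exampleSnapshotToCandidates(snapshot *replicationSnapshot) (map[string]mysql.Position, error) {
	return FindValidEmergencyReparentCandidates(snapshot.statusMap, snapshot.primaryStatusMap)
}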

// WaitForRelayLogsToApply blocks execution waiting for the given tablet's relay
// logs to apply, unless the specified context is canceled or exceeded.
// Typically a caller will set a timeout of WaitReplicasTimeout on a context and
// use that context with this function.
func WaitForRelayLogsToApply(ctx context.Context, tmc tmclient.TabletManagerClient, tabletInfo *topo.TabletInfo, status *replicationdatapb.StopReplicationStatus) error {
	switch status.After.RelayLogPosition {
	case "":
		return tmc.WaitForPosition(ctx, tabletInfo.Tablet, status.After.RelayLogSourceBinlogEquivalentPosition)
	default:
		return tmc.WaitForPosition(ctx, tabletInfo.Tablet, status.After.RelayLogPosition)
	}
}
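
// A hypothetical sketch (exampleWaitWithTimeout and its timeout parameter are
// illustrative assumptions, not part of the Vitess API) of the calling pattern
// described in the doc comment above: bound the wait with a
// WaitReplicasTimeout-style deadline on the context.
func exampleWaitWithTimeout(ctx context.Context, tmc tmclient.TabletManagerClient, tabletInfo *topo.TabletInfo, status *replicationdatapb.StopReplicationStatus, waitReplicasTimeout time.Duration) error {
	waitCtx, cancel := context.WithTimeout(ctx, waitReplicasTimeout)
	defer cancel()

	return WaitForRelayLogsToApply(waitCtx, tmc, tabletInfo, status)
}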