vitess.io/vitess@v0.16.2/go/vt/vtctl/reparentutil/util.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package reparentutil
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"vitess.io/vitess/go/mysql"
    26  	"vitess.io/vitess/go/vt/concurrency"
    27  	"vitess.io/vitess/go/vt/log"
    28  	"vitess.io/vitess/go/vt/logutil"
    29  	"vitess.io/vitess/go/vt/topo"
    30  	"vitess.io/vitess/go/vt/topo/topoproto"
    31  	"vitess.io/vitess/go/vt/topotools"
    32  	"vitess.io/vitess/go/vt/vtctl/reparentutil/promotionrule"
    33  	"vitess.io/vitess/go/vt/vterrors"
    34  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    35  
    36  	replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
    37  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    38  	"vitess.io/vitess/go/vt/proto/vtrpc"
    39  )
    40  
    41  // ChooseNewPrimary finds a tablet that should become a primary after reparent.
    42  // The criteria for the new primary-elect are (preferably) to be in the same
    43  // cell as the current primary, and to be different from avoidPrimaryAlias. The
    44  // tablet with the most advanced replication position is chosen to minimize the
    45  // amount of time spent catching up with the current primary. Further ties are
    46  // broken by the durability rules.
    47  // Note that the search for the most advanced replication position will race
    48  // with transactions being executed on the current primary, so when all tablets
    49  // are at roughly the same position, then the choice of new primary-elect will
    50  // be somewhat unpredictable.
    51  func ChooseNewPrimary(
    52  	ctx context.Context,
    53  	tmc tmclient.TabletManagerClient,
    54  	shardInfo *topo.ShardInfo,
    55  	tabletMap map[string]*topo.TabletInfo,
    56  	avoidPrimaryAlias *topodatapb.TabletAlias,
    57  	waitReplicasTimeout time.Duration,
    58  	durability Durabler,
    59  	// (TODO:@ajm188) it's a little gross we need to pass this, maybe embed in the context?
    60  	logger logutil.Logger,
    61  ) (*topodatapb.TabletAlias, error) {
    62  
    63  	var primaryCell string
    64  	if shardInfo.PrimaryAlias != nil {
    65  		primaryCell = shardInfo.PrimaryAlias.Cell
    66  	}
    67  
    68  	var (
    69  		wg sync.WaitGroup
    70  		// mutex to secure the next two fields from concurrent access
    71  		mu sync.Mutex
    72  		// tablets that are possible candidates to be the new primary and their positions
    73  		validTablets    []*topodatapb.Tablet
    74  		tabletPositions []mysql.Position
    75  	)
    76  
    77  	for _, tablet := range tabletMap {
    78  		switch {
    79  		case primaryCell != "" && tablet.Alias.Cell != primaryCell:
    80  			continue
    81  		case avoidPrimaryAlias != nil && topoproto.TabletAliasEqual(tablet.Alias, avoidPrimaryAlias):
    82  			continue
    83  		case tablet.Tablet.Type != topodatapb.TabletType_REPLICA:
    84  			continue
    85  		}
    86  
    87  		wg.Add(1)
    88  
    89  		go func(tablet *topodatapb.Tablet) {
    90  			defer wg.Done()
    91  			// find and store the positions for the tablet
    92  			pos, err := findPositionForTablet(ctx, tablet, logger, tmc, waitReplicasTimeout)
    93  			mu.Lock()
    94  			defer mu.Unlock()
    95  			if err == nil {
    96  				validTablets = append(validTablets, tablet)
    97  				tabletPositions = append(tabletPositions, pos)
    98  			}
    99  		}(tablet.Tablet)
   100  	}
   101  
   102  	wg.Wait()
   103  
   104  	// return nothing if there are no valid tablets available
   105  	if len(validTablets) == 0 {
   106  		return nil, nil
   107  	}
   108  
   109  	// sort the tablets for finding the best primary
   110  	err := sortTabletsForReparent(validTablets, tabletPositions, durability)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  
   115  	return validTablets[0].Alias, nil
   116  }
   117  
   118  // findPositionForTablet processes the replication position for a single tablet and
   119  // returns it. It is safe to call from multiple goroutines.
   120  func findPositionForTablet(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (mysql.Position, error) {
   121  	logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias))
   122  
   123  	ctx, cancel := context.WithTimeout(ctx, waitTimeout)
   124  	defer cancel()
   125  
   126  	status, err := tmc.ReplicationStatus(ctx, tablet)
   127  	if err != nil {
   128  		sqlErr, isSQLErr := mysql.NewSQLErrorFromError(err).(*mysql.SQLError)
   129  		if isSQLErr && sqlErr != nil && sqlErr.Number() == mysql.ERNotReplica {
   130  			logger.Warningf("no replication statue from %v, using empty gtid set", topoproto.TabletAliasString(tablet.Alias))
   131  			return mysql.Position{}, nil
   132  		}
   133  		logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err)
   134  		return mysql.Position{}, err
   135  	}
   136  
   137  	// Use the relay log position if available, otherwise use the executed GTID set (binary log position).
   138  	positionString := status.Position
   139  	if status.RelayLogPosition != "" {
   140  		positionString = status.RelayLogPosition
   141  	}
   142  	pos, err := mysql.DecodePosition(positionString)
   143  	if err != nil {
   144  		logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", positionString, topoproto.TabletAliasString(tablet.Alias), err)
   145  		return mysql.Position{}, err
   146  	}
   147  
   148  	return pos, nil
   149  }
   150  
   151  // FindCurrentPrimary returns the current primary tablet of a shard, if any. The
   152  // current primary is whichever tablet of type PRIMARY (if any) has the most
   153  // recent PrimaryTermStartTime, which is the same rule that vtgate uses to route
   154  // primary traffic.
   155  //
   156  // The return value is nil if the current primary cannot be definitively
   157  // determined. This can happen either if no tablet claims to be type PRIMARY, or
   158  // if multiple tablets claim to be type PRIMARY and happen to have the same
   159  // PrimaryTermStartTime timestamp (a tie).
   160  //
   161  // The tabletMap must be a complete map (not a partial result) for the shard.
   162  func FindCurrentPrimary(tabletMap map[string]*topo.TabletInfo, logger logutil.Logger) *topo.TabletInfo {
   163  	var (
   164  		currentPrimary       *topo.TabletInfo
   165  		currentTermStartTime time.Time
   166  	)
   167  
   168  	for _, tablet := range tabletMap {
   169  		if tablet.Type != topodatapb.TabletType_PRIMARY {
   170  			continue
   171  		}
   172  
   173  		if currentPrimary == nil {
   174  			currentPrimary = tablet
   175  			currentTermStartTime = tablet.GetPrimaryTermStartTime()
   176  			continue
   177  		}
   178  
   179  		otherPrimaryTermStartTime := tablet.GetPrimaryTermStartTime()
   180  		if otherPrimaryTermStartTime.After(currentTermStartTime) {
   181  			currentPrimary = tablet
   182  			currentTermStartTime = otherPrimaryTermStartTime
   183  		} else if otherPrimaryTermStartTime.Equal(currentTermStartTime) {
   184  			// A tie should not happen unless the upgrade order was violated
   185  			// (e.g. some vttablets have not been upgraded) or if we get really
   186  			// unlucky.
   187  			//
   188  			// Either way, we need to be safe and not assume we know who the
   189  			// true primary is.
   190  			logger.Warningf(
   191  				"Multiple primaries (%v and %v) are tied for PrimaryTermStartTime; can't determine the true primary.",
   192  				topoproto.TabletAliasString(currentPrimary.Alias),
   193  				topoproto.TabletAliasString(tablet.Alias),
   194  			)
   195  
   196  			return nil
   197  		}
   198  	}
   199  
   200  	return currentPrimary
   201  }
   202  
   203  // ShardReplicationStatuses returns the ReplicationStatus for each tablet in a shard.
   204  func ShardReplicationStatuses(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, keyspace, shard string) ([]*topo.TabletInfo, []*replicationdatapb.Status, error) {
   205  	tabletMap, err := ts.GetTabletMapForShard(ctx, keyspace, shard)
   206  	if err != nil {
   207  		return nil, nil, err
   208  	}
   209  	tablets := topotools.CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo)
   210  
   211  	log.Infof("Gathering tablet replication status for: %v", tablets)
   212  	wg := sync.WaitGroup{}
   213  	rec := concurrency.AllErrorRecorder{}
   214  	result := make([]*replicationdatapb.Status, len(tablets))
   215  
   216  	for i, ti := range tablets {
   217  		// Don't scan tablets that won't return something
   218  		// useful. Otherwise, you'll end up waiting for a timeout.
   219  		if ti.Type == topodatapb.TabletType_PRIMARY {
   220  			wg.Add(1)
   221  			go func(i int, ti *topo.TabletInfo) {
   222  				defer wg.Done()
   223  				pos, err := tmc.PrimaryPosition(ctx, ti.Tablet)
   224  				if err != nil {
   225  					rec.RecordError(fmt.Errorf("PrimaryPosition(%v) failed: %v", ti.AliasString(), err))
   226  					return
   227  				}
   228  				result[i] = &replicationdatapb.Status{
   229  					Position: pos,
   230  				}
   231  			}(i, ti)
   232  		} else if ti.IsReplicaType() {
   233  			wg.Add(1)
   234  			go func(i int, ti *topo.TabletInfo) {
   235  				defer wg.Done()
   236  				status, err := tmc.ReplicationStatus(ctx, ti.Tablet)
   237  				if err != nil {
   238  					rec.RecordError(fmt.Errorf("ReplicationStatus(%v) failed: %v", ti.AliasString(), err))
   239  					return
   240  				}
   241  				result[i] = status
   242  			}(i, ti)
   243  		}
   244  	}
   245  	wg.Wait()
   246  	return tablets, result, rec.Error()
   247  }
   248  
   249  // getValidCandidatesAndPositionsAsList converts the valid candidates from a map to a list of tablets, making it easier to sort
   250  func getValidCandidatesAndPositionsAsList(validCandidates map[string]mysql.Position, tabletMap map[string]*topo.TabletInfo) ([]*topodatapb.Tablet, []mysql.Position, error) {
   251  	var validTablets []*topodatapb.Tablet
   252  	var tabletPositions []mysql.Position
   253  	for tabletAlias, position := range validCandidates {
   254  		tablet, isFound := tabletMap[tabletAlias]
   255  		if !isFound {
   256  			return nil, nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "candidate %v not found in the tablet map; this an impossible situation", tabletAlias)
   257  		}
   258  		validTablets = append(validTablets, tablet.Tablet)
   259  		tabletPositions = append(tabletPositions, position)
   260  	}
   261  	return validTablets, tabletPositions, nil
   262  }
   263  
   264  // restrictValidCandidates is used to restrict some candidates from being considered eligible for becoming the intermediate source or the final promotion candidate
   265  func restrictValidCandidates(validCandidates map[string]mysql.Position, tabletMap map[string]*topo.TabletInfo) (map[string]mysql.Position, error) {
   266  	restrictedValidCandidates := make(map[string]mysql.Position)
   267  	for candidate, position := range validCandidates {
   268  		candidateInfo, ok := tabletMap[candidate]
   269  		if !ok {
   270  			return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "candidate %v not found in the tablet map; this an impossible situation", candidate)
   271  		}
   272  		// We do not allow BACKUP, DRAINED or RESTORE type of tablets to be considered for being the replication source or the candidate for primary
   273  		if topoproto.IsTypeInList(candidateInfo.Type, []topodatapb.TabletType{topodatapb.TabletType_BACKUP, topodatapb.TabletType_RESTORE, topodatapb.TabletType_DRAINED}) {
   274  			continue
   275  		}
   276  		restrictedValidCandidates[candidate] = position
   277  	}
   278  	return restrictedValidCandidates, nil
   279  }
   280  
   281  func findCandidate(
   282  	intermediateSource *topodatapb.Tablet,
   283  	possibleCandidates []*topodatapb.Tablet,
   284  ) *topodatapb.Tablet {
   285  	// check whether the one we have selected as the source belongs to the candidate list provided
   286  	for _, candidate := range possibleCandidates {
   287  		if topoproto.TabletAliasEqual(intermediateSource.Alias, candidate.Alias) {
   288  			return candidate
   289  		}
   290  	}
   291  	// return the first candidate from this list, if it isn't empty
   292  	if len(possibleCandidates) > 0 {
   293  		return possibleCandidates[0]
   294  	}
   295  	return nil
   296  }
   297  
   298  // getTabletsWithPromotionRules gets the tablets with the given promotion rule from the list of tablets
   299  func getTabletsWithPromotionRules(durability Durabler, tablets []*topodatapb.Tablet, rule promotionrule.CandidatePromotionRule) (res []*topodatapb.Tablet) {
   300  	for _, candidate := range tablets {
   301  		promotionRule := PromotionRule(durability, candidate)
   302  		if promotionRule == rule {
   303  			res = append(res, candidate)
   304  		}
   305  	}
   306  	return res
   307  }
   308  
   309  // waitForCatchUp is used to wait for the given tablet until it has caught up to the source
   310  func waitForCatchUp(
   311  	ctx context.Context,
   312  	tmc tmclient.TabletManagerClient,
   313  	logger logutil.Logger,
   314  	newPrimary *topodatapb.Tablet,
   315  	source *topodatapb.Tablet,
   316  	waitTime time.Duration,
   317  ) error {
   318  	logger.Infof("waiting for %v to catch up to %v", newPrimary.Alias, source.Alias)
   319  	// Find the primary position of the previous primary
   320  	pos, err := tmc.PrimaryPosition(ctx, source)
   321  	if err != nil {
   322  		return err
   323  	}
   324  
   325  	// Wait until the new primary has caught upto that position
   326  	waitForPosCtx, cancelFunc := context.WithTimeout(ctx, waitTime)
   327  	defer cancelFunc()
   328  	err = tmc.WaitForPosition(waitForPosCtx, newPrimary, pos)
   329  	if err != nil {
   330  		return err
   331  	}
   332  	return nil
   333  }