vitess.io/vitess@v0.16.2/go/vt/vtctl/reparentutil/util.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package reparentutil 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/mysql" 26 "vitess.io/vitess/go/vt/concurrency" 27 "vitess.io/vitess/go/vt/log" 28 "vitess.io/vitess/go/vt/logutil" 29 "vitess.io/vitess/go/vt/topo" 30 "vitess.io/vitess/go/vt/topo/topoproto" 31 "vitess.io/vitess/go/vt/topotools" 32 "vitess.io/vitess/go/vt/vtctl/reparentutil/promotionrule" 33 "vitess.io/vitess/go/vt/vterrors" 34 "vitess.io/vitess/go/vt/vttablet/tmclient" 35 36 replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" 37 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 38 "vitess.io/vitess/go/vt/proto/vtrpc" 39 ) 40 41 // ChooseNewPrimary finds a tablet that should become a primary after reparent. 42 // The criteria for the new primary-elect are (preferably) to be in the same 43 // cell as the current primary, and to be different from avoidPrimaryAlias. The 44 // tablet with the most advanced replication position is chosen to minimize the 45 // amount of time spent catching up with the current primary. Further ties are 46 // broken by the durability rules. 47 // Note that the search for the most advanced replication position will race 48 // with transactions being executed on the current primary, so when all tablets 49 // are at roughly the same position, then the choice of new primary-elect will 50 // be somewhat unpredictable. 51 func ChooseNewPrimary( 52 ctx context.Context, 53 tmc tmclient.TabletManagerClient, 54 shardInfo *topo.ShardInfo, 55 tabletMap map[string]*topo.TabletInfo, 56 avoidPrimaryAlias *topodatapb.TabletAlias, 57 waitReplicasTimeout time.Duration, 58 durability Durabler, 59 // (TODO:@ajm188) it's a little gross we need to pass this, maybe embed in the context? 60 logger logutil.Logger, 61 ) (*topodatapb.TabletAlias, error) { 62 63 var primaryCell string 64 if shardInfo.PrimaryAlias != nil { 65 primaryCell = shardInfo.PrimaryAlias.Cell 66 } 67 68 var ( 69 wg sync.WaitGroup 70 // mutex to secure the next two fields from concurrent access 71 mu sync.Mutex 72 // tablets that are possible candidates to be the new primary and their positions 73 validTablets []*topodatapb.Tablet 74 tabletPositions []mysql.Position 75 ) 76 77 for _, tablet := range tabletMap { 78 switch { 79 case primaryCell != "" && tablet.Alias.Cell != primaryCell: 80 continue 81 case avoidPrimaryAlias != nil && topoproto.TabletAliasEqual(tablet.Alias, avoidPrimaryAlias): 82 continue 83 case tablet.Tablet.Type != topodatapb.TabletType_REPLICA: 84 continue 85 } 86 87 wg.Add(1) 88 89 go func(tablet *topodatapb.Tablet) { 90 defer wg.Done() 91 // find and store the positions for the tablet 92 pos, err := findPositionForTablet(ctx, tablet, logger, tmc, waitReplicasTimeout) 93 mu.Lock() 94 defer mu.Unlock() 95 if err == nil { 96 validTablets = append(validTablets, tablet) 97 tabletPositions = append(tabletPositions, pos) 98 } 99 }(tablet.Tablet) 100 } 101 102 wg.Wait() 103 104 // return nothing if there are no valid tablets available 105 if len(validTablets) == 0 { 106 return nil, nil 107 } 108 109 // sort the tablets for finding the best primary 110 err := sortTabletsForReparent(validTablets, tabletPositions, durability) 111 if err != nil { 112 return nil, err 113 } 114 115 return validTablets[0].Alias, nil 116 } 117 118 // findPositionForTablet processes the replication position for a single tablet and 119 // returns it. It is safe to call from multiple goroutines. 120 func findPositionForTablet(ctx context.Context, tablet *topodatapb.Tablet, logger logutil.Logger, tmc tmclient.TabletManagerClient, waitTimeout time.Duration) (mysql.Position, error) { 121 logger.Infof("getting replication position from %v", topoproto.TabletAliasString(tablet.Alias)) 122 123 ctx, cancel := context.WithTimeout(ctx, waitTimeout) 124 defer cancel() 125 126 status, err := tmc.ReplicationStatus(ctx, tablet) 127 if err != nil { 128 sqlErr, isSQLErr := mysql.NewSQLErrorFromError(err).(*mysql.SQLError) 129 if isSQLErr && sqlErr != nil && sqlErr.Number() == mysql.ERNotReplica { 130 logger.Warningf("no replication statue from %v, using empty gtid set", topoproto.TabletAliasString(tablet.Alias)) 131 return mysql.Position{}, nil 132 } 133 logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", topoproto.TabletAliasString(tablet.Alias), err) 134 return mysql.Position{}, err 135 } 136 137 // Use the relay log position if available, otherwise use the executed GTID set (binary log position). 138 positionString := status.Position 139 if status.RelayLogPosition != "" { 140 positionString = status.RelayLogPosition 141 } 142 pos, err := mysql.DecodePosition(positionString) 143 if err != nil { 144 logger.Warningf("cannot decode replica position %v for tablet %v, ignoring tablet: %v", positionString, topoproto.TabletAliasString(tablet.Alias), err) 145 return mysql.Position{}, err 146 } 147 148 return pos, nil 149 } 150 151 // FindCurrentPrimary returns the current primary tablet of a shard, if any. The 152 // current primary is whichever tablet of type PRIMARY (if any) has the most 153 // recent PrimaryTermStartTime, which is the same rule that vtgate uses to route 154 // primary traffic. 155 // 156 // The return value is nil if the current primary cannot be definitively 157 // determined. This can happen either if no tablet claims to be type PRIMARY, or 158 // if multiple tablets claim to be type PRIMARY and happen to have the same 159 // PrimaryTermStartTime timestamp (a tie). 160 // 161 // The tabletMap must be a complete map (not a partial result) for the shard. 162 func FindCurrentPrimary(tabletMap map[string]*topo.TabletInfo, logger logutil.Logger) *topo.TabletInfo { 163 var ( 164 currentPrimary *topo.TabletInfo 165 currentTermStartTime time.Time 166 ) 167 168 for _, tablet := range tabletMap { 169 if tablet.Type != topodatapb.TabletType_PRIMARY { 170 continue 171 } 172 173 if currentPrimary == nil { 174 currentPrimary = tablet 175 currentTermStartTime = tablet.GetPrimaryTermStartTime() 176 continue 177 } 178 179 otherPrimaryTermStartTime := tablet.GetPrimaryTermStartTime() 180 if otherPrimaryTermStartTime.After(currentTermStartTime) { 181 currentPrimary = tablet 182 currentTermStartTime = otherPrimaryTermStartTime 183 } else if otherPrimaryTermStartTime.Equal(currentTermStartTime) { 184 // A tie should not happen unless the upgrade order was violated 185 // (e.g. some vttablets have not been upgraded) or if we get really 186 // unlucky. 187 // 188 // Either way, we need to be safe and not assume we know who the 189 // true primary is. 190 logger.Warningf( 191 "Multiple primaries (%v and %v) are tied for PrimaryTermStartTime; can't determine the true primary.", 192 topoproto.TabletAliasString(currentPrimary.Alias), 193 topoproto.TabletAliasString(tablet.Alias), 194 ) 195 196 return nil 197 } 198 } 199 200 return currentPrimary 201 } 202 203 // ShardReplicationStatuses returns the ReplicationStatus for each tablet in a shard. 204 func ShardReplicationStatuses(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, keyspace, shard string) ([]*topo.TabletInfo, []*replicationdatapb.Status, error) { 205 tabletMap, err := ts.GetTabletMapForShard(ctx, keyspace, shard) 206 if err != nil { 207 return nil, nil, err 208 } 209 tablets := topotools.CopyMapValues(tabletMap, []*topo.TabletInfo{}).([]*topo.TabletInfo) 210 211 log.Infof("Gathering tablet replication status for: %v", tablets) 212 wg := sync.WaitGroup{} 213 rec := concurrency.AllErrorRecorder{} 214 result := make([]*replicationdatapb.Status, len(tablets)) 215 216 for i, ti := range tablets { 217 // Don't scan tablets that won't return something 218 // useful. Otherwise, you'll end up waiting for a timeout. 219 if ti.Type == topodatapb.TabletType_PRIMARY { 220 wg.Add(1) 221 go func(i int, ti *topo.TabletInfo) { 222 defer wg.Done() 223 pos, err := tmc.PrimaryPosition(ctx, ti.Tablet) 224 if err != nil { 225 rec.RecordError(fmt.Errorf("PrimaryPosition(%v) failed: %v", ti.AliasString(), err)) 226 return 227 } 228 result[i] = &replicationdatapb.Status{ 229 Position: pos, 230 } 231 }(i, ti) 232 } else if ti.IsReplicaType() { 233 wg.Add(1) 234 go func(i int, ti *topo.TabletInfo) { 235 defer wg.Done() 236 status, err := tmc.ReplicationStatus(ctx, ti.Tablet) 237 if err != nil { 238 rec.RecordError(fmt.Errorf("ReplicationStatus(%v) failed: %v", ti.AliasString(), err)) 239 return 240 } 241 result[i] = status 242 }(i, ti) 243 } 244 } 245 wg.Wait() 246 return tablets, result, rec.Error() 247 } 248 249 // getValidCandidatesAndPositionsAsList converts the valid candidates from a map to a list of tablets, making it easier to sort 250 func getValidCandidatesAndPositionsAsList(validCandidates map[string]mysql.Position, tabletMap map[string]*topo.TabletInfo) ([]*topodatapb.Tablet, []mysql.Position, error) { 251 var validTablets []*topodatapb.Tablet 252 var tabletPositions []mysql.Position 253 for tabletAlias, position := range validCandidates { 254 tablet, isFound := tabletMap[tabletAlias] 255 if !isFound { 256 return nil, nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "candidate %v not found in the tablet map; this an impossible situation", tabletAlias) 257 } 258 validTablets = append(validTablets, tablet.Tablet) 259 tabletPositions = append(tabletPositions, position) 260 } 261 return validTablets, tabletPositions, nil 262 } 263 264 // restrictValidCandidates is used to restrict some candidates from being considered eligible for becoming the intermediate source or the final promotion candidate 265 func restrictValidCandidates(validCandidates map[string]mysql.Position, tabletMap map[string]*topo.TabletInfo) (map[string]mysql.Position, error) { 266 restrictedValidCandidates := make(map[string]mysql.Position) 267 for candidate, position := range validCandidates { 268 candidateInfo, ok := tabletMap[candidate] 269 if !ok { 270 return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "candidate %v not found in the tablet map; this an impossible situation", candidate) 271 } 272 // We do not allow BACKUP, DRAINED or RESTORE type of tablets to be considered for being the replication source or the candidate for primary 273 if topoproto.IsTypeInList(candidateInfo.Type, []topodatapb.TabletType{topodatapb.TabletType_BACKUP, topodatapb.TabletType_RESTORE, topodatapb.TabletType_DRAINED}) { 274 continue 275 } 276 restrictedValidCandidates[candidate] = position 277 } 278 return restrictedValidCandidates, nil 279 } 280 281 func findCandidate( 282 intermediateSource *topodatapb.Tablet, 283 possibleCandidates []*topodatapb.Tablet, 284 ) *topodatapb.Tablet { 285 // check whether the one we have selected as the source belongs to the candidate list provided 286 for _, candidate := range possibleCandidates { 287 if topoproto.TabletAliasEqual(intermediateSource.Alias, candidate.Alias) { 288 return candidate 289 } 290 } 291 // return the first candidate from this list, if it isn't empty 292 if len(possibleCandidates) > 0 { 293 return possibleCandidates[0] 294 } 295 return nil 296 } 297 298 // getTabletsWithPromotionRules gets the tablets with the given promotion rule from the list of tablets 299 func getTabletsWithPromotionRules(durability Durabler, tablets []*topodatapb.Tablet, rule promotionrule.CandidatePromotionRule) (res []*topodatapb.Tablet) { 300 for _, candidate := range tablets { 301 promotionRule := PromotionRule(durability, candidate) 302 if promotionRule == rule { 303 res = append(res, candidate) 304 } 305 } 306 return res 307 } 308 309 // waitForCatchUp is used to wait for the given tablet until it has caught up to the source 310 func waitForCatchUp( 311 ctx context.Context, 312 tmc tmclient.TabletManagerClient, 313 logger logutil.Logger, 314 newPrimary *topodatapb.Tablet, 315 source *topodatapb.Tablet, 316 waitTime time.Duration, 317 ) error { 318 logger.Infof("waiting for %v to catch up to %v", newPrimary.Alias, source.Alias) 319 // Find the primary position of the previous primary 320 pos, err := tmc.PrimaryPosition(ctx, source) 321 if err != nil { 322 return err 323 } 324 325 // Wait until the new primary has caught upto that position 326 waitForPosCtx, cancelFunc := context.WithTimeout(ctx, waitTime) 327 defer cancelFunc() 328 err = tmc.WaitForPosition(waitForPosCtx, newPrimary, pos) 329 if err != nil { 330 return err 331 } 332 return nil 333 }