vitess.io/vitess@v0.16.2/go/vt/vtctl/reparentutil/replication.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package reparentutil 18 19 import ( 20 "context" 21 "sync" 22 "time" 23 24 "k8s.io/apimachinery/pkg/util/sets" 25 26 "vitess.io/vitess/go/event" 27 "vitess.io/vitess/go/mysql" 28 "vitess.io/vitess/go/vt/concurrency" 29 "vitess.io/vitess/go/vt/log" 30 "vitess.io/vitess/go/vt/logutil" 31 replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" 32 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 33 "vitess.io/vitess/go/vt/proto/vtrpc" 34 "vitess.io/vitess/go/vt/topo" 35 "vitess.io/vitess/go/vt/topo/topoproto" 36 "vitess.io/vitess/go/vt/topotools" 37 "vitess.io/vitess/go/vt/topotools/events" 38 "vitess.io/vitess/go/vt/vterrors" 39 "vitess.io/vitess/go/vt/vttablet/tmclient" 40 ) 41 42 // FindValidEmergencyReparentCandidates will find candidates for an emergency 43 // reparent, and, if successful, return a mapping of those tablet aliases (as 44 // raw strings) to their replication positions for later comparison. 45 func FindValidEmergencyReparentCandidates( 46 statusMap map[string]*replicationdatapb.StopReplicationStatus, 47 primaryStatusMap map[string]*replicationdatapb.PrimaryStatus, 48 ) (map[string]mysql.Position, error) { 49 replicationStatusMap := make(map[string]*mysql.ReplicationStatus, len(statusMap)) 50 positionMap := make(map[string]mysql.Position) 51 52 // Build out replication status list from proto types. 53 for alias, statuspb := range statusMap { 54 status := mysql.ProtoToReplicationStatus(statuspb.After) 55 replicationStatusMap[alias] = &status 56 } 57 58 // Determine if we're GTID-based. If we are, we'll need to look for errant 59 // GTIDs below. 60 var ( 61 isGTIDBased bool 62 isNonGTIDBased bool 63 emptyRelayPosErrorRecorder concurrency.FirstErrorRecorder 64 ) 65 66 for alias, status := range replicationStatusMap { 67 if _, ok := status.RelayLogPosition.GTIDSet.(mysql.Mysql56GTIDSet); ok { 68 isGTIDBased = true 69 } else { 70 isNonGTIDBased = true 71 } 72 73 if status.RelayLogPosition.IsZero() { 74 // Potentially bail. If any other tablet is detected to have 75 // GTID-based relay log positions, we will return the error recorded 76 // here. 77 emptyRelayPosErrorRecorder.RecordError(vterrors.Errorf(vtrpc.Code_UNAVAILABLE, "encountered tablet %v with no relay log position, when at least one other tablet in the status map has GTID based relay log positions", alias)) 78 } 79 } 80 81 if isGTIDBased && emptyRelayPosErrorRecorder.HasErrors() { 82 return nil, emptyRelayPosErrorRecorder.Error() 83 } 84 85 if isGTIDBased && isNonGTIDBased { 86 return nil, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "encountered mix of GTID-based and non GTID-based relay logs") 87 } 88 89 // Create relevant position list of errant GTID-based positions for later 90 // comparison. 91 for alias, status := range replicationStatusMap { 92 // If we're not GTID-based, no need to search for errant GTIDs, so just 93 // add the position to the map and continue. 94 if !isGTIDBased { 95 positionMap[alias] = status.Position 96 97 continue 98 } 99 100 // This condition should really never happen, since we did the same cast 101 // in the earlier loop, but let's be doubly sure. 102 relayLogGTIDSet, ok := status.RelayLogPosition.GTIDSet.(mysql.Mysql56GTIDSet) 103 if !ok { 104 return nil, vterrors.Errorf(vtrpc.Code_FAILED_PRECONDITION, "we got a filled-in relay log position, but it's not of type Mysql56GTIDSet, even though we've determined we need to use GTID based assesment") 105 } 106 107 // We need to remove this alias's status from the list, otherwise the 108 // GTID diff will always be empty. 109 statusList := make([]*mysql.ReplicationStatus, 0, len(replicationStatusMap)-1) 110 111 for a, s := range replicationStatusMap { 112 if a != alias { 113 statusList = append(statusList, s) 114 } 115 } 116 117 errantGTIDs, err := status.FindErrantGTIDs(statusList) 118 switch { 119 case err != nil: 120 // Could not look up GTIDs to determine if we have any. It's not 121 // safe to continue. 122 return nil, err 123 case len(errantGTIDs) != 0: 124 // This tablet has errant GTIDs. It's not a valid candidate for 125 // reparent, so don't insert it into the final mapping. 126 log.Errorf("skipping %v because we detected errant GTIDs - %v", alias, errantGTIDs) 127 continue 128 } 129 130 pos := mysql.Position{GTIDSet: relayLogGTIDSet} 131 positionMap[alias] = pos 132 } 133 134 for alias, primaryStatus := range primaryStatusMap { 135 executedPosition, err := mysql.DecodePosition(primaryStatus.Position) 136 if err != nil { 137 return nil, vterrors.Wrapf(err, "could not decode a primary status executed position for tablet %v: %v", alias, err) 138 } 139 140 positionMap[alias] = executedPosition 141 } 142 143 return positionMap, nil 144 } 145 146 // ReplicaWasRunning returns true if a StopReplicationStatus indicates that the 147 // replica had running replication threads before being stopped. It returns an 148 // error if the Before state of replication is nil. 149 func ReplicaWasRunning(stopStatus *replicationdatapb.StopReplicationStatus) (bool, error) { 150 if stopStatus == nil || stopStatus.Before == nil { 151 return false, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "could not determine Before state of StopReplicationStatus %v", stopStatus) 152 } 153 154 replStatus := mysql.ProtoToReplicationStatus(stopStatus.Before) 155 return (replStatus.IOState == mysql.ReplicationStateRunning) || 156 (replStatus.SQLState == mysql.ReplicationStateRunning), nil 157 } 158 159 // SQLThreadWasRunning returns true if a StopReplicationStatus indicates that the 160 // replica had a running sql thread. It returns an 161 // error if the Before state of replication is nil. 162 func SQLThreadWasRunning(stopStatus *replicationdatapb.StopReplicationStatus) (bool, error) { 163 if stopStatus == nil || stopStatus.Before == nil { 164 return false, vterrors.Errorf(vtrpc.Code_INVALID_ARGUMENT, "could not determine Before state of StopReplicationStatus %v", stopStatus) 165 } 166 167 replStatus := mysql.ProtoToReplicationStatus(stopStatus.Before) 168 return replStatus.SQLState == mysql.ReplicationStateRunning, nil 169 } 170 171 // SetReplicationSource is used to set the replication source on the specified 172 // tablet to the current shard primary (if available). It also figures out if 173 // the tablet should be sending semi-sync ACKs or not and passes that to the 174 // tabletmanager RPC. 175 // 176 // It does not start the replication forcefully. 177 // If we are unable to find the shard primary of the tablet from the topo server 178 // we exit out without any error. 179 func SetReplicationSource(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, tablet *topodatapb.Tablet) error { 180 shardPrimary, err := topotools.GetShardPrimaryForTablet(ctx, ts, tablet) 181 if err != nil { 182 // If we didn't find the shard primary, we return without any error 183 return nil 184 } 185 186 durabilityName, err := ts.GetKeyspaceDurability(ctx, tablet.Keyspace) 187 if err != nil { 188 return err 189 } 190 log.Infof("Getting a new durability policy for %v", durabilityName) 191 durability, err := GetDurabilityPolicy(durabilityName) 192 if err != nil { 193 return err 194 } 195 196 isSemiSync := IsReplicaSemiSync(durability, shardPrimary.Tablet, tablet) 197 return tmc.SetReplicationSource(ctx, tablet, shardPrimary.Alias, 0, "", false, isSemiSync) 198 } 199 200 // replicationSnapshot stores the status maps and the tablets that were reachable 201 // when trying to stopReplicationAndBuildStatusMaps. 202 type replicationSnapshot struct { 203 statusMap map[string]*replicationdatapb.StopReplicationStatus 204 primaryStatusMap map[string]*replicationdatapb.PrimaryStatus 205 reachableTablets []*topodatapb.Tablet 206 } 207 208 // stopReplicationAndBuildStatusMaps stops replication on all replicas, then 209 // collects and returns a mapping of TabletAlias (as string) to their current 210 // replication positions. 211 // Apart from the status maps, it also returns the tablets reached as a list 212 func stopReplicationAndBuildStatusMaps( 213 ctx context.Context, 214 tmc tmclient.TabletManagerClient, 215 ev *events.Reparent, 216 tabletMap map[string]*topo.TabletInfo, 217 stopReplicationTimeout time.Duration, 218 ignoredTablets sets.Set[string], 219 tabletToWaitFor *topodatapb.TabletAlias, 220 durability Durabler, 221 logger logutil.Logger, 222 ) (*replicationSnapshot, error) { 223 event.DispatchUpdate(ev, "stop replication on all replicas") 224 225 var ( 226 m sync.Mutex 227 errChan = make(chan concurrency.Error) 228 allTablets []*topodatapb.Tablet 229 res = &replicationSnapshot{ 230 statusMap: map[string]*replicationdatapb.StopReplicationStatus{}, 231 primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{}, 232 reachableTablets: []*topodatapb.Tablet{}, 233 } 234 ) 235 236 groupCtx, groupCancel := context.WithTimeout(ctx, stopReplicationTimeout) 237 defer groupCancel() 238 239 fillStatus := func(alias string, tabletInfo *topo.TabletInfo, mustWaitForTablet bool) { 240 var concurrencyErr concurrency.Error 241 var err error 242 defer func() { 243 concurrencyErr.Err = err 244 concurrencyErr.MustWaitFor = mustWaitForTablet 245 errChan <- concurrencyErr 246 }() 247 248 logger.Infof("getting replication position from %v", alias) 249 250 stopReplicationStatus, err := tmc.StopReplicationAndGetStatus(groupCtx, tabletInfo.Tablet, replicationdatapb.StopReplicationMode_IOTHREADONLY) 251 if err != nil { 252 sqlErr, isSQLErr := mysql.NewSQLErrorFromError(err).(*mysql.SQLError) 253 if isSQLErr && sqlErr != nil && sqlErr.Number() == mysql.ERNotReplica { 254 var primaryStatus *replicationdatapb.PrimaryStatus 255 256 primaryStatus, err = tmc.DemotePrimary(groupCtx, tabletInfo.Tablet) 257 if err != nil { 258 msg := "replica %v thinks it's primary but we failed to demote it: %v" 259 err = vterrors.Wrapf(err, msg, alias, err) 260 261 logger.Warningf(msg, alias, err) 262 return 263 } 264 265 m.Lock() 266 res.primaryStatusMap[alias] = primaryStatus 267 res.reachableTablets = append(res.reachableTablets, tabletInfo.Tablet) 268 m.Unlock() 269 } else { 270 logger.Warningf("failed to get replication status from %v: %v", alias, err) 271 err = vterrors.Wrapf(err, "error when getting replication status for alias %v: %v", alias, err) 272 } 273 } else { 274 var sqlThreadRunning bool 275 // Check if the sql thread was running for the tablet 276 sqlThreadRunning, err = SQLThreadWasRunning(stopReplicationStatus) 277 if err == nil { 278 // If the sql thread was running, then we will add the tablet to the status map and the list of 279 // reachable tablets. 280 if sqlThreadRunning { 281 m.Lock() 282 res.statusMap[alias] = stopReplicationStatus 283 res.reachableTablets = append(res.reachableTablets, tabletInfo.Tablet) 284 m.Unlock() 285 } else { 286 // If the sql thread was stopped, we do not consider the tablet as reachable 287 // The user must either explicitly ignore this tablet or start its replication 288 logger.Warningf("sql thread stopped on tablet - %v", alias) 289 err = vterrors.New(vtrpc.Code_FAILED_PRECONDITION, "sql thread stopped on tablet - "+alias) 290 } 291 } 292 } 293 } 294 295 tabletAliasToWaitFor := "" 296 numErrorsToWaitFor := 0 297 if tabletToWaitFor != nil { 298 tabletAliasToWaitFor = topoproto.TabletAliasString(tabletToWaitFor) 299 } 300 for alias, tabletInfo := range tabletMap { 301 allTablets = append(allTablets, tabletInfo.Tablet) 302 if !ignoredTablets.Has(alias) { 303 mustWaitFor := tabletAliasToWaitFor == alias 304 if mustWaitFor { 305 numErrorsToWaitFor++ 306 } 307 go fillStatus(alias, tabletInfo, mustWaitFor) 308 } 309 } 310 311 errgroup := concurrency.ErrorGroup{ 312 NumGoroutines: len(tabletMap) - ignoredTablets.Len(), 313 NumRequiredSuccesses: len(tabletMap) - ignoredTablets.Len() - 1, 314 NumAllowedErrors: len(tabletMap), // We set the number of allowed errors to a very high value, because we don't want to exit early 315 // even in case of multiple failures. We rely on the revoke function below to determine if we have more failures than we can tolerate 316 NumErrorsToWaitFor: numErrorsToWaitFor, 317 } 318 319 errRecorder := errgroup.Wait(groupCancel, errChan) 320 if len(errRecorder.Errors) <= 1 { 321 return res, nil 322 } 323 // check that the tablets we were able to reach are sufficient for us to guarantee that no new write will be accepted by any tablet 324 revokeSuccessful := haveRevoked(durability, res.reachableTablets, allTablets) 325 if !revokeSuccessful { 326 return nil, vterrors.Wrapf(errRecorder.Error(), "could not reach sufficient tablets to guarantee safety: %v", errRecorder.Error()) 327 } 328 329 return res, nil 330 } 331 332 // WaitForRelayLogsToApply blocks execution waiting for the given tablet's relay 333 // logs to apply, unless the specified context is canceled or exceeded. 334 // Typically a caller will set a timeout of WaitReplicasTimeout on a context and 335 // use that context with this function. 336 func WaitForRelayLogsToApply(ctx context.Context, tmc tmclient.TabletManagerClient, tabletInfo *topo.TabletInfo, status *replicationdatapb.StopReplicationStatus) error { 337 switch status.After.RelayLogPosition { 338 case "": 339 return tmc.WaitForPosition(ctx, tabletInfo.Tablet, status.After.RelayLogSourceBinlogEquivalentPosition) 340 default: 341 return tmc.WaitForPosition(ctx, tabletInfo.Tablet, status.After.RelayLogPosition) 342 } 343 }