vitess.io/vitess@v0.16.2/go/vt/wrangler/split.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package wrangler 18 19 import ( 20 "fmt" 21 "io" 22 "time" 23 24 "context" 25 26 "vitess.io/vitess/go/vt/grpcclient" 27 "vitess.io/vitess/go/vt/topo" 28 "vitess.io/vitess/go/vt/topo/topoproto" 29 "vitess.io/vitess/go/vt/vttablet/tabletconn" 30 31 querypb "vitess.io/vitess/go/vt/proto/query" 32 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 33 ) 34 35 const ( 36 // DefaultWaitForFilteredReplicationMaxDelay is the default maximum delay value used in WaitForFilteredReplication. 37 DefaultWaitForFilteredReplicationMaxDelay = 30 * time.Second 38 ) 39 40 // SetSourceShards is a utility function to override the SourceShards fields 41 // on a Shard. 42 func (wr *Wrangler) SetSourceShards(ctx context.Context, keyspace, shard string, sources []*topodatapb.TabletAlias, tables []string) error { 43 // Read the source tablets. 44 sourceTablets, err := wr.ts.GetTabletMap(ctx, sources) 45 if err != nil { 46 return err 47 } 48 49 // Insert their KeyRange in the SourceShards array. 50 // We use a linear 0-based id, that matches what worker/split_clone.go 51 // inserts into _vt.vreplication. 52 // We want to guarantee sourceShards[i] is using sources[i], 53 // So iterating over the sourceTablets map would be a bad idea. 54 sourceShards := make([]*topodatapb.Shard_SourceShard, len(sourceTablets)) 55 for i, alias := range sources { 56 ti := sourceTablets[topoproto.TabletAliasString(alias)] 57 sourceShards[i] = &topodatapb.Shard_SourceShard{ 58 Uid: uint32(i), 59 Keyspace: ti.Keyspace, 60 Shard: ti.Shard, 61 KeyRange: ti.KeyRange, 62 Tables: tables, 63 } 64 } 65 66 // Update the shard with the new source shards. 67 _, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(si *topo.ShardInfo) error { 68 // If the shard already has sources, maybe it's already been restored, 69 // so let's be safe and abort right here. 70 if len(si.SourceShards) > 0 { 71 return fmt.Errorf("shard %v/%v already has SourceShards, not overwriting them (full record: %v)", keyspace, shard, si.Shard) 72 } 73 74 si.SourceShards = sourceShards 75 return nil 76 }) 77 return err 78 } 79 80 // WaitForFilteredReplication will wait until the Filtered Replication process has finished. 81 func (wr *Wrangler) WaitForFilteredReplication(ctx context.Context, keyspace, shard string, maxDelay time.Duration) error { 82 shardInfo, err := wr.TopoServer().GetShard(ctx, keyspace, shard) 83 if err != nil { 84 return err 85 } 86 if len(shardInfo.SourceShards) == 0 { 87 return fmt.Errorf("shard %v/%v has no source shard", keyspace, shard) 88 } 89 if !shardInfo.HasPrimary() { 90 return fmt.Errorf("shard %v/%v has no primary", keyspace, shard) 91 } 92 alias := shardInfo.PrimaryAlias 93 tabletInfo, err := wr.TopoServer().GetTablet(ctx, alias) 94 if err != nil { 95 return err 96 } 97 98 // Always run an explicit healthcheck first to make sure we don't see any outdated values. 99 // This is especially true for tests and automation where there is no pause of multiple seconds 100 // between commands and the periodic healthcheck did not run again yet. 101 if err := wr.TabletManagerClient().RunHealthCheck(ctx, tabletInfo.Tablet); err != nil { 102 return fmt.Errorf("failed to run explicit healthcheck on tablet: %v err: %v", tabletInfo, err) 103 } 104 105 conn, err := tabletconn.GetDialer()(tabletInfo.Tablet, grpcclient.FailFast(false)) 106 if err != nil { 107 return fmt.Errorf("cannot connect to tablet %v: %v", alias, err) 108 } 109 110 var lastSeenDelay time.Duration 111 err = conn.StreamHealth(ctx, func(shr *querypb.StreamHealthResponse) error { 112 stats := shr.RealtimeStats 113 if stats == nil { 114 return fmt.Errorf("health record does not include RealtimeStats message. tablet: %v health record: %v", alias, shr) 115 } 116 if stats.HealthError != "" { 117 return fmt.Errorf("tablet is not healthy. tablet: %v health record: %v", alias, shr) 118 } 119 if stats.BinlogPlayersCount == 0 { 120 return fmt.Errorf("no filtered replication running on tablet: %v health record: %v", alias, shr) 121 } 122 123 delaySecs := stats.FilteredReplicationLagSeconds 124 lastSeenDelay = time.Duration(delaySecs) * time.Second 125 if lastSeenDelay < 0 { 126 return fmt.Errorf("last seen delay should never be negative. tablet: %v delay: %v", alias, lastSeenDelay) 127 } 128 if lastSeenDelay <= maxDelay { 129 wr.Logger().Printf("Filtered replication on tablet: %v has caught up. Last seen delay: %.1f seconds\n", alias, lastSeenDelay.Seconds()) 130 return io.EOF 131 } 132 wr.Logger().Printf("Waiting for filtered replication to catch up on tablet: %v Last seen delay: %.1f seconds\n", alias, lastSeenDelay.Seconds()) 133 return nil 134 }) 135 if err != nil { 136 return fmt.Errorf("could not stream health records from tablet: %v err: %v", alias, err) 137 } 138 139 select { 140 case <-ctx.Done(): 141 return fmt.Errorf("context was done before filtered replication did catch up. Last seen delay: %v context Error: %v", lastSeenDelay, ctx.Err()) 142 default: 143 } 144 return nil 145 }