vitess.io/vitess@v0.16.2/go/vt/topotools/keyspace.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package topotools 18 19 import ( 20 "context" 21 "fmt" 22 "strings" 23 "sync" 24 "time" 25 26 "vitess.io/vitess/go/vt/logutil" 27 "vitess.io/vitess/go/vt/topo" 28 "vitess.io/vitess/go/vt/vttablet/tmclient" 29 30 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 31 ) 32 33 // RefreshTabletsByShard calls RefreshState on all the tablets in a given shard. 34 // 35 // It only returns errors from looking up the tablet map from the topology; 36 // errors returned from any RefreshState RPCs are logged and then ignored. Also, 37 // any tablets without a .Hostname set in the topology are skipped. 38 // 39 // However, on partial errors from the topology, or errors from a RefreshState 40 // RPC will cause a boolean flag to be returned indicating only partial success 41 // along with a string detailing why we had a partial refresh. 42 func RefreshTabletsByShard(ctx context.Context, ts *topo.Server, tmc tmclient.TabletManagerClient, si *topo.ShardInfo, cells []string, logger logutil.Logger) (isPartialRefresh bool, partialRefreshDetails string, err error) { 43 logger.Infof("RefreshTabletsByShard called on shard %v/%v", si.Keyspace(), si.ShardName()) 44 // Causes and details if we have a partial refresh 45 prd := strings.Builder{} 46 47 tabletMap, err := ts.GetTabletMapForShardByCell(ctx, si.Keyspace(), si.ShardName(), cells) 48 switch { 49 case err == nil: 50 // keep going 51 case topo.IsErrType(err, topo.PartialResult): 52 logger.Warningf("RefreshTabletsByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName()) 53 prd.WriteString(fmt.Sprintf("got partial results from topo server for shard %v/%v: %v", si.Keyspace(), si.ShardName(), err)) 54 isPartialRefresh = true 55 default: 56 return false, "", err 57 } 58 59 // Any errors from this point onward are ignored. 60 var ( 61 m sync.Mutex 62 wg sync.WaitGroup 63 refreshTimeout = 60 * time.Second 64 ) 65 66 // If there's a timeout set on the context, use what's left of it instead of the 60s default. 67 if deadline, ok := ctx.Deadline(); ok { 68 timeLeft := time.Until(deadline) 69 if timeLeft > 0 { 70 refreshTimeout = time.Until(deadline) 71 } 72 } 73 74 for _, ti := range tabletMap { 75 if ti.Hostname == "" { 76 // The tablet is not running, we don't have the host 77 // name to connect to, so we just skip this tablet. 78 logger.Infof("Tablet %v has no hostname, skipping its RefreshState", ti.AliasString()) 79 continue 80 } 81 82 wg.Add(1) 83 go func(ti *topo.TabletInfo) { 84 defer wg.Done() 85 grctx, grcancel := context.WithTimeout(ctx, refreshTimeout) 86 defer grcancel() 87 logger.Infof("Calling RefreshState on tablet %v with a timeout of %v", ti.AliasString(), refreshTimeout) 88 89 if err := tmc.RefreshState(grctx, ti.Tablet); err != nil { 90 logger.Warningf("RefreshTabletsByShard: failed to refresh %v: %v", ti.AliasString(), err) 91 m.Lock() 92 prd.WriteString(fmt.Sprintf("failed to refresh tablet %v: %v", ti.AliasString(), err)) 93 isPartialRefresh = true 94 m.Unlock() 95 } 96 }(ti) 97 } 98 wg.Wait() 99 100 return isPartialRefresh, prd.String(), err 101 } 102 103 // UpdateShardRecords updates the shard records based on 'from' or 'to' 104 // direction. 105 func UpdateShardRecords( 106 ctx context.Context, 107 ts *topo.Server, 108 tmc tmclient.TabletManagerClient, 109 keyspace string, 110 shards []*topo.ShardInfo, 111 cells []string, 112 servedType topodatapb.TabletType, 113 isFrom bool, 114 clearSourceShards bool, 115 logger logutil.Logger, 116 ) error { 117 disableQueryService := isFrom 118 if err := ts.UpdateDisableQueryService(ctx, keyspace, shards, servedType, cells, disableQueryService); err != nil { 119 return err 120 } 121 122 for i, si := range shards { 123 updatedShard, err := ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(si *topo.ShardInfo) error { 124 if clearSourceShards { 125 si.SourceShards = nil 126 } 127 128 return nil 129 }) 130 131 if err != nil { 132 return err 133 } 134 135 shards[i] = updatedShard 136 137 // For 'to' shards, refresh to make them serve. The 'from' shards will 138 // be refreshed after traffic has migrated. 139 if !isFrom { 140 if _, _, err := RefreshTabletsByShard(ctx, ts, tmc, si, cells, logger); err != nil { 141 logger.Warningf("RefreshTabletsByShard(%v/%v, cells=%v) failed with %v; continuing ...", si.Keyspace(), si.ShardName(), cells, err) 142 } 143 } 144 } 145 146 return nil 147 } 148 149 // KeyspaceEquality returns true iff two KeyspaceInformations are identical for testing purposes 150 func KeyspaceEquality(left, right *topodatapb.Keyspace) bool { 151 if left.KeyspaceType != right.KeyspaceType { 152 return false 153 } 154 if len(left.ServedFroms) != len(right.ServedFroms) { 155 return false 156 } 157 for i := range left.ServedFroms { 158 if left.ServedFroms[i] != right.ServedFroms[i] { 159 return false 160 } 161 } 162 if left.KeyspaceType != right.KeyspaceType { 163 return false 164 } 165 if left.BaseKeyspace != right.BaseKeyspace { 166 return false 167 } 168 169 if left.SnapshotTime != right.SnapshotTime { 170 return false 171 } 172 173 return left.DurabilityPolicy == right.DurabilityPolicy 174 }