vitess.io/vitess@v0.16.2/go/vt/wrangler/shard.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package wrangler 18 19 import ( 20 "context" 21 "fmt" 22 23 "vitess.io/vitess/go/vt/topo" 24 25 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 26 vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" 27 ) 28 29 // shard related methods for Wrangler 30 31 // UpdateSrvKeyspacePartitions changes the SrvKeyspaceGraph 32 // for a shard. It updates serving graph 33 // 34 // This takes the keyspace lock as to not interfere with resharding operations. 35 func (wr *Wrangler) UpdateSrvKeyspacePartitions(ctx context.Context, keyspace, shard string, tabletType topodatapb.TabletType, cells []string, remove bool) (err error) { 36 // lock the keyspace 37 ctx, unlock, lockErr := wr.ts.LockKeyspace(ctx, keyspace, "UpdateSrvKeyspacePartitions") 38 if lockErr != nil { 39 return lockErr 40 } 41 defer unlock(&err) 42 43 si, err := wr.ts.GetShard(ctx, keyspace, shard) 44 if err != nil { 45 return err 46 } 47 48 if remove { 49 return wr.ts.DeleteSrvKeyspacePartitions(ctx, keyspace, []*topo.ShardInfo{si}, tabletType, cells) 50 } 51 return wr.ts.AddSrvKeyspacePartitions(ctx, keyspace, []*topo.ShardInfo{si}, tabletType, cells) 52 } 53 54 // DeleteShard will do all the necessary changes in the topology server 55 // to entirely remove a shard. 56 func (wr *Wrangler) DeleteShard(ctx context.Context, keyspace, shard string, recursive, evenIfServing bool) error { 57 // Read the Shard object. If it's not there, try to clean up 58 // the topology anyway. 59 shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard) 60 if err != nil { 61 if topo.IsErrType(err, topo.NoNode) { 62 wr.Logger().Infof("Shard %v/%v doesn't seem to exist, cleaning up any potential leftover", keyspace, shard) 63 return wr.ts.DeleteShard(ctx, keyspace, shard) 64 } 65 return err 66 } 67 68 servingCells, err := wr.ts.GetShardServingCells(ctx, shardInfo) 69 if err != nil { 70 return err 71 } 72 // Check the Serving map for the shard, we don't want to 73 // remove a serving shard if not absolutely sure. 74 if !evenIfServing && len(servingCells) > 0 { 75 return fmt.Errorf("shard %v/%v is still serving, cannot delete it, use even_if_serving flag if needed", keyspace, shard) 76 } 77 78 cells, err := wr.ts.GetCellInfoNames(ctx) 79 if err != nil { 80 return err 81 } 82 83 // Go through all the cells. 84 for _, cell := range cells { 85 var aliases []*topodatapb.TabletAlias 86 87 // Get the ShardReplication object for that cell. Try 88 // to find all tablets that may belong to our shard. 89 sri, err := wr.ts.GetShardReplication(ctx, cell, keyspace, shard) 90 switch { 91 case topo.IsErrType(err, topo.NoNode): 92 // No ShardReplication object. It means the 93 // topo is inconsistent. Let's read all the 94 // tablets for that cell, and if we find any 95 // in our keyspace / shard, either abort or 96 // try to delete them. 97 aliases, err = wr.ts.GetTabletAliasesByCell(ctx, cell) 98 if err != nil { 99 return fmt.Errorf("GetTabletsByCell(%v) failed: %v", cell, err) 100 } 101 case err == nil: 102 // We found a ShardReplication object. We 103 // trust it to have all tablet records. 104 aliases = make([]*topodatapb.TabletAlias, len(sri.Nodes)) 105 for i, n := range sri.Nodes { 106 aliases[i] = n.TabletAlias 107 } 108 default: 109 return fmt.Errorf("GetShardReplication(%v, %v, %v) failed: %v", cell, keyspace, shard, err) 110 } 111 112 // Get the corresponding Tablet records. Note 113 // GetTabletMap ignores ErrNoNode, and it's good for 114 // our purpose, it means a tablet was deleted but is 115 // still referenced. 116 tabletMap, err := wr.ts.GetTabletMap(ctx, aliases) 117 if err != nil { 118 return fmt.Errorf("GetTabletMap() failed: %v", err) 119 } 120 121 // Remove the tablets that don't belong to our 122 // keyspace/shard from the map. 123 for a, ti := range tabletMap { 124 if ti.Keyspace != keyspace || ti.Shard != shard { 125 delete(tabletMap, a) 126 } 127 } 128 129 // Now see if we need to DeleteTablet, and if we can, do it. 130 if len(tabletMap) > 0 { 131 if !recursive { 132 return fmt.Errorf("shard %v/%v still has %v tablets in cell %v; use -recursive or remove them manually", keyspace, shard, len(tabletMap), cell) 133 } 134 135 wr.Logger().Infof("Deleting all tablets in shard %v/%v cell %v", keyspace, shard, cell) 136 for tabletAlias, tabletInfo := range tabletMap { 137 // We don't care about scrapping or updating the replication graph, 138 // because we're about to delete the entire replication graph. 139 wr.Logger().Infof("Deleting tablet %v", tabletAlias) 140 if err := wr.TopoServer().DeleteTablet(ctx, tabletInfo.Alias); err != nil && !topo.IsErrType(err, topo.NoNode) { 141 // We don't want to continue if a DeleteTablet fails for 142 // any good reason (other than missing tablet, in which 143 // case it's just a topology server inconsistency we can 144 // ignore). If we continue and delete the replication 145 // graph, the tablet record will be orphaned, since 146 // we'll no longer know it belongs to this shard. 147 // 148 // If the problem is temporary, or resolved externally, re-running 149 // DeleteShard will skip over tablets that were already deleted. 150 return fmt.Errorf("can't delete tablet %v: %v", tabletAlias, err) 151 } 152 } 153 } 154 } 155 156 // Try to remove the replication graph and serving graph in each cell, 157 // regardless of its existence. 158 for _, cell := range cells { 159 if err := wr.ts.DeleteShardReplication(ctx, cell, keyspace, shard); err != nil && !topo.IsErrType(err, topo.NoNode) { 160 wr.Logger().Warningf("Cannot delete ShardReplication in cell %v for %v/%v: %v", cell, keyspace, shard, err) 161 } 162 } 163 164 return wr.ts.DeleteShard(ctx, keyspace, shard) 165 } 166 167 // SourceShardDelete will delete a SourceShard inside a shard, by index. 168 // 169 // This takes the keyspace lock as not to interfere with resharding operations. 170 func (wr *Wrangler) SourceShardDelete(ctx context.Context, keyspace, shard string, uid uint32) (err error) { 171 resp, err := wr.VtctldServer().SourceShardDelete(ctx, &vtctldatapb.SourceShardDeleteRequest{ 172 Keyspace: keyspace, 173 Shard: shard, 174 Uid: uid, 175 }) 176 if err != nil { 177 return err 178 } 179 180 if resp.Shard == nil { 181 return fmt.Errorf("no SourceShard with uid %v", uid) 182 } 183 184 return nil 185 } 186 187 // SourceShardAdd will add a new SourceShard inside a shard. 188 func (wr *Wrangler) SourceShardAdd(ctx context.Context, keyspace, shard string, uid uint32, skeyspace, sshard string, keyRange *topodatapb.KeyRange, tables []string) (err error) { 189 resp, err := wr.VtctldServer().SourceShardAdd(ctx, &vtctldatapb.SourceShardAddRequest{ 190 Keyspace: keyspace, 191 Shard: shard, 192 Uid: uid, 193 SourceKeyspace: skeyspace, 194 SourceShard: sshard, 195 KeyRange: keyRange, 196 Tables: tables, 197 }) 198 if err != nil { 199 return err 200 } 201 202 if resp.Shard == nil { 203 return fmt.Errorf("uid %v is already in use", uid) 204 } 205 206 return nil 207 }