vitess.io/vitess@v0.16.2/go/vt/topo/replication.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package topo 18 19 import ( 20 "context" 21 "path" 22 23 "google.golang.org/protobuf/proto" 24 25 "vitess.io/vitess/go/trace" 26 "vitess.io/vitess/go/vt/log" 27 "vitess.io/vitess/go/vt/logutil" 28 "vitess.io/vitess/go/vt/topo/topoproto" 29 "vitess.io/vitess/go/vt/vterrors" 30 31 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 32 ) 33 34 // ShardReplicationInfo is the companion structure for ShardReplication. 35 type ShardReplicationInfo struct { 36 *topodatapb.ShardReplication 37 cell string 38 keyspace string 39 shard string 40 } 41 42 // NewShardReplicationInfo is for topo.Server implementations to 43 // create the structure 44 func NewShardReplicationInfo(sr *topodatapb.ShardReplication, cell, keyspace, shard string) *ShardReplicationInfo { 45 return &ShardReplicationInfo{ 46 ShardReplication: sr, 47 cell: cell, 48 keyspace: keyspace, 49 shard: shard, 50 } 51 } 52 53 // Cell returns the cell for a ShardReplicationInfo 54 func (sri *ShardReplicationInfo) Cell() string { 55 return sri.cell 56 } 57 58 // Keyspace returns the keyspace for a ShardReplicationInfo 59 func (sri *ShardReplicationInfo) Keyspace() string { 60 return sri.keyspace 61 } 62 63 // Shard returns the shard for a ShardReplicationInfo 64 func (sri *ShardReplicationInfo) Shard() string { 65 return sri.shard 66 } 67 68 // GetShardReplicationNode finds a node for a given tablet. 69 func (sri *ShardReplicationInfo) GetShardReplicationNode(tabletAlias *topodatapb.TabletAlias) (*topodatapb.ShardReplication_Node, error) { 70 for _, rl := range sri.Nodes { 71 if proto.Equal(rl.TabletAlias, tabletAlias) { 72 return rl, nil 73 } 74 } 75 return nil, NewError(NoNode, tabletAlias.String()) 76 } 77 78 // UpdateShardReplicationRecord is a low level function to add / update an 79 // entry to the ShardReplication object. 80 func UpdateShardReplicationRecord(ctx context.Context, ts *Server, keyspace, shard string, tabletAlias *topodatapb.TabletAlias) error { 81 span, ctx := trace.NewSpan(ctx, "TopoServer.UpdateShardReplicationFields") 82 span.Annotate("keyspace", keyspace) 83 span.Annotate("shard", shard) 84 span.Annotate("tablet", topoproto.TabletAliasString(tabletAlias)) 85 defer span.Finish() 86 87 return ts.UpdateShardReplicationFields(ctx, tabletAlias.Cell, keyspace, shard, func(sr *topodatapb.ShardReplication) error { 88 // Not very efficient, but easy to read, and allows us 89 // to remove duplicate entries if any. 90 nodes := make([]*topodatapb.ShardReplication_Node, 0, len((*sr).Nodes)+1) 91 found := false 92 modified := false 93 for _, node := range (*sr).Nodes { 94 if proto.Equal(node.TabletAlias, tabletAlias) { 95 if found { 96 log.Warningf("Found a second ShardReplication_Node for tablet %v, deleting it", tabletAlias) 97 modified = true 98 continue 99 } 100 found = true 101 } 102 nodes = append(nodes, node) 103 } 104 if !found { 105 nodes = append(nodes, &topodatapb.ShardReplication_Node{TabletAlias: tabletAlias}) 106 modified = true 107 } 108 if !modified { 109 return NewError(NoUpdateNeeded, tabletAlias.String()) 110 } 111 (*sr).Nodes = nodes 112 return nil 113 }) 114 } 115 116 // RemoveShardReplicationRecord is a low level function to remove an 117 // entry from the ShardReplication object. 118 func RemoveShardReplicationRecord(ctx context.Context, ts *Server, cell, keyspace, shard string, tabletAlias *topodatapb.TabletAlias) error { 119 err := ts.UpdateShardReplicationFields(ctx, cell, keyspace, shard, func(sr *topodatapb.ShardReplication) error { 120 nodes := make([]*topodatapb.ShardReplication_Node, 0, len((*sr).Nodes)) 121 for _, node := range (*sr).Nodes { 122 if !proto.Equal(node.TabletAlias, tabletAlias) { 123 nodes = append(nodes, node) 124 } 125 } 126 (*sr).Nodes = nodes 127 return nil 128 }) 129 return err 130 } 131 132 // FixShardReplication will fix the first problem it encounters within 133 // a ShardReplication object. It returns info about the error being fixed, if 134 // an error was found. 135 // 136 // A return value of (nil, nil) indicates no issues in the replication graph. 137 func FixShardReplication(ctx context.Context, ts *Server, logger logutil.Logger, cell, keyspace, shard string) (*topodatapb.ShardReplicationError, error) { 138 sri, err := ts.GetShardReplication(ctx, cell, keyspace, shard) 139 if err != nil { 140 return nil, err 141 } 142 143 for _, node := range sri.Nodes { 144 problem := &topodatapb.ShardReplicationError{ 145 TabletAlias: node.TabletAlias, 146 } 147 148 ti, err := ts.GetTablet(ctx, node.TabletAlias) 149 if IsErrType(err, NoNode) { 150 problem.Type = topodatapb.ShardReplicationError_NOT_FOUND 151 logger.Warningf("Tablet %v is in the replication graph, but does not exist, removing it", node.TabletAlias) 152 return problem, RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias) 153 } 154 if err != nil { 155 // unknown error, we probably don't want to continue 156 return nil, err 157 } 158 159 if ti.Keyspace != keyspace || ti.Shard != shard || ti.Alias.Cell != cell { 160 problem.Type = topodatapb.ShardReplicationError_TOPOLOGY_MISMATCH 161 logger.Warningf("Tablet '%v' is in the replication graph, but has wrong keyspace/shard/cell, removing it", ti.Tablet) 162 return problem, RemoveShardReplicationRecord(ctx, ts, cell, keyspace, shard, node.TabletAlias) 163 } 164 165 logger.Infof("Keeping tablet %v in the replication graph", node.TabletAlias) 166 } 167 168 logger.Infof("All entries in replication graph are valid") 169 return nil, nil 170 } 171 172 // UpdateShardReplicationFields updates the fields inside a topo.ShardReplication object. 173 func (ts *Server) UpdateShardReplicationFields(ctx context.Context, cell, keyspace, shard string, update func(*topodatapb.ShardReplication) error) error { 174 nodePath := path.Join(KeyspacesPath, keyspace, ShardsPath, shard, ShardReplicationFile) 175 176 conn, err := ts.ConnForCell(ctx, cell) 177 if err != nil { 178 return err 179 } 180 181 for { 182 data, version, err := conn.Get(ctx, nodePath) 183 sr := &topodatapb.ShardReplication{} 184 switch { 185 case IsErrType(err, NoNode): 186 // Empty node, version is nil 187 case err == nil: 188 // Use any data we got. 189 if err = proto.Unmarshal(data, sr); err != nil { 190 return vterrors.Wrap(err, "bad ShardReplication data") 191 } 192 default: 193 return err 194 } 195 196 err = update(sr) 197 switch { 198 case IsErrType(err, NoUpdateNeeded): 199 return nil 200 case err == nil: 201 // keep going 202 default: 203 return err 204 } 205 206 // marshall and save 207 data, err = proto.Marshal(sr) 208 if err != nil { 209 return err 210 } 211 if version == nil { 212 // We have to create, and we catch NodeExists. 213 _, err = conn.Create(ctx, nodePath, data) 214 if IsErrType(err, NodeExists) { 215 // Node was created by another process, try 216 // again. 217 continue 218 } 219 return err 220 } 221 222 // We have to update, and we catch ErrBadVersion. 223 _, err = conn.Update(ctx, nodePath, data, version) 224 if IsErrType(err, BadVersion) { 225 // Node was updated by another process, try again. 226 continue 227 } 228 return err 229 } 230 } 231 232 // GetShardReplication returns the ShardReplicationInfo object. 233 func (ts *Server) GetShardReplication(ctx context.Context, cell, keyspace, shard string) (*ShardReplicationInfo, error) { 234 conn, err := ts.ConnForCell(ctx, cell) 235 if err != nil { 236 return nil, err 237 } 238 239 nodePath := path.Join(KeyspacesPath, keyspace, ShardsPath, shard, ShardReplicationFile) 240 data, _, err := conn.Get(ctx, nodePath) 241 if err != nil { 242 return nil, err 243 } 244 245 sr := &topodatapb.ShardReplication{} 246 if err = proto.Unmarshal(data, sr); err != nil { 247 return nil, vterrors.Wrap(err, "bad ShardReplication data") 248 } 249 250 return NewShardReplicationInfo(sr, cell, keyspace, shard), nil 251 } 252 253 // DeleteShardReplication deletes a ShardReplication object. 254 func (ts *Server) DeleteShardReplication(ctx context.Context, cell, keyspace, shard string) error { 255 conn, err := ts.ConnForCell(ctx, cell) 256 if err != nil { 257 return err 258 } 259 260 nodePath := path.Join(KeyspacesPath, keyspace, ShardsPath, shard, ShardReplicationFile) 261 return conn.Delete(ctx, nodePath, nil) 262 } 263 264 // DeleteKeyspaceReplication deletes all the ShardReplication objects for a cell/keyspace. 265 func (ts *Server) DeleteKeyspaceReplication(ctx context.Context, cell, keyspace string) error { 266 conn, err := ts.ConnForCell(ctx, cell) 267 if err != nil { 268 return err 269 } 270 271 nodePath := path.Join(KeyspacesPath, keyspace) 272 return conn.Delete(ctx, nodePath, nil) 273 }