vitess.io/vitess@v0.16.2/go/vt/wrangler/resharder.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package wrangler 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "sync" 24 "time" 25 26 "google.golang.org/protobuf/encoding/prototext" 27 28 "vitess.io/vitess/go/vt/log" 29 "vitess.io/vitess/go/vt/schema" 30 "vitess.io/vitess/go/vt/vtctl/workflow" 31 32 "vitess.io/vitess/go/sqltypes" 33 "vitess.io/vitess/go/vt/binlog/binlogplayer" 34 "vitess.io/vitess/go/vt/concurrency" 35 "vitess.io/vitess/go/vt/key" 36 binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" 37 vschemapb "vitess.io/vitess/go/vt/proto/vschema" 38 "vitess.io/vitess/go/vt/topo" 39 "vitess.io/vitess/go/vt/topotools" 40 "vitess.io/vitess/go/vt/vterrors" 41 "vitess.io/vitess/go/vt/vtgate/vindexes" 42 "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" 43 ) 44 45 type resharder struct { 46 wr *Wrangler 47 keyspace string 48 workflow string 49 sourceShards []*topo.ShardInfo 50 sourcePrimaries map[string]*topo.TabletInfo 51 targetShards []*topo.ShardInfo 52 targetPrimaries map[string]*topo.TabletInfo 53 vschema *vschemapb.Keyspace 54 refStreams map[string]*refStream 55 cell string //single cell or cellsAlias or comma-separated list of cells/cellsAliases 56 tabletTypes string 57 stopAfterCopy bool 58 onDDL string 59 deferSecondaryKeys bool 60 } 61 62 type refStream struct { 63 workflow string 64 bls *binlogdatapb.BinlogSource 65 cell string 66 tabletTypes string 67 } 68 69 // Reshard initiates a resharding workflow. 70 func (wr *Wrangler) Reshard(ctx context.Context, keyspace, workflow string, sources, targets []string, 71 skipSchemaCopy bool, cell, tabletTypes, onDDL string, autoStart, stopAfterCopy, deferSecondaryKeys bool) error { 72 if err := wr.validateNewWorkflow(ctx, keyspace, workflow); err != nil { 73 return err 74 } 75 if err := wr.ts.ValidateSrvKeyspace(ctx, keyspace, cell); err != nil { 76 err2 := vterrors.Wrapf(err, "SrvKeyspace for keyspace %s is corrupt in cell %s", keyspace, cell) 77 log.Errorf("%w", err2) 78 return err2 79 } 80 81 rs, err := wr.buildResharder(ctx, keyspace, workflow, sources, targets, cell, tabletTypes) 82 if err != nil { 83 return vterrors.Wrap(err, "buildResharder") 84 } 85 86 rs.onDDL = onDDL 87 rs.stopAfterCopy = stopAfterCopy 88 rs.deferSecondaryKeys = deferSecondaryKeys 89 if !skipSchemaCopy { 90 if err := rs.copySchema(ctx); err != nil { 91 return vterrors.Wrap(err, "copySchema") 92 } 93 } 94 if err := rs.createStreams(ctx); err != nil { 95 return vterrors.Wrap(err, "createStreams") 96 } 97 98 if autoStart { 99 if err := rs.startStreams(ctx); err != nil { 100 return vterrors.Wrap(err, "startStreams") 101 } 102 } else { 103 wr.Logger().Infof("Streams will not be started since -auto_start is set to false") 104 } 105 return nil 106 } 107 108 func (wr *Wrangler) buildResharder(ctx context.Context, keyspace, workflow string, sources, targets []string, cell, tabletTypes string) (*resharder, error) { 109 rs := &resharder{ 110 wr: wr, 111 keyspace: keyspace, 112 workflow: workflow, 113 sourcePrimaries: make(map[string]*topo.TabletInfo), 114 targetPrimaries: make(map[string]*topo.TabletInfo), 115 cell: cell, 116 tabletTypes: tabletTypes, 117 } 118 for _, shard := range sources { 119 si, err := wr.ts.GetShard(ctx, keyspace, shard) 120 if err != nil { 121 return nil, vterrors.Wrapf(err, "GetShard(%s) failed", shard) 122 } 123 if !si.IsPrimaryServing { 124 return nil, fmt.Errorf("source shard %v is not in serving state", shard) 125 } 126 rs.sourceShards = append(rs.sourceShards, si) 127 primary, err := wr.ts.GetTablet(ctx, si.PrimaryAlias) 128 if err != nil { 129 return nil, vterrors.Wrapf(err, "GetTablet(%s) failed", si.PrimaryAlias) 130 } 131 rs.sourcePrimaries[si.ShardName()] = primary 132 } 133 for _, shard := range targets { 134 si, err := wr.ts.GetShard(ctx, keyspace, shard) 135 if err != nil { 136 return nil, vterrors.Wrapf(err, "GetShard(%s) failed", shard) 137 } 138 if si.IsPrimaryServing { 139 return nil, fmt.Errorf("target shard %v is in serving state", shard) 140 } 141 rs.targetShards = append(rs.targetShards, si) 142 primary, err := wr.ts.GetTablet(ctx, si.PrimaryAlias) 143 if err != nil { 144 return nil, vterrors.Wrapf(err, "GetTablet(%s) failed", si.PrimaryAlias) 145 } 146 rs.targetPrimaries[si.ShardName()] = primary 147 } 148 if err := topotools.ValidateForReshard(rs.sourceShards, rs.targetShards); err != nil { 149 return nil, vterrors.Wrap(err, "ValidateForReshard") 150 } 151 if err := rs.validateTargets(ctx); err != nil { 152 return nil, vterrors.Wrap(err, "validateTargets") 153 } 154 155 vschema, err := wr.ts.GetVSchema(ctx, keyspace) 156 if err != nil { 157 return nil, vterrors.Wrap(err, "GetVSchema") 158 } 159 rs.vschema = vschema 160 161 if err := rs.readRefStreams(ctx); err != nil { 162 return nil, vterrors.Wrap(err, "readRefStreams") 163 } 164 return rs, nil 165 } 166 167 func (rs *resharder) validateTargets(ctx context.Context) error { 168 err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error { 169 targetPrimary := rs.targetPrimaries[target.ShardName()] 170 query := fmt.Sprintf("select 1 from _vt.vreplication where db_name=%s", encodeString(targetPrimary.DbName())) 171 p3qr, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query) 172 if err != nil { 173 return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query) 174 } 175 if len(p3qr.Rows) != 0 { 176 return errors.New("some streams already exist in the target shards, please clean them up and retry the command") 177 } 178 return nil 179 }) 180 return err 181 } 182 183 func (rs *resharder) readRefStreams(ctx context.Context) error { 184 var mu sync.Mutex 185 err := rs.forAll(rs.sourceShards, func(source *topo.ShardInfo) error { 186 sourcePrimary := rs.sourcePrimaries[source.ShardName()] 187 188 query := fmt.Sprintf("select workflow, source, cell, tablet_types from _vt.vreplication where db_name=%s and message != 'FROZEN'", encodeString(sourcePrimary.DbName())) 189 p3qr, err := rs.wr.tmc.VReplicationExec(ctx, sourcePrimary.Tablet, query) 190 if err != nil { 191 return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", sourcePrimary.Tablet, query) 192 } 193 qr := sqltypes.Proto3ToResult(p3qr) 194 195 mu.Lock() 196 defer mu.Unlock() 197 198 mustCreate := false 199 var ref map[string]bool 200 if rs.refStreams == nil { 201 rs.refStreams = make(map[string]*refStream) 202 mustCreate = true 203 } else { 204 // Copy the ref streams for comparison. 205 ref = make(map[string]bool, len(rs.refStreams)) 206 for k := range rs.refStreams { 207 ref[k] = true 208 } 209 } 210 for _, row := range qr.Rows { 211 212 workflow := row[0].ToString() 213 if workflow == "" { 214 return fmt.Errorf("VReplication streams must have named workflows for migration: shard: %s:%s", source.Keyspace(), source.ShardName()) 215 } 216 var bls binlogdatapb.BinlogSource 217 rowBytes, err := row[1].ToBytes() 218 if err != nil { 219 return err 220 } 221 if err := prototext.Unmarshal(rowBytes, &bls); err != nil { 222 return vterrors.Wrapf(err, "prototext.Unmarshal: %v", row) 223 } 224 isReference, err := rs.blsIsReference(&bls) 225 if err != nil { 226 return vterrors.Wrap(err, "blsIsReference") 227 } 228 if !isReference { 229 continue 230 } 231 key := fmt.Sprintf("%s:%s:%s", workflow, bls.Keyspace, bls.Shard) 232 if mustCreate { 233 rs.refStreams[key] = &refStream{ 234 workflow: workflow, 235 bls: &bls, 236 cell: row[2].ToString(), 237 tabletTypes: row[3].ToString(), 238 } 239 } else { 240 if !ref[key] { 241 return fmt.Errorf("streams are mismatched across source shards for workflow: %s", workflow) 242 } 243 delete(ref, key) 244 } 245 } 246 if len(ref) != 0 { 247 return fmt.Errorf("streams are mismatched across source shards: %v", ref) 248 } 249 return nil 250 }) 251 return err 252 } 253 254 // blsIsReference is partially copied from streamMigrater.templatize. 255 // It reuses the constants from that function also. 256 func (rs *resharder) blsIsReference(bls *binlogdatapb.BinlogSource) (bool, error) { 257 streamType := workflow.StreamTypeUnknown 258 for _, rule := range bls.Filter.Rules { 259 typ, err := rs.identifyRuleType(rule) 260 if err != nil { 261 return false, err 262 } 263 264 switch typ { 265 case workflow.StreamTypeSharded: 266 if streamType == workflow.StreamTypeReference { 267 return false, fmt.Errorf("cannot reshard streams with a mix of reference and sharded tables: %v", bls) 268 } 269 streamType = workflow.StreamTypeSharded 270 case workflow.StreamTypeReference: 271 if streamType == workflow.StreamTypeSharded { 272 return false, fmt.Errorf("cannot reshard streams with a mix of reference and sharded tables: %v", bls) 273 } 274 streamType = workflow.StreamTypeReference 275 } 276 } 277 return streamType == workflow.StreamTypeReference, nil 278 } 279 280 func (rs *resharder) identifyRuleType(rule *binlogdatapb.Rule) (workflow.StreamType, error) { 281 vtable, ok := rs.vschema.Tables[rule.Match] 282 if !ok && !schema.IsInternalOperationTableName(rule.Match) { 283 return 0, fmt.Errorf("table %v not found in vschema", rule.Match) 284 } 285 if vtable != nil && vtable.Type == vindexes.TypeReference { 286 return workflow.StreamTypeReference, nil 287 } 288 // In this case, 'sharded' means that it's not a reference 289 // table. We don't care about any other subtleties. 290 return workflow.StreamTypeSharded, nil 291 } 292 293 func (rs *resharder) copySchema(ctx context.Context) error { 294 oneSource := rs.sourceShards[0].PrimaryAlias 295 err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error { 296 return rs.wr.CopySchemaShard(ctx, oneSource, []string{"/.*"}, nil, false, rs.keyspace, target.ShardName(), 1*time.Second, false) 297 }) 298 return err 299 } 300 301 func (rs *resharder) createStreams(ctx context.Context) error { 302 var excludeRules []*binlogdatapb.Rule 303 for tableName, table := range rs.vschema.Tables { 304 if table.Type == vindexes.TypeReference { 305 excludeRules = append(excludeRules, &binlogdatapb.Rule{ 306 Match: tableName, 307 Filter: "exclude", 308 }) 309 } 310 } 311 312 err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error { 313 targetPrimary := rs.targetPrimaries[target.ShardName()] 314 315 ig := vreplication.NewInsertGenerator(binlogplayer.BlpStopped, targetPrimary.DbName()) 316 317 // copy excludeRules to prevent data race. 318 copyExcludeRules := append([]*binlogdatapb.Rule(nil), excludeRules...) 319 for _, source := range rs.sourceShards { 320 if !key.KeyRangesIntersect(target.KeyRange, source.KeyRange) { 321 continue 322 } 323 filter := &binlogdatapb.Filter{ 324 Rules: append(copyExcludeRules, &binlogdatapb.Rule{ 325 Match: "/.*", 326 Filter: key.KeyRangeString(target.KeyRange), 327 }), 328 } 329 bls := &binlogdatapb.BinlogSource{ 330 Keyspace: rs.keyspace, 331 Shard: source.ShardName(), 332 Filter: filter, 333 StopAfterCopy: rs.stopAfterCopy, 334 OnDdl: binlogdatapb.OnDDLAction(binlogdatapb.OnDDLAction_value[rs.onDDL]), 335 } 336 ig.AddRow(rs.workflow, bls, "", rs.cell, rs.tabletTypes, 337 int64(binlogdatapb.VReplicationWorkflowType_Reshard), 338 int64(binlogdatapb.VReplicationWorkflowSubType_None), 339 rs.deferSecondaryKeys) 340 } 341 342 for _, rstream := range rs.refStreams { 343 ig.AddRow(rstream.workflow, rstream.bls, "", rstream.cell, rstream.tabletTypes, 344 //todo: fix based on original stream 345 int64(binlogdatapb.VReplicationWorkflowType_Reshard), 346 int64(binlogdatapb.VReplicationWorkflowSubType_None), 347 rs.deferSecondaryKeys) 348 } 349 query := ig.String() 350 if _, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query); err != nil { 351 return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query) 352 } 353 return nil 354 }) 355 356 return err 357 } 358 359 func (rs *resharder) startStreams(ctx context.Context) error { 360 err := rs.forAll(rs.targetShards, func(target *topo.ShardInfo) error { 361 targetPrimary := rs.targetPrimaries[target.ShardName()] 362 query := fmt.Sprintf("update _vt.vreplication set state='Running' where db_name=%s", encodeString(targetPrimary.DbName())) 363 if _, err := rs.wr.tmc.VReplicationExec(ctx, targetPrimary.Tablet, query); err != nil { 364 return vterrors.Wrapf(err, "VReplicationExec(%v, %s)", targetPrimary.Tablet, query) 365 } 366 return nil 367 }) 368 return err 369 } 370 371 func (rs *resharder) forAll(shards []*topo.ShardInfo, f func(*topo.ShardInfo) error) error { 372 var wg sync.WaitGroup 373 allErrors := &concurrency.AllErrorRecorder{} 374 for _, shard := range shards { 375 wg.Add(1) 376 go func(shard *topo.ShardInfo) { 377 defer wg.Done() 378 379 if err := f(shard); err != nil { 380 allErrors.RecordError(err) 381 } 382 }(shard) 383 } 384 wg.Wait() 385 return allErrors.AggrError(vterrors.Aggregate) 386 }