github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/pkg/shardddl/optimism/table.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package optimism 15 16 import ( 17 "context" 18 "encoding/json" 19 "fmt" 20 21 "github.com/pingcap/tiflow/dm/common" 22 "github.com/pingcap/tiflow/dm/pkg/etcdutil" 23 "go.etcd.io/etcd/api/v3/mvccpb" 24 clientv3 "go.etcd.io/etcd/client/v3" 25 ) 26 27 // SourceTables represents the upstream/sources tables for a data migration **subtask**. 28 // This information should be persistent in etcd so can be retrieved after the DM-master leader restarted or changed. 29 // We need this because only one shard group exists for **every** target table in the optimistic mode (in DM-master), 30 // so we need DM-worker to report its upstream table names to DM-master. 31 // NOTE: `Task` and `Source` are redundant in the etcd key path for convenient. 32 // SourceTables is putted when starting the subtask by DM-worker, 33 // and is updated when new tables added/removed in the upstream source by DM-worker, 34 // and **may be** deleted when stopping the subtask by DM-worker later. 35 type SourceTables struct { 36 Task string `json:"task"` // data migration task name 37 Source string `json:"source"` // upstream source ID 38 39 // downstream-schema-name -> downstream-table-name -> upstream-schema-name -> upstream-table-name -> struct{}, 40 // multiple downstream/target tables (<downstream-schema-name, downstream-table-name> pair) may exist in one subtask. 41 Tables map[string]map[string]map[string]map[string]struct{} `json:"tables"` 42 43 // only used to report to the caller of the watcher, do not marsh it. 44 // if it's true, it means the SourceTables has been deleted in etcd. 45 IsDeleted bool `json:"-"` 46 } 47 48 // TargetTable represents some upstream/sources tables for **one** target table. 49 // It is often generated from `SourceTables` for the specified downstream table. 50 type TargetTable struct { 51 Task string `json:"task"` // data migration task name 52 Source string `json:"source"` // upstream source ID 53 DownSchema string `json:"down-schema"` // downstream schema name 54 DownTable string `json:"down-table"` // downstream table name 55 56 // upstream-schema-name -> upstream-table-name -> struct{} 57 UpTables map[string]map[string]struct{} `json:"up-tables"` 58 } 59 60 // emptyTargetTable returns an empty TargetTable instance. 61 func emptyTargetTable() TargetTable { 62 return TargetTable{} 63 } 64 65 // newTargetTable returns a TargetTable instance. 66 func newTargetTable(task, source, downSchema, downTable string, 67 upTables map[string]map[string]struct{}, 68 ) TargetTable { 69 return TargetTable{ 70 Task: task, 71 Source: source, 72 DownSchema: downSchema, 73 DownTable: downTable, 74 UpTables: upTables, 75 } 76 } 77 78 // IsEmpty returns whether the TargetTable instance is empty. 79 func (tt TargetTable) IsEmpty() bool { 80 return tt.Task == "" // now we treat it as empty if no task name specified. 81 } 82 83 // NewSourceTables creates a new SourceTables instances. 84 func NewSourceTables(task, source string) SourceTables { 85 return SourceTables{ 86 Task: task, 87 Source: source, 88 Tables: make(map[string]map[string]map[string]map[string]struct{}), 89 } 90 } 91 92 // String implements Stringer interface. 93 func (st SourceTables) String() string { 94 s, _ := st.toJSON() 95 return s 96 } 97 98 // toJSON returns the string of JSON represent. 99 func (st SourceTables) toJSON() (string, error) { 100 data, err := json.Marshal(st) 101 if err != nil { 102 return "", err 103 } 104 return string(data), nil 105 } 106 107 // AddTable adds a table into SourceTables. 108 // it returns whether added (not exist before). 109 func (st *SourceTables) AddTable(upSchema, upTable, downSchema, downTable string) bool { 110 if _, ok := st.Tables[downSchema]; !ok { 111 st.Tables[downSchema] = make(map[string]map[string]map[string]struct{}) 112 } 113 if _, ok := st.Tables[downSchema][downTable]; !ok { 114 st.Tables[downSchema][downTable] = make(map[string]map[string]struct{}) 115 } 116 if _, ok := st.Tables[downSchema][downTable][upSchema]; !ok { 117 st.Tables[downSchema][downTable][upSchema] = make(map[string]struct{}) 118 } 119 if _, ok := st.Tables[downSchema][downTable][upSchema][upTable]; !ok { 120 st.Tables[downSchema][downTable][upSchema][upTable] = struct{}{} 121 return true 122 } 123 return false 124 } 125 126 // RemoveTable removes a table from SourceTables. 127 // it returns whether removed (exist before). 128 func (st *SourceTables) RemoveTable(upSchema, upTable, downSchema, downTable string) bool { 129 if _, ok := st.Tables[downSchema]; !ok { 130 return false 131 } 132 if _, ok := st.Tables[downSchema][downTable]; !ok { 133 return false 134 } 135 if _, ok := st.Tables[downSchema][downTable][upSchema]; !ok { 136 return false 137 } 138 if _, ok := st.Tables[downSchema][downTable][upSchema][upTable]; !ok { 139 return false 140 } 141 142 delete(st.Tables[downSchema][downTable][upSchema], upTable) 143 if len(st.Tables[downSchema][downTable][upSchema]) == 0 { 144 delete(st.Tables[downSchema][downTable], upSchema) 145 } 146 if len(st.Tables[downSchema][downTable]) == 0 { 147 delete(st.Tables[downSchema], downTable) 148 } 149 if len(st.Tables[downSchema]) == 0 { 150 delete(st.Tables, downSchema) 151 } 152 return true 153 } 154 155 // RouteTable represents a table in upstream/downstream. 156 type RouteTable struct { 157 UpSchema string 158 UpTable string 159 DownSchema string 160 DownTable string 161 } 162 163 func (st *SourceTables) toRouteTable() map[RouteTable]struct{} { 164 tables := make(map[RouteTable]struct{}) 165 for downSchema, downTables := range st.Tables { 166 for downTable, upSchemas := range downTables { 167 for upSchema, upTables := range upSchemas { 168 for upTable := range upTables { 169 t := RouteTable{ 170 UpSchema: upSchema, 171 UpTable: upTable, 172 DownSchema: downSchema, 173 DownTable: downTable, 174 } 175 tables[t] = struct{}{} 176 } 177 } 178 } 179 } 180 return tables 181 } 182 183 func DiffSourceTables(oldST, newST SourceTables) (map[RouteTable]struct{}, map[RouteTable]struct{}) { 184 oldTables := oldST.toRouteTable() 185 newTables := newST.toRouteTable() 186 187 droppedTables := make(map[RouteTable]struct{}) 188 addedTables := make(map[RouteTable]struct{}) 189 for table := range oldTables { 190 if _, ok := newTables[table]; !ok { 191 droppedTables[table] = struct{}{} 192 } else { 193 delete(newTables, table) 194 } 195 } 196 197 for table := range newTables { 198 addedTables[table] = struct{}{} 199 } 200 return addedTables, droppedTables 201 } 202 203 // TargetTable returns a TargetTable instance for a specified downstream table, 204 // returns an empty TargetTable instance if no tables exist. 205 func (st *SourceTables) TargetTable(downSchema, downTable string) TargetTable { 206 ett := emptyTargetTable() 207 if _, ok := st.Tables[downSchema]; !ok { 208 return ett 209 } 210 if _, ok := st.Tables[downSchema][downTable]; !ok { 211 return ett 212 } 213 214 // copy upstream tables. 215 tables := make(map[string]map[string]struct{}) 216 for upSchema, upTables := range st.Tables[downSchema][downTable] { 217 tables[upSchema] = make(map[string]struct{}) 218 for upTable := range upTables { 219 tables[upSchema][upTable] = struct{}{} 220 } 221 } 222 223 return newTargetTable(st.Task, st.Source, downSchema, downTable, tables) 224 } 225 226 // sourceTablesFromJSON constructs SourceTables from its JSON represent. 227 func sourceTablesFromJSON(s string) (st SourceTables, err error) { 228 err = json.Unmarshal([]byte(s), &st) 229 return 230 } 231 232 // PutSourceTables puts source tables into etcd. 233 // This function should often be called by DM-worker. 234 func PutSourceTables(cli *clientv3.Client, st SourceTables) (int64, error) { 235 op, err := putSourceTablesOp(st) 236 if err != nil { 237 return 0, err 238 } 239 _, rev, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(op)) 240 return rev, err 241 } 242 243 // DeleteSourceTables deletes the source tables in etcd. 244 // This function should often be called by DM-worker. 245 func DeleteSourceTables(cli *clientv3.Client, st SourceTables) (int64, error) { 246 key := common.ShardDDLOptimismSourceTablesKeyAdapter.Encode(st.Task, st.Source) 247 _, rev, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(clientv3.OpDelete(key))) 248 return rev, err 249 } 250 251 // GetAllSourceTables gets all source tables in etcd currently. 252 // This function should often be called by DM-master. 253 // k/k/v: task-name -> source-ID -> source tables. 254 func GetAllSourceTables(cli *clientv3.Client) (map[string]map[string]SourceTables, int64, error) { 255 respTxn, _, err := etcdutil.DoTxnWithRepeatable(cli, etcdutil.ThenOpFunc(clientv3.OpGet(common.ShardDDLOptimismSourceTablesKeyAdapter.Path(), clientv3.WithPrefix()))) 256 if err != nil { 257 return nil, 0, err 258 } 259 resp := respTxn.Responses[0].GetResponseRange() 260 261 stm := make(map[string]map[string]SourceTables) 262 for _, kv := range resp.Kvs { 263 st, err2 := sourceTablesFromJSON(string(kv.Value)) 264 if err2 != nil { 265 return nil, 0, err2 266 } 267 268 if _, ok := stm[st.Task]; !ok { 269 stm[st.Task] = make(map[string]SourceTables) 270 } 271 stm[st.Task][st.Source] = st 272 } 273 274 return stm, resp.Header.Revision, nil 275 } 276 277 // WatchSourceTables watches PUT & DELETE operations for source tables. 278 // This function should often be called by DM-master. 279 func WatchSourceTables(ctx context.Context, cli *clientv3.Client, revision int64, 280 outCh chan<- SourceTables, errCh chan<- error, 281 ) { 282 wCtx, cancel := context.WithCancel(ctx) 283 defer cancel() 284 ch := cli.Watch(wCtx, common.ShardDDLOptimismSourceTablesKeyAdapter.Path(), 285 clientv3.WithPrefix(), clientv3.WithRev(revision)) 286 287 for { 288 select { 289 case <-ctx.Done(): 290 return 291 case resp, ok := <-ch: 292 if !ok { 293 return 294 } 295 if resp.Canceled { 296 select { 297 case errCh <- resp.Err(): 298 case <-ctx.Done(): 299 } 300 return 301 } 302 303 for _, ev := range resp.Events { 304 var ( 305 st SourceTables 306 err error 307 ) 308 309 switch ev.Type { 310 case mvccpb.PUT: 311 st, err = sourceTablesFromJSON(string(ev.Kv.Value)) 312 case mvccpb.DELETE: 313 st, err = sourceTablesFromKey(string(ev.Kv.Key)) 314 st.IsDeleted = true 315 default: 316 // this should not happen. 317 err = fmt.Errorf("unsupported ectd event type %v", ev.Type) 318 } 319 320 if err != nil { 321 select { 322 case errCh <- err: 323 case <-ctx.Done(): 324 return 325 } 326 } else { 327 select { 328 case outCh <- st: 329 case <-ctx.Done(): 330 return 331 } 332 } 333 } 334 } 335 } 336 } 337 338 // sourceTablesFromKey constructs an incomplete SourceTables from an etcd key. 339 func sourceTablesFromKey(key string) (SourceTables, error) { 340 var st SourceTables 341 ks, err := common.ShardDDLOptimismSourceTablesKeyAdapter.Decode(key) 342 if err != nil { 343 return st, err 344 } 345 st.Task = ks[0] 346 st.Source = ks[1] 347 return st, nil 348 } 349 350 // putSourceTablesOp returns a PUT etcd operation for source tables. 351 func putSourceTablesOp(st SourceTables) (clientv3.Op, error) { 352 value, err := st.toJSON() 353 if err != nil { 354 return clientv3.Op{}, err 355 } 356 key := common.ShardDDLOptimismSourceTablesKeyAdapter.Encode(st.Task, st.Source) 357 return clientv3.OpPut(key, value), nil 358 } 359 360 // CheckSourceTables try to check and fix all the source schemas and table names. 361 func CheckSourceTables(cli *clientv3.Client, source string, schemaMap map[string]string, talesMap map[string]map[string]string) error { 362 allSourceTables, _, err := GetAllSourceTables(cli) 363 if err != nil { 364 return err 365 } 366 367 for _, taskSourceTables := range allSourceTables { 368 sourceTables, ok := taskSourceTables[source] 369 if !ok { 370 continue 371 } 372 schemaKeys := make([]string, 0) 373 tblKeys := make([]string, 0) 374 hasChange := false 375 for _, tableSources := range sourceTables.Tables { 376 for _, sources := range tableSources { 377 for schema, tbls := range sources { 378 if _, ok := schemaMap[schema]; ok { 379 schemaKeys = append(schemaKeys, schema) 380 hasChange = true 381 } 382 383 tblMap, ok := talesMap[schema] 384 if !ok { 385 continue 386 } 387 for tbl := range tbls { 388 if t, ok := tblMap[tbl]; ok { 389 tblKeys = append(tblKeys, t) 390 hasChange = true 391 } 392 } 393 for _, t := range tblKeys { 394 tbls[tblMap[t]] = tbls[t] 395 delete(tbls, t) 396 } 397 tblKeys = tblKeys[:0] 398 } 399 for _, s := range schemaKeys { 400 sources[schemaMap[s]] = sources[s] 401 delete(sources, s) 402 } 403 schemaKeys = schemaKeys[:0] 404 } 405 } 406 if hasChange { 407 if _, err = PutSourceTables(cli, sourceTables); err != nil { 408 return err 409 } 410 } 411 } 412 return err 413 }