// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package shardmeta

import (
	"encoding/json"
	"fmt"

	"github.com/go-mysql-org/go-mysql/mysql"
	"github.com/pingcap/tidb/pkg/util/dbutil"
	"github.com/pingcap/tidb/pkg/util/filter"
	"github.com/pingcap/tiflow/dm/pkg/binlog"
	"github.com/pingcap/tiflow/dm/pkg/gtid"
	"github.com/pingcap/tiflow/dm/pkg/log"
	"github.com/pingcap/tiflow/dm/pkg/terror"
	"github.com/pingcap/tiflow/dm/pkg/utils"
	"go.uber.org/zap"
)

// DDLItem records ddl information used in sharding sequence organization.
type DDLItem struct {
	FirstLocation binlog.Location `json:"-"`      // first DDL's binlog Pos, not the End_log_pos of the event
	DDLs          []string        `json:"ddls"`   // DDLs, these ddls are in the same QueryEvent
	Source        string          `json:"source"` // source table ID

	// FirstPosition and FirstGTIDSet exist only for json marshal/unmarshal:
	// the gtid set inside FirstLocation is an interface value and cannot be
	// (un)marshaled directly, so it is flattened into these two fields.
	FirstPosition mysql.Position `json:"first-position"`
	FirstGTIDSet  string         `json:"first-gtid-set"`
}

// NewDDLItem creates a new DDLItem.
44 func NewDDLItem(location binlog.Location, ddls []string, source string) *DDLItem { 45 gsetStr := "" 46 if location.GetGTID() != nil { 47 gsetStr = location.GetGTID().String() 48 } 49 50 return &DDLItem{ 51 FirstLocation: location, 52 DDLs: ddls, 53 Source: source, 54 55 FirstPosition: location.Position, 56 FirstGTIDSet: gsetStr, 57 } 58 } 59 60 // String returns the item's format string value. 61 func (item *DDLItem) String() string { 62 return fmt.Sprintf("first-location: %s ddls: %+v source: %s", item.FirstLocation, item.DDLs, item.Source) 63 } 64 65 // ShardingSequence records a list of DDLItem. 66 type ShardingSequence struct { 67 Items []*DDLItem `json:"items"` 68 } 69 70 // IsPrefixSequence checks whether a ShardingSequence is the prefix sequence of other. 71 func (seq *ShardingSequence) IsPrefixSequence(other *ShardingSequence) bool { 72 if len(seq.Items) > len(other.Items) { 73 return false 74 } 75 for idx := range seq.Items { 76 if !utils.CompareShardingDDLs(seq.Items[idx].DDLs, other.Items[idx].DDLs) { 77 return false 78 } 79 } 80 return true 81 } 82 83 // String returns the ShardingSequence's json string. 84 func (seq *ShardingSequence) String() string { 85 jsonSeq, err := json.Marshal(seq.Items) 86 if err != nil { 87 log.L().DPanic("fail to marshal ShardingSequence to json", zap.Reflect("shard sequence", seq)) 88 } 89 return string(jsonSeq) 90 } 91 92 // ShardingMeta stores sharding ddl sequence 93 // including global sequence and each source's own sequence 94 // NOTE: sharding meta is not thread safe, it must be used in thread safe context. 
95 type ShardingMeta struct { 96 activeIdx int // the first unsynced DDL index 97 global *ShardingSequence // merged sharding sequence of all source tables 98 sources map[string]*ShardingSequence // source table ID -> its sharding sequence 99 tableName string // table name (with schema) used in downstream meta db 100 101 enableGTID bool // whether enableGTID, used to compare location 102 } 103 104 // NewShardingMeta creates a new ShardingMeta. 105 func NewShardingMeta(schema, table string, enableGTID bool) *ShardingMeta { 106 return &ShardingMeta{ 107 tableName: dbutil.TableName(schema, table), 108 global: &ShardingSequence{Items: make([]*DDLItem, 0)}, 109 sources: make(map[string]*ShardingSequence), 110 111 enableGTID: enableGTID, 112 } 113 } 114 115 // RestoreFromData restores ShardingMeta from given data. 116 func (meta *ShardingMeta) RestoreFromData(sourceTableID string, activeIdx int, isGlobal bool, data []byte, flavor string) error { 117 items := make([]*DDLItem, 0) 118 err := json.Unmarshal(data, &items) 119 if err != nil { 120 return terror.ErrSyncUnitInvalidShardMeta.Delegate(err) 121 } 122 123 // set FirstLocation 124 for _, item := range items { 125 gset, err1 := gtid.ParserGTID(flavor, item.FirstGTIDSet) 126 if err1 != nil { 127 return err1 128 } 129 item.FirstLocation = binlog.NewLocation( 130 item.FirstPosition, 131 gset, 132 ) 133 } 134 135 if isGlobal { 136 meta.global = &ShardingSequence{Items: items} 137 } else { 138 meta.sources[sourceTableID] = &ShardingSequence{Items: items} 139 } 140 meta.activeIdx = activeIdx 141 return nil 142 } 143 144 // ActiveIdx returns the activeIdx of sharding meta. 145 func (meta *ShardingMeta) ActiveIdx() int { 146 return meta.activeIdx 147 } 148 149 // Reinitialize reinitialize the shardingmeta. 
func (meta *ShardingMeta) Reinitialize() {
	meta.activeIdx = 0
	meta.global = &ShardingSequence{make([]*DDLItem, 0)}
	meta.sources = make(map[string]*ShardingSequence)
}

// checkItemExists checks whether DDLItem exists in its source sequence
// if exists, return the index of DDLItem in source sequence.
// if not exists, return the next index in source sequence.
func (meta *ShardingMeta) checkItemExists(item *DDLItem) (int, bool) {
	source, ok := meta.sources[item.Source]
	if !ok {
		// no sequence recorded for this source table yet: the item would
		// become its first entry (index 0).
		return 0, false
	}
	for idx, ddlItem := range source.Items {
		// items are identified by the binlog location of their first DDL;
		// enableGTID decides whether GTID sets or file positions are compared.
		if binlog.CompareLocation(item.FirstLocation, ddlItem.FirstLocation, meta.enableGTID) == 0 {
			return idx, true
		}
	}
	return len(source.Items), false
}

// AddItem adds a new coming DDLItem into ShardingMeta
//  1. if DDLItem already exists in source sequence, check whether it is active DDL only
//  2. add the DDLItem into its related source sequence
//  3. if it is a new DDL in global sequence, which means len(source.Items) > len(global.Items), add it into global sequence
//  4. check the source sequence is the prefix-sequence of global sequence, if not, return an error
//
// returns:
//
//	active: whether the DDL will be processed in this round
func (meta *ShardingMeta) AddItem(item *DDLItem) (active bool, err error) {
	index, exists := meta.checkItemExists(item)
	if exists {
		// already recorded (e.g. re-sync): active only if it is the first
		// unsynced DDL in the sequence.
		return index == meta.activeIdx, nil
	}

	if source, ok := meta.sources[item.Source]; !ok {
		meta.sources[item.Source] = &ShardingSequence{Items: []*DDLItem{item}}
	} else {
		source.Items = append(source.Items, item)
	}

	// re-fetch the (possibly just created) source sequence after the insert above.
	global, source := meta.global, meta.sources[item.Source]
	if len(source.Items) > len(global.Items) {
		// this source is ahead of the merged sequence, so the DDL is new
		// globally as well.
		global.Items = append(global.Items, item)
	}

	if !source.IsPrefixSequence(global) {
		return false, terror.ErrSyncUnitDDLWrongSequence.Generate(source.Items, global.Items)
	}

	// `index` is the position the item was appended at (next index before append).
	return index == meta.activeIdx, nil
}

// GetGlobalActiveDDL returns activeDDL in global sequence.
func (meta *ShardingMeta) GetGlobalActiveDDL() *DDLItem {
	if meta.activeIdx < len(meta.global.Items) {
		return meta.global.Items[meta.activeIdx]
	}
	return nil
}

// GetGlobalItems returns global DDLItems.
func (meta *ShardingMeta) GetGlobalItems() []*DDLItem {
	return meta.global.Items
}

// GetActiveDDLItem returns the source table's active DDLItem
// if in DDL unsynced procedure, the active DDLItem means the syncing DDL
// if in re-sync procedure, the active DDLItem means the next syncing DDL in DDL syncing sequence, may be nil.
func (meta *ShardingMeta) GetActiveDDLItem(tableSource string) *DDLItem {
	source, ok := meta.sources[tableSource]
	if !ok {
		return nil
	}
	if meta.activeIdx < len(source.Items) {
		return source.Items[meta.activeIdx]
	}
	return nil
}

// InSequenceSharding returns whether in sequence sharding.
232 func (meta *ShardingMeta) InSequenceSharding() bool { 233 globalItemCount := len(meta.global.Items) 234 return globalItemCount > 0 && meta.activeIdx < globalItemCount 235 } 236 237 // ResolveShardingDDL resolves one sharding DDL and increase activeIdx 238 // if activeIdx equals to the length of global sharding sequence, it means all 239 // sharding DDL in this ShardingMeta sequence is resolved and will reinitialize 240 // the ShardingMeta, return true if all DDLs are resolved. 241 func (meta *ShardingMeta) ResolveShardingDDL() bool { 242 meta.activeIdx++ 243 if meta.activeIdx == len(meta.global.Items) { 244 meta.Reinitialize() 245 return true 246 } 247 return false 248 } 249 250 // ActiveDDLFirstLocation returns the first binlog position of active DDL. 251 func (meta *ShardingMeta) ActiveDDLFirstLocation() (binlog.Location, error) { 252 if meta.activeIdx >= len(meta.global.Items) { 253 return binlog.Location{}, terror.ErrSyncUnitDDLActiveIndexLarger.Generate(meta.activeIdx, meta.global.Items) 254 } 255 256 return meta.global.Items[meta.activeIdx].FirstLocation, nil 257 } 258 259 // FlushData returns sharding meta flush SQL and args. 260 func (meta *ShardingMeta) FlushData(sourceID, tableID string) ([]string, [][]interface{}) { 261 if len(meta.global.Items) == 0 { 262 sql2 := fmt.Sprintf("DELETE FROM %s where source_id=? and target_table_id=?", meta.tableName) 263 args2 := []interface{}{sourceID, tableID} 264 return []string{sql2}, [][]interface{}{args2} 265 } 266 var ( 267 sqls = make([]string, 1+len(meta.sources)) 268 args = make([][]interface{}, 0, 1+len(meta.sources)) 269 baseSQL = fmt.Sprintf("INSERT INTO %s (`source_id`, `target_table_id`, `source_table_id`, `active_index`, `is_global`, `data`) VALUES(?,?,?,?,?,?) 
ON DUPLICATE KEY UPDATE `data`=?, `active_index`=?", meta.tableName) 270 ) 271 for i := range sqls { 272 sqls[i] = baseSQL 273 } 274 args = append(args, []interface{}{sourceID, tableID, "", meta.activeIdx, true, meta.global.String(), meta.global.String(), meta.activeIdx}) 275 for source, seq := range meta.sources { 276 args = append(args, []interface{}{sourceID, tableID, source, meta.activeIdx, false, seq.String(), seq.String(), meta.activeIdx}) 277 } 278 return sqls, args 279 } 280 281 func (meta *ShardingMeta) genRemoveSQL(sourceID, tableID string) (string, []interface{}) { 282 sql := fmt.Sprintf("DELETE FROM %s where source_id=? and target_table_id=?", meta.tableName) 283 return sql, []interface{}{sourceID, tableID} 284 } 285 286 // CheckAndUpdate check and fix schema and table names for all the sharding groups. 287 func (meta *ShardingMeta) CheckAndUpdate(logger log.Logger, targetID string, schemaMap map[string]string, tablesMap map[string]map[string]string) ([]string, [][]interface{}, error) { 288 if len(schemaMap) == 0 && len(tablesMap) == 0 { 289 return nil, nil, nil 290 } 291 292 checkSourceID := func(source string) (string, bool) { 293 sourceTable := utils.UnpackTableID(source) 294 schemaName, tblName := sourceTable.Schema, sourceTable.Name 295 realSchema, changed := schemaMap[schemaName] 296 if !changed { 297 realSchema = schemaName 298 } 299 tblMap := tablesMap[schemaName] 300 realTable, ok := tblMap[tblName] 301 if ok { 302 changed = true 303 } else { 304 realTable = tblName 305 } 306 newTableID := utils.GenTableID(&filter.Table{Schema: realSchema, Name: realTable}) 307 return newTableID, changed 308 } 309 310 for _, item := range meta.global.Items { 311 newID, changed := checkSourceID(item.Source) 312 if changed { 313 item.Source = newID 314 } 315 } 316 317 sourceIDsMap := make(map[string]string) 318 for sourceID, seqs := range meta.sources { 319 newSourceID, changed := checkSourceID(sourceID) 320 for _, item := range seqs.Items { 321 newID, hasChanged 
:= checkSourceID(item.Source) 322 if hasChanged { 323 item.Source = newID 324 changed = true 325 } 326 } 327 if changed { 328 sourceIDsMap[sourceID] = newSourceID 329 } 330 } 331 var ( 332 sqls []string 333 args [][]interface{} 334 ) 335 for oldID, newID := range sourceIDsMap { 336 if oldID != newID { 337 seqs := meta.sources[oldID] 338 delete(meta.sources, oldID) 339 meta.sources[newID] = seqs 340 removeSQL, arg := meta.genRemoveSQL(oldID, targetID) 341 sqls = append(sqls, removeSQL) 342 args = append(args, arg) 343 } 344 logger.Info("fix sharding meta", zap.String("old", oldID), zap.String("new", newID)) 345 fixedSQLs, fixedArgs := meta.FlushData(newID, targetID) 346 sqls = append(sqls, fixedSQLs...) 347 args = append(args, fixedArgs...) 348 } 349 350 return sqls, args, nil 351 }