github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/util.go (about) 1 // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. 2 3 package restore 4 5 import ( 6 "bytes" 7 "context" 8 "fmt" 9 "regexp" 10 "strings" 11 "time" 12 13 _ "github.com/go-sql-driver/mysql" // mysql driver 14 "github.com/pingcap/errors" 15 backuppb "github.com/pingcap/kvproto/pkg/backup" 16 "github.com/pingcap/kvproto/pkg/import_sstpb" 17 "github.com/pingcap/kvproto/pkg/metapb" 18 "github.com/pingcap/log" 19 "github.com/pingcap/parser/model" 20 "github.com/pingcap/tidb/tablecodec" 21 "github.com/pingcap/tidb/util/codec" 22 "go.uber.org/zap" 23 "go.uber.org/zap/zapcore" 24 25 berrors "github.com/pingcap/br/pkg/errors" 26 "github.com/pingcap/br/pkg/glue" 27 "github.com/pingcap/br/pkg/logutil" 28 "github.com/pingcap/br/pkg/rtree" 29 "github.com/pingcap/br/pkg/summary" 30 "github.com/pingcap/br/pkg/utils" 31 ) 32 33 var ( 34 recordPrefixSep = []byte("_r") 35 quoteRegexp = regexp.MustCompile("`(?:[^`]|``)*`") 36 ) 37 38 // GetRewriteRules returns the rewrite rule of the new table and the old table. 
func GetRewriteRules(
	newTable, oldTable *model.TableInfo, newTimeStamp uint64,
) *RewriteRules {
	// Map every old physical table ID to its new counterpart.
	// Partitions are matched by name, since partition IDs are
	// reallocated when the table is re-created on restore.
	tableIDs := make(map[int64]int64)
	tableIDs[oldTable.ID] = newTable.ID
	if oldTable.Partition != nil {
		for _, srcPart := range oldTable.Partition.Definitions {
			for _, destPart := range newTable.Partition.Definitions {
				if srcPart.Name == destPart.Name {
					tableIDs[srcPart.ID] = destPart.ID
				}
			}
		}
	}
	// Indexes are also matched by name, for the same reason.
	indexIDs := make(map[int64]int64)
	for _, srcIndex := range oldTable.Indices {
		for _, destIndex := range newTable.Indices {
			if srcIndex.Name == destIndex.Name {
				indexIDs[srcIndex.ID] = destIndex.ID
			}
		}
	}

	dataRules := make([]*import_sstpb.RewriteRule, 0)
	for oldTableID, newTableID := range tableIDs {
		// One rule for the record-key prefix (t{tableID}_r) ...
		dataRules = append(dataRules, &import_sstpb.RewriteRule{
			OldKeyPrefix: append(tablecodec.EncodeTablePrefix(oldTableID), recordPrefixSep...),
			NewKeyPrefix: append(tablecodec.EncodeTablePrefix(newTableID), recordPrefixSep...),
			NewTimestamp: newTimeStamp,
		})
		// ... and one rule per matched index prefix (t{tableID}_i{indexID}).
		for oldIndexID, newIndexID := range indexIDs {
			dataRules = append(dataRules, &import_sstpb.RewriteRule{
				OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID),
				NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID),
				NewTimestamp: newTimeStamp,
			})
		}
	}

	return &RewriteRules{
		Data: dataRules,
	}
}

// GetSSTMetaFromFile compares the keys in file, region and rewrite rules, then returns a sst conn.
// The range of the returned sst meta is [regionRule.NewKeyPrefix, append(regionRule.NewKeyPrefix, 0xff)].
func GetSSTMetaFromFile(
	id []byte,
	file *backuppb.File,
	region *metapb.Region,
	regionRule *import_sstpb.RewriteRule,
) import_sstpb.SSTMeta {
	// Get the column family of the file by the file name.
	// NOTE(review): if the name contains neither CF marker, cfName stays "" —
	// presumably file names always carry one; confirm with the backup writer.
	var cfName string
	if strings.Contains(file.GetName(), defaultCFName) {
		cfName = defaultCFName
	} else if strings.Contains(file.GetName(), writeCFName) {
		cfName = writeCFName
	}
	// Find the overlapped part between the file and the region.
	// Here we rewrites the keys to compare with the keys of the region.
	rangeStart := regionRule.GetNewKeyPrefix()
	// rangeStart = max(rangeStart, region.StartKey)
	if bytes.Compare(rangeStart, region.GetStartKey()) < 0 {
		rangeStart = region.GetStartKey()
	}

	// Append 10 * 0xff to make sure rangeEnd cover all file key
	// If choose to regionRule.NewKeyPrefix + 1, it may cause WrongPrefix here
	// https://github.com/tikv/tikv/blob/970a9bf2a9ea782a455ae579ad237aaf6cb1daec/
	// components/sst_importer/src/sst_importer.rs#L221
	suffix := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
	// Copy the prefix before appending so the rule's slice is never aliased.
	rangeEnd := append(append([]byte{}, regionRule.GetNewKeyPrefix()...), suffix...)
	// rangeEnd = min(rangeEnd, region.EndKey); an empty region end key means +inf.
	if len(region.GetEndKey()) > 0 && bytes.Compare(rangeEnd, region.GetEndKey()) > 0 {
		rangeEnd = region.GetEndKey()
	}

	// An inverted range means the rewrite rule and region do not overlap at
	// all, which indicates a broken invariant upstream — fail loudly.
	if bytes.Compare(rangeStart, rangeEnd) > 0 {
		log.Panic("range start exceed range end",
			logutil.File(file),
			logutil.Key("startKey", rangeStart),
			logutil.Key("endKey", rangeEnd))
	}

	log.Debug("get sstMeta",
		logutil.File(file),
		logutil.Key("startKey", rangeStart),
		logutil.Key("endKey", rangeEnd))

	return import_sstpb.SSTMeta{
		Uuid:   id,
		CfName: cfName,
		Range: &import_sstpb.Range{
			Start: rangeStart,
			End:   rangeEnd,
		},
		Length:      file.GetSize_(),
		RegionId:    region.GetId(),
		RegionEpoch: region.GetRegionEpoch(),
	}
}

// MakeDBPool makes a session pool with specified size by sessionFactory.
143 func MakeDBPool(size uint, dbFactory func() (*DB, error)) ([]*DB, error) { 144 dbPool := make([]*DB, 0, size) 145 for i := uint(0); i < size; i++ { 146 db, e := dbFactory() 147 if e != nil { 148 return dbPool, e 149 } 150 dbPool = append(dbPool, db) 151 } 152 return dbPool, nil 153 } 154 155 // EstimateRangeSize estimates the total range count by file. 156 func EstimateRangeSize(files []*backuppb.File) int { 157 result := 0 158 for _, f := range files { 159 if strings.HasSuffix(f.GetName(), "_write.sst") { 160 result++ 161 } 162 } 163 return result 164 } 165 166 // MapTableToFiles makes a map that mapping table ID to its backup files. 167 // aware that one file can and only can hold one table. 168 func MapTableToFiles(files []*backuppb.File) map[int64][]*backuppb.File { 169 result := map[int64][]*backuppb.File{} 170 for _, file := range files { 171 tableID := tablecodec.DecodeTableID(file.GetStartKey()) 172 tableEndID := tablecodec.DecodeTableID(file.GetEndKey()) 173 if tableID != tableEndID { 174 log.Panic("key range spread between many files.", 175 zap.String("file name", file.Name), 176 logutil.Key("startKey", file.StartKey), 177 logutil.Key("endKey", file.EndKey)) 178 } 179 if tableID == 0 { 180 log.Panic("invalid table key of file", 181 zap.String("file name", file.Name), 182 logutil.Key("startKey", file.StartKey), 183 logutil.Key("endKey", file.EndKey)) 184 } 185 result[tableID] = append(result[tableID], file) 186 } 187 return result 188 } 189 190 // GoValidateFileRanges validate files by a stream of tables and yields 191 // tables with range. 192 func GoValidateFileRanges( 193 ctx context.Context, 194 tableStream <-chan CreatedTable, 195 fileOfTable map[int64][]*backuppb.File, 196 splitSizeBytes, splitKeyCount uint64, 197 errCh chan<- error, 198 ) <-chan TableWithRange { 199 // Could we have a smaller outCh size? 
200 outCh := make(chan TableWithRange, len(fileOfTable)) 201 go func() { 202 defer close(outCh) 203 defer log.Info("all range generated") 204 for { 205 select { 206 case <-ctx.Done(): 207 errCh <- ctx.Err() 208 return 209 case t, ok := <-tableStream: 210 if !ok { 211 return 212 } 213 files := fileOfTable[t.OldTable.Info.ID] 214 if partitions := t.OldTable.Info.Partition; partitions != nil { 215 log.Debug("table partition", 216 zap.Stringer("database", t.OldTable.DB.Name), 217 zap.Stringer("table", t.Table.Name), 218 zap.Any("partition info", partitions), 219 ) 220 for _, partition := range partitions.Definitions { 221 files = append(files, fileOfTable[partition.ID]...) 222 } 223 } 224 for _, file := range files { 225 err := ValidateFileRewriteRule(file, t.RewriteRule) 226 if err != nil { 227 errCh <- err 228 return 229 } 230 } 231 // Merge small ranges to reduce split and scatter regions. 232 ranges, stat, err := MergeFileRanges( 233 files, splitSizeBytes, splitKeyCount) 234 if err != nil { 235 errCh <- err 236 return 237 } 238 log.Info("merge and validate file", 239 zap.Stringer("database", t.OldTable.DB.Name), 240 zap.Stringer("table", t.Table.Name), 241 zap.Int("Files(total)", stat.TotalFiles), 242 zap.Int("File(write)", stat.TotalWriteCFFile), 243 zap.Int("File(default)", stat.TotalDefaultCFFile), 244 zap.Int("Region(total)", stat.TotalRegions), 245 zap.Int("Regoin(keys avg)", stat.RegionKeysAvg), 246 zap.Int("Region(bytes avg)", stat.RegionBytesAvg), 247 zap.Int("Merged(regions)", stat.MergedRegions), 248 zap.Int("Merged(keys avg)", stat.MergedRegionKeysAvg), 249 zap.Int("Merged(bytes avg)", stat.MergedRegionBytesAvg)) 250 251 tableWithRange := TableWithRange{ 252 CreatedTable: t, 253 Range: ranges, 254 } 255 log.Debug("sending range info", 256 zap.Stringer("table", t.Table.Name), 257 zap.Int("files", len(files)), 258 zap.Int("range size", len(ranges)), 259 zap.Int("output channel size", len(outCh))) 260 outCh <- tableWithRange 261 } 262 } 263 }() 264 return 
outCh 265 } 266 267 // ValidateFileRewriteRule uses rewrite rules to validate the ranges of a file. 268 func ValidateFileRewriteRule(file *backuppb.File, rewriteRules *RewriteRules) error { 269 // Check if the start key has a matched rewrite key 270 _, startRule := rewriteRawKey(file.GetStartKey(), rewriteRules) 271 if rewriteRules != nil && startRule == nil { 272 tableID := tablecodec.DecodeTableID(file.GetStartKey()) 273 log.Error( 274 "cannot find rewrite rule for file start key", 275 zap.Int64("tableID", tableID), 276 logutil.File(file), 277 ) 278 return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") 279 } 280 // Check if the end key has a matched rewrite key 281 _, endRule := rewriteRawKey(file.GetEndKey(), rewriteRules) 282 if rewriteRules != nil && endRule == nil { 283 tableID := tablecodec.DecodeTableID(file.GetEndKey()) 284 log.Error( 285 "cannot find rewrite rule for file end key", 286 zap.Int64("tableID", tableID), 287 logutil.File(file), 288 ) 289 return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule") 290 } 291 // the new prefix of the start rule must equal or less than the new prefix of the end rule 292 if bytes.Compare(startRule.GetNewKeyPrefix(), endRule.GetNewKeyPrefix()) > 0 { 293 startTableID := tablecodec.DecodeTableID(file.GetStartKey()) 294 endTableID := tablecodec.DecodeTableID(file.GetEndKey()) 295 log.Error( 296 "unexpected rewrite rules", 297 zap.Int64("startTableID", startTableID), 298 zap.Int64("endTableID", endTableID), 299 zap.Stringer("startRule", startRule), 300 zap.Stringer("endRule", endRule), 301 logutil.File(file), 302 ) 303 return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "unexpected rewrite rules") 304 } 305 306 startID := tablecodec.DecodeTableID(file.GetStartKey()) 307 endID := tablecodec.DecodeTableID(file.GetEndKey()) 308 if startID != endID { 309 log.Error("table ids mismatch", 310 zap.Int64("startID", startID), 311 zap.Int64("endID", endID), 312 
logutil.File(file)) 313 return errors.Annotate(berrors.ErrRestoreTableIDMismatch, "file start_key end_key table ids mismatch") 314 } 315 return nil 316 } 317 318 // Rewrites a raw key and returns a encoded key. 319 func rewriteRawKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) { 320 if rewriteRules == nil { 321 return codec.EncodeBytes([]byte{}, key), nil 322 } 323 if len(key) > 0 { 324 rule := matchOldPrefix(key, rewriteRules) 325 ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1) 326 return codec.EncodeBytes([]byte{}, ret), rule 327 } 328 return nil, nil 329 } 330 331 func matchOldPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule { 332 for _, rule := range rewriteRules.Data { 333 if bytes.HasPrefix(key, rule.GetOldKeyPrefix()) { 334 return rule 335 } 336 } 337 return nil 338 } 339 340 func matchNewPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule { 341 for _, rule := range rewriteRules.Data { 342 if bytes.HasPrefix(key, rule.GetNewKeyPrefix()) { 343 return rule 344 } 345 } 346 return nil 347 } 348 349 func truncateTS(key []byte) []byte { 350 if len(key) == 0 { 351 return nil 352 } 353 return key[:len(key)-8] 354 } 355 356 // SplitRanges splits region by 357 // 1. data range after rewrite. 358 // 2. rewrite rules. 
359 func SplitRanges( 360 ctx context.Context, 361 client *Client, 362 ranges []rtree.Range, 363 rewriteRules *RewriteRules, 364 updateCh glue.Progress, 365 ) error { 366 start := time.Now() 367 defer func() { 368 elapsed := time.Since(start) 369 summary.CollectDuration("split region", elapsed) 370 }() 371 splitter := NewRegionSplitter(NewSplitClient(client.GetPDClient(), client.GetTLSConfig())) 372 373 return splitter.Split(ctx, ranges, rewriteRules, func(keys [][]byte) { 374 for range keys { 375 updateCh.Inc() 376 } 377 }) 378 } 379 380 func rewriteFileKeys(file *backuppb.File, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) { 381 startID := tablecodec.DecodeTableID(file.GetStartKey()) 382 endID := tablecodec.DecodeTableID(file.GetEndKey()) 383 var rule *import_sstpb.RewriteRule 384 if startID == endID { 385 startKey, rule = rewriteRawKey(file.GetStartKey(), rewriteRules) 386 if rewriteRules != nil && rule == nil { 387 log.Error("cannot find rewrite rule", 388 logutil.Key("startKey", file.GetStartKey()), 389 zap.Reflect("rewrite data", rewriteRules.Data)) 390 err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule for start key") 391 return 392 } 393 endKey, rule = rewriteRawKey(file.GetEndKey(), rewriteRules) 394 if rewriteRules != nil && rule == nil { 395 err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule for end key") 396 return 397 } 398 } else { 399 log.Error("table ids dont matched", 400 zap.Int64("startID", startID), 401 zap.Int64("endID", endID), 402 logutil.Key("startKey", startKey), 403 logutil.Key("endKey", endKey)) 404 err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id") 405 } 406 return 407 } 408 409 func encodeKeyPrefix(key []byte) []byte { 410 encodedPrefix := make([]byte, 0) 411 ungroupedLen := len(key) % 8 412 encodedPrefix = append(encodedPrefix, codec.EncodeBytes([]byte{}, key[:len(key)-ungroupedLen])...) 
413 return append(encodedPrefix[:len(encodedPrefix)-9], key[len(key)-ungroupedLen:]...) 414 } 415 416 // ZapTables make zap field of table for debuging, including table names. 417 func ZapTables(tables []CreatedTable) zapcore.Field { 418 return logutil.AbbreviatedArray("tables", tables, func(input interface{}) []string { 419 tables := input.([]CreatedTable) 420 names := make([]string, 0, len(tables)) 421 for _, t := range tables { 422 names = append(names, fmt.Sprintf("%s.%s", 423 utils.EncloseName(t.OldTable.DB.Name.String()), 424 utils.EncloseName(t.OldTable.Info.Name.String()))) 425 } 426 return names 427 }) 428 } 429 430 // ParseQuoteName parse the quote `db`.`table` name, and split it. 431 func ParseQuoteName(name string) (db, table string) { 432 names := quoteRegexp.FindAllStringSubmatch(name, -1) 433 if len(names) != 2 { 434 log.Panic("failed to parse schema name", 435 zap.String("origin name", name), 436 zap.Any("parsed names", names)) 437 } 438 db = names[0][0] 439 table = names[1][0] 440 db = strings.ReplaceAll(unQuoteName(db), "``", "`") 441 table = strings.ReplaceAll(unQuoteName(table), "``", "`") 442 return db, table 443 } 444 445 func unQuoteName(name string) string { 446 name = strings.TrimPrefix(name, "`") 447 return strings.TrimSuffix(name, "`") 448 }