github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/metautil/metafile.go

// Copyright 2021 PingCAP, Inc. Licensed under Apache-2.0.

package metautil

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"sync"
	"time"

	"github.com/docker/go-units"
	"github.com/gogo/protobuf/proto"
	"github.com/opentracing/opentracing-go"
	"github.com/pingcap/errors"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/log"
	"github.com/pingcap/parser/model"
	"github.com/pingcap/tidb/statistics/handle"
	"github.com/pingcap/tidb/tablecodec"
	"go.uber.org/zap"

	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/logutil"
	"github.com/pingcap/br/pkg/storage"
	"github.com/pingcap/br/pkg/summary"
)

const (
	// LockFile represents the lock file name.
	LockFile = "backup.lock"
	// MetaFile represents the backup meta file name.
	MetaFile = "backupmeta"
	// MetaJSONFile represents the backup meta JSON file name.
	MetaJSONFile = "backupmeta.json"
	// MaxBatchSize represents the internal channel buffer size of MetaWriter and MetaReader.
	MaxBatchSize = 1024

	// MetaFileSize represents the size limit of one MetaFile.
	MetaFileSize = 128 * units.MiB
)

const (
	// MetaV1 represents the old version of backupmeta.
	// The old version doesn't have a version field, so it is set to 0 for compatibility.
	MetaV1 = iota
	// MetaV2 represents the new version of backupmeta.
	MetaV2
)

func walkLeafMetaFile(
	ctx context.Context, storage storage.ExternalStorage, file *backuppb.MetaFile, output func(*backuppb.MetaFile),
) error {
	if file == nil {
		return nil
	}
	if len(file.MetaFiles) == 0 {
		output(file)
		return nil
	}
	for _, node := range file.MetaFiles {
		content, err := storage.ReadFile(ctx, node.Name)
		if err != nil {
			return errors.Trace(err)
		}
		checksum := sha256.Sum256(content)
		if !bytes.Equal(node.Sha256, checksum[:]) {
			return errors.Annotatef(berrors.ErrInvalidMetaFile,
				"checksum mismatch, expect %x, got %x", node.Sha256, checksum[:])
		}
		child := &backuppb.MetaFile{}
		if err = proto.Unmarshal(content, child); err != nil {
			return errors.Trace(err)
		}
		if err = walkLeafMetaFile(ctx, storage, child, output); err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}

// Table wraps the schema and files of a table.
type Table struct {
	DB              *model.DBInfo
	Info            *model.TableInfo
	Crc64Xor        uint64
	TotalKvs        uint64
	TotalBytes      uint64
	Files           []*backuppb.File
	TiFlashReplicas int
	Stats           *handle.JSONTable
}

// NoChecksum checks whether the table has a calculated checksum.
func (tbl *Table) NoChecksum() bool {
	return tbl.Crc64Xor == 0 && tbl.TotalKvs == 0 && tbl.TotalBytes == 0
}

// MetaReader wraps a reader to read both the old and new versions of backupmeta.
type MetaReader struct {
	storage    storage.ExternalStorage
	backupMeta *backuppb.BackupMeta
}

// NewMetaReader creates a MetaReader.
func NewMetaReader(backupMeta *backuppb.BackupMeta, storage storage.ExternalStorage) *MetaReader {
	return &MetaReader{
		storage:    storage,
		backupMeta: backupMeta,
	}
}
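// Illustrative sketch (not part of the original file): walkLeafMetaFile
// visits leaves depth-first and verifies each child's SHA-256 checksum
// before recursing. A MetaFile with an empty MetaFiles slice is itself a
// leaf, so the callback fires once without touching storage. The function
// name and inputs below are hypothetical.
func exampleWalkLeaf(ctx context.Context, store storage.ExternalStorage) error {
	leaf := &backuppb.MetaFile{
		Schemas: []*backuppb.Schema{{Db: []byte(`{"db_name":{"L":"test"}}`)}},
	}
	return walkLeafMetaFile(ctx, store, leaf, func(m *backuppb.MetaFile) {
		fmt.Println("visited leaf with", len(m.Schemas), "schema(s)")
	})
}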
func (reader *MetaReader) readDDLs(ctx context.Context, output func([]byte)) error {
	// Read backupmeta v1 metafiles when the version is v1,
	// or when the version field doesn't exist (old versions).
	if reader.backupMeta.Version == MetaV1 {
		output(reader.backupMeta.Ddls)
		return nil
	}
	// Read backupmeta v2 metafiles.
	outputFn := func(m *backuppb.MetaFile) {
		for _, s := range m.Ddls {
			output(s)
		}
	}
	return walkLeafMetaFile(ctx, reader.storage, reader.backupMeta.DdlIndexes, outputFn)
}

func (reader *MetaReader) readSchemas(ctx context.Context, output func(*backuppb.Schema)) error {
	// Read backupmeta v1 metafiles.
	for _, s := range reader.backupMeta.Schemas {
		output(s)
	}
	// Read backupmeta v2 metafiles.
	outputFn := func(m *backuppb.MetaFile) {
		for _, s := range m.Schemas {
			output(s)
		}
	}
	return walkLeafMetaFile(ctx, reader.storage, reader.backupMeta.SchemaIndex, outputFn)
}

func (reader *MetaReader) readDataFiles(ctx context.Context, output func(*backuppb.File)) error {
	// Read backupmeta v1 data files.
	for _, f := range reader.backupMeta.Files {
		output(f)
	}
	// Read backupmeta v2 data files.
	outputFn := func(m *backuppb.MetaFile) {
		for _, f := range m.DataFiles {
			output(f)
		}
	}
	return walkLeafMetaFile(ctx, reader.storage, reader.backupMeta.FileIndex, outputFn)
}

// ArchiveSize returns the total size of the given archive data files.
func (reader *MetaReader) ArchiveSize(ctx context.Context, files []*backuppb.File) uint64 {
	total := uint64(0)
	for _, file := range files {
		total += file.Size_
	}
	return total
}

// ReadDDLs reads the DDLs from the backupmeta.
// This function is compatible with the old backupmeta.
func (reader *MetaReader) ReadDDLs(ctx context.Context) ([]byte, error) {
	var err error
	ch := make(chan interface{}, MaxBatchSize)
	errCh := make(chan error)
	go func() {
		if err = reader.readDDLs(ctx, func(s []byte) { ch <- s }); err != nil {
			errCh <- errors.Trace(err)
		}
		close(ch)
	}()

	var ddlBytes []byte
	var ddlBytesArray [][]byte
	for {
		itemCount := 0
		err := receiveBatch(ctx, errCh, ch, MaxBatchSize, func(item interface{}) error {
			itemCount++
			if reader.backupMeta.Version == MetaV1 {
				ddlBytes = item.([]byte)
			} else {
				// We collect all DDLs from the metafiles.
				ddlBytesArray = append(ddlBytesArray, item.([]byte))
			}
			return nil
		})
		if err != nil {
			return nil, errors.Trace(err)
		}

		// Reading is finished when a batch comes back empty.
		if itemCount == 0 {
			if len(ddlBytesArray) != 0 {
				ddlBytes = mergeDDLs(ddlBytesArray)
			}
			return ddlBytes, nil
		}
	}
}
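// Illustrative sketch (not part of the original file): the typical read path
// for DDLs. ReadDDLs transparently handles both MetaV1 (Ddls stored inline in
// the backupmeta) and MetaV2 (DdlIndexes pointing at external metafiles).
// `store` and `meta` are assumed to come from an already-opened backup, and
// the function name is hypothetical.
func exampleReadDDLs(ctx context.Context, store storage.ExternalStorage, meta *backuppb.BackupMeta) ([]byte, error) {
	reader := NewMetaReader(meta, store)
	// In both versions the result is a single JSON array of DDL jobs.
	return reader.ReadDDLs(ctx)
}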
// ReadSchemasFiles reads the schemas and data files from the backupmeta.
// This function is compatible with the old backupmeta.
func (reader *MetaReader) ReadSchemasFiles(ctx context.Context, output chan<- *Table) error {
	ch := make(chan interface{}, MaxBatchSize)
	errCh := make(chan error, 1)
	go func() {
		if err := reader.readSchemas(ctx, func(s *backuppb.Schema) { ch <- s }); err != nil {
			errCh <- errors.Trace(err)
		}
		close(ch)
	}()

	// It's not easy to balance memory and time costs for the current structure.
	// Put all files in memory due to https://github.com/pingcap/br/issues/705.
	fileMap := make(map[int64][]*backuppb.File)
	outputFn := func(file *backuppb.File) {
		tableID := tablecodec.DecodeTableID(file.GetStartKey())
		if tableID == 0 {
			log.Panic("tableID must not equal to 0", logutil.File(file))
		}
		fileMap[tableID] = append(fileMap[tableID], file)
	}
	err := reader.readDataFiles(ctx, outputFn)
	if err != nil {
		return errors.Trace(err)
	}

	for {
		// table ID -> *Table
		tableMap := make(map[int64]*Table, MaxBatchSize)
		err := receiveBatch(ctx, errCh, ch, MaxBatchSize, func(item interface{}) error {
			s := item.(*backuppb.Schema)
			tableInfo := &model.TableInfo{}
			if err := json.Unmarshal(s.Table, tableInfo); err != nil {
				return errors.Trace(err)
			}
			dbInfo := &model.DBInfo{}
			if err := json.Unmarshal(s.Db, dbInfo); err != nil {
				return errors.Trace(err)
			}
			var stats *handle.JSONTable
			if s.Stats != nil {
				stats = &handle.JSONTable{}
				if err := json.Unmarshal(s.Stats, stats); err != nil {
					return errors.Trace(err)
				}
			}
			table := &Table{
				DB:              dbInfo,
				Info:            tableInfo,
				Crc64Xor:        s.Crc64Xor,
				TotalKvs:        s.TotalKvs,
				TotalBytes:      s.TotalBytes,
				TiFlashReplicas: int(s.TiflashReplicas),
				Stats:           stats,
			}
			if files, ok := fileMap[tableInfo.ID]; ok {
				table.Files = append(table.Files, files...)
			}
			if tableInfo.Partition != nil {
				// A partitioned table can have many table IDs (partition IDs).
				for _, p := range tableInfo.Partition.Definitions {
					if files, ok := fileMap[p.ID]; ok {
						table.Files = append(table.Files, files...)
					}
				}
			}
			tableMap[tableInfo.ID] = table
			return nil
		})
		if err != nil {
			return errors.Trace(err)
		}
		if len(tableMap) == 0 {
			// We have read all tables.
			return nil
		}
		for _, table := range tableMap {
			output <- table
		}
	}
}

func receiveBatch(
	ctx context.Context, errCh chan error, ch <-chan interface{}, maxBatchSize int,
	collectItem func(interface{}) error,
) error {
	batchSize := 0
	for {
		select {
		case <-ctx.Done():
			return errors.Trace(ctx.Err())
		case err := <-errCh:
			return errors.Trace(err)
		case s, ok := <-ch:
			if !ok {
				return nil
			}
			if err := collectItem(s); err != nil {
				return errors.Trace(err)
			}
		}
		// Return if the batch is large enough.
		batchSize++
		if batchSize >= maxBatchSize {
			return nil
		}
	}
}

// AppendOp represents the operation type of meta.
type AppendOp int

const (
	// AppendMetaFile represents the MetaFile type.
	AppendMetaFile AppendOp = 0
	// AppendDataFile represents the DataFile type.
	// It records the file meta from TiKV.
	AppendDataFile AppendOp = 1
	// AppendSchema represents the schema from TiDB.
	AppendSchema AppendOp = 2
	// AppendDDL represents the DDLs before the last backup.
	AppendDDL AppendOp = 3
)
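// Illustrative sketch (not part of the original file): draining
// ReadSchemasFiles through a channel. ReadSchemasFiles only returns after all
// tables have been sent, so the caller runs it in a goroutine and closes the
// output channel when it is done. The function name is hypothetical.
func exampleCollectTables(ctx context.Context, reader *MetaReader) ([]*Table, error) {
	output := make(chan *Table, MaxBatchSize)
	errCh := make(chan error, 1)
	go func() {
		errCh <- reader.ReadSchemasFiles(ctx, output)
		close(output)
	}()
	tables := make([]*Table, 0)
	for table := range output {
		tables = append(tables, table)
	}
	return tables, <-errCh
}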
func (op AppendOp) name() string {
	var name string
	switch op {
	case AppendMetaFile:
		name = "metafile"
	case AppendDataFile:
		name = "datafile"
	case AppendSchema:
		name = "schema"
	case AppendDDL:
		name = "ddl"
	default:
		log.Panic("unsupported op type", zap.Any("op", op))
	}
	return name
}

// appendFile appends an item to the MetaFile and returns its size and item count.
func (op AppendOp) appendFile(a *backuppb.MetaFile, b interface{}) (int, int) {
	size := 0
	itemCount := 0
	switch op {
	case AppendMetaFile:
		a.MetaFiles = append(a.MetaFiles, b.(*backuppb.File))
		size += int(b.(*backuppb.File).Size_)
		itemCount++
	case AppendDataFile:
		// Receive a batch of files, because the write and default SST files must stay adjacent.
		files := b.([]*backuppb.File)
		a.DataFiles = append(a.DataFiles, files...)
		for _, f := range files {
			itemCount++
			size += int(f.Size_)
		}
	case AppendSchema:
		a.Schemas = append(a.Schemas, b.(*backuppb.Schema))
		itemCount++
		size += b.(*backuppb.Schema).Size()
	case AppendDDL:
		a.Ddls = append(a.Ddls, b.([]byte))
		itemCount++
		size += len(b.([]byte))
	}

	return size, itemCount
}

type sizedMetaFile struct {
	// A stack-like array; we always append to the last node.
	root      *backuppb.MetaFile
	size      int
	itemNum   int
	sizeLimit int
}

// NewSizedMetaFile creates a sizedMetaFile.
func NewSizedMetaFile(sizeLimit int) *sizedMetaFile {
	return &sizedMetaFile{
		root: &backuppb.MetaFile{
			Schemas:   make([]*backuppb.Schema, 0),
			DataFiles: make([]*backuppb.File, 0),
			RawRanges: make([]*backuppb.RawRange, 0),
		},
		sizeLimit: sizeLimit,
	}
}

// append adds the item to the root MetaFile and reports whether the size
// limit has been exceeded, i.e. whether the caller should flush.
func (f *sizedMetaFile) append(file interface{}, op AppendOp) bool {
	// Append to root.
	// TODO: maybe use a multi-level index.
	size, itemCount := op.appendFile(f.root, file)
	f.itemNum += itemCount
	f.size += size
	// f.size is reset outside after a flush.
	return f.size > f.sizeLimit
}

// MetaWriter wraps a writer, and it should be compatible with the old version of backupmeta.
type MetaWriter struct {
	storage           storage.ExternalStorage
	metafileSizeLimit int
	// a flag to control whether we generate v1 or v2 meta.
	useV2Meta  bool
	backupMeta *backuppb.BackupMeta
	// used to generate MetaFile names.
	metafileSizes  map[string]int
	metafileSeqNum map[string]int
	metafiles      *sizedMetaFile
	// the start time of StartWriteMetasAsync;
	// it's used to calculate the time cost.
	start time.Time
	// wg waits until StartWriteMetasAsync exits.
	wg sync.WaitGroup
	// internal item channel
	metasCh chan interface{}
	errCh   chan error

	// records the total number of items in one write-meta job.
	flushedItemNum int
}

// NewMetaWriter creates a MetaWriter.
func NewMetaWriter(storage storage.ExternalStorage, metafileSizeLimit int, useV2Meta bool) *MetaWriter {
	return &MetaWriter{
		start:             time.Now(),
		storage:           storage,
		metafileSizeLimit: metafileSizeLimit,
		useV2Meta:         useV2Meta,
		// Keep compatibility with the old backupmeta.Ddls:
		// old version: Ddls, _ := json.Marshal(make([]*model.Job, 0))
		backupMeta:     &backuppb.BackupMeta{Ddls: []byte("[]")},
		metafileSizes:  make(map[string]int),
		metafiles:      NewSizedMetaFile(metafileSizeLimit),
		metafileSeqNum: make(map[string]int),
	}
}
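// Illustrative sketch (not part of the original file): the intended
// MetaWriter lifecycle for one kind of meta. Each StartWriteMetasAsync /
// FinishWriteMetas pair handles exactly one AppendOp; `store` and `schemas`
// are assumed inputs and the function name is hypothetical.
func exampleWriteSchemas(ctx context.Context, store storage.ExternalStorage, schemas []*backuppb.Schema) error {
	writer := NewMetaWriter(store, MetaFileSize, true /* useV2Meta */)
	writer.StartWriteMetasAsync(ctx, AppendSchema)
	for _, s := range schemas {
		// Send blocks until the background goroutine accepts the item or
		// reports a flush error.
		if err := writer.Send(s, AppendSchema); err != nil {
			return err
		}
	}
	// FinishWriteMetas flushes whatever is still buffered and, for v2,
	// writes the final backupmeta carrying the index.
	return writer.FinishWriteMetas(ctx, AppendSchema)
}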
func (writer *MetaWriter) reset() {
	writer.metasCh = make(chan interface{}, MaxBatchSize)
	writer.errCh = make(chan error)

	// Reset flushedItemNum for the next meta.
	writer.flushedItemNum = 0
}

// Update updates some properties of the backupmeta.
func (writer *MetaWriter) Update(f func(m *backuppb.BackupMeta)) {
	f(writer.backupMeta)
}

// Send sends the item to the buffer.
func (writer *MetaWriter) Send(m interface{}, op AppendOp) error {
	select {
	case writer.metasCh <- m:
	// receive an error from StartWriteMetasAsync
	case err := <-writer.errCh:
		return errors.Trace(err)
	}
	return nil
}

func (writer *MetaWriter) close() {
	close(writer.metasCh)
}

// StartWriteMetasAsync writes four kinds of meta into backupmeta:
// 1. file
// 2. schema
// 3. ddl
// 4. rawRange (raw kv)
// When useBackupMetaV2 is enabled, it generates a multi-level index backupmeta v2;
// otherwise it generates backupmeta as before for compatibility.
// The caller should call FinishWriteMetas after StartWriteMetasAsync.
func (writer *MetaWriter) StartWriteMetasAsync(ctx context.Context, op AppendOp) {
	writer.reset()
	writer.start = time.Now()
	writer.wg.Add(1)
	go func() {
		defer func() {
			writer.wg.Done()
			// Close errCh after metasCh is closed.
			close(writer.errCh)
		}()
		for {
			select {
			case <-ctx.Done():
				log.Info("exit write metas by context done")
				return
			case meta, ok := <-writer.metasCh:
				if !ok {
					log.Info("write metas finished", zap.String("type", op.name()))
					return
				}
				needFlush := writer.metafiles.append(meta, op)
				if writer.useV2Meta && needFlush {
					err := writer.flushMetasV2(ctx, op)
					if err != nil {
						writer.errCh <- err
					}
				}
			}
		}
	}()
}

// FinishWriteMetas closes the channel in StartWriteMetasAsync and flushes the buffered data.
func (writer *MetaWriter) FinishWriteMetas(ctx context.Context, op AppendOp) error {
	writer.close()
	// We always start one goroutine to write one kind of meta.
	writer.wg.Wait()
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("MetaWriter.Finish", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}
	var err error
	// Flush the buffered meta.
	if !writer.useV2Meta {
		// Set the schema version.
		writer.backupMeta.Version = MetaV1
		err = writer.flushMetasV1(ctx, op)
	} else {
		err = writer.flushMetasV2(ctx, op)
		if err != nil {
			return errors.Trace(err)
		}
		// Set the schema version.
		writer.backupMeta.Version = MetaV2
		// Flush the final backupmeta.
		err = writer.flushBackupMeta(ctx)
	}
	if err != nil {
		return errors.Trace(err)
	}
	costs := time.Since(writer.start)
	if op == AppendDataFile {
		summary.CollectSuccessUnit("backup ranges", writer.flushedItemNum, costs)
	}
	log.Info("finish the write metas", zap.Int("item", writer.flushedItemNum),
		zap.String("type", op.name()), zap.Duration("costs", costs))
	return nil
}
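// Illustrative sketch (not part of the original file): Update is how callers
// set top-level backupmeta fields, e.g. the cluster ID and backup versions,
// before FinishWriteMetas flushes the final file. The function name and
// parameters are hypothetical.
func exampleUpdateMeta(writer *MetaWriter, clusterID, startVersion, endVersion uint64) {
	writer.Update(func(m *backuppb.BackupMeta) {
		m.ClusterId = clusterID
		m.StartVersion = startVersion
		m.EndVersion = endVersion
	})
}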
func (writer *MetaWriter) flushBackupMeta(ctx context.Context) error {
	backupMetaData, err := proto.Marshal(writer.backupMeta)
	if err != nil {
		return errors.Trace(err)
	}
	log.Debug("backup meta", zap.Reflect("meta", writer.backupMeta))
	log.Info("save backup meta", zap.Int("size", len(backupMetaData)))
	return writer.storage.WriteFile(ctx, MetaFile, backupMetaData)
}

// flushMetasV1 keeps compatibility with the old version.
func (writer *MetaWriter) flushMetasV1(ctx context.Context, op AppendOp) error {
	switch op {
	case AppendDataFile:
		writer.backupMeta.Files = writer.metafiles.root.DataFiles
	case AppendSchema:
		writer.backupMeta.Schemas = writer.metafiles.root.Schemas
	case AppendDDL:
		writer.backupMeta.Ddls = mergeDDLs(writer.metafiles.root.Ddls)
	default:
		log.Panic("unsupported op type", zap.Any("op", op))
	}
	writer.flushedItemNum += writer.metafiles.itemNum
	return writer.flushBackupMeta(ctx)
}

func (writer *MetaWriter) flushMetasV2(ctx context.Context, op AppendOp) error {
	var index *backuppb.MetaFile
	switch op {
	case AppendSchema:
		if len(writer.metafiles.root.Schemas) == 0 {
			return nil
		}
		// Add the metafile to backupmeta and reset metafiles.
		if writer.backupMeta.SchemaIndex == nil {
			writer.backupMeta.SchemaIndex = &backuppb.MetaFile{}
		}
		index = writer.backupMeta.SchemaIndex
	case AppendDataFile:
		if len(writer.metafiles.root.DataFiles) == 0 {
			return nil
		}
		// Add the metafile to backupmeta and reset metafiles.
		if writer.backupMeta.FileIndex == nil {
			writer.backupMeta.FileIndex = &backuppb.MetaFile{}
		}
		index = writer.backupMeta.FileIndex
	case AppendDDL:
		if len(writer.metafiles.root.Ddls) == 0 {
			return nil
		}
		if writer.backupMeta.DdlIndexes == nil {
			writer.backupMeta.DdlIndexes = &backuppb.MetaFile{}
		}
		index = writer.backupMeta.DdlIndexes
	}
	content, err := writer.metafiles.root.Marshal()
	if err != nil {
		return errors.Trace(err)
	}

	name := op.name()
	writer.metafileSizes[name] += writer.metafiles.size
	// Flush metafiles to external storage.
	writer.metafileSeqNum["metafiles"] += 1
	fname := fmt.Sprintf("backupmeta.%s.%09d", name, writer.metafileSeqNum["metafiles"])
	if err = writer.storage.WriteFile(ctx, fname, content); err != nil {
		return errors.Trace(err)
	}
	checksum := sha256.Sum256(content)
	file := &backuppb.File{
		Name:   fname,
		Sha256: checksum[:],
		Size_:  uint64(len(content)),
	}

	index.MetaFiles = append(index.MetaFiles, file)
	writer.flushedItemNum += writer.metafiles.itemNum
	writer.metafiles = NewSizedMetaFile(writer.metafiles.sizeLimit)
	return nil
}
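// Illustrative note (not part of the original file): flushMetasV2 derives
// file names from a per-writer counter keyed by the literal "metafiles", so
// the sequence is shared across op types, e.g. backupmeta.schema.000000001
// followed by backupmeta.datafile.000000002. A hypothetical helper mirroring
// the naming scheme:
func exampleMetafileName(op AppendOp, seq int) string {
	return fmt.Sprintf("backupmeta.%s.%09d", op.name(), seq)
}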
// ArchiveSize returns the total size of the archived data files.
func (writer *MetaWriter) ArchiveSize() uint64 {
	total := uint64(0)
	for _, file := range writer.backupMeta.Files {
		total += file.Size_
	}
	total += uint64(writer.metafileSizes["datafile"])
	return total
}

// Backupmeta clones a backupmeta.
func (writer *MetaWriter) Backupmeta() *backuppb.BackupMeta {
	clone := proto.Clone(writer.backupMeta)
	return clone.(*backuppb.BackupMeta)
}

// mergeDDLs joins the DDL JSON fragments into a single JSON array.
func mergeDDLs(ddls [][]byte) []byte {
	b := bytes.Join(ddls, []byte(`,`))
	b = append(b, 0)
	copy(b[1:], b[0:])
	b[0] = byte('[')
	b = append(b, ']')
	return b
}
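// Illustrative sketch (not part of the original file): mergeDDLs stitches the
// collected JSON fragments back into one JSON array by joining with commas,
// shifting the buffer right by one byte, and wrapping it in brackets.
func exampleMergeDDLs() {
	merged := mergeDDLs([][]byte{[]byte(`{"id":1}`), []byte(`{"id":2}`)})
	fmt.Println(string(merged)) // prints [{"id":1},{"id":2}]
}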