github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/util.go

// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
	"bytes"
	"context"
	"fmt"
	"regexp"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql" // mysql driver
	"github.com/pingcap/errors"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/kvproto/pkg/import_sstpb"
	"github.com/pingcap/kvproto/pkg/metapb"
	"github.com/pingcap/log"
	"github.com/pingcap/parser/model"
	"github.com/pingcap/tidb/tablecodec"
	"github.com/pingcap/tidb/util/codec"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"

	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/glue"
	"github.com/pingcap/br/pkg/logutil"
	"github.com/pingcap/br/pkg/rtree"
	"github.com/pingcap/br/pkg/summary"
	"github.com/pingcap/br/pkg/utils"
)

var (
	recordPrefixSep = []byte("_r")
	quoteRegexp     = regexp.MustCompile("`(?:[^`]|``)*`")
)

// GetRewriteRules returns the rewrite rules that map the old table's key
// prefixes (record and index) to the new table's, matching partitions by
// name and indices by name.
func GetRewriteRules(
	newTable, oldTable *model.TableInfo, newTimeStamp uint64,
) *RewriteRules {
	tableIDs := make(map[int64]int64)
	tableIDs[oldTable.ID] = newTable.ID
	if oldTable.Partition != nil {
		for _, srcPart := range oldTable.Partition.Definitions {
			for _, destPart := range newTable.Partition.Definitions {
				if srcPart.Name == destPart.Name {
					tableIDs[srcPart.ID] = destPart.ID
				}
			}
		}
	}
	indexIDs := make(map[int64]int64)
	for _, srcIndex := range oldTable.Indices {
		for _, destIndex := range newTable.Indices {
			if srcIndex.Name == destIndex.Name {
				indexIDs[srcIndex.ID] = destIndex.ID
			}
		}
	}

	dataRules := make([]*import_sstpb.RewriteRule, 0)
	for oldTableID, newTableID := range tableIDs {
		dataRules = append(dataRules, &import_sstpb.RewriteRule{
			OldKeyPrefix: append(tablecodec.EncodeTablePrefix(oldTableID), recordPrefixSep...),
			NewKeyPrefix: append(tablecodec.EncodeTablePrefix(newTableID), recordPrefixSep...),
			NewTimestamp: newTimeStamp,
		})
		for oldIndexID, newIndexID := range indexIDs {
			dataRules = append(dataRules, &import_sstpb.RewriteRule{
				OldKeyPrefix: tablecodec.EncodeTableIndexPrefix(oldTableID, oldIndexID),
				NewKeyPrefix: tablecodec.EncodeTableIndexPrefix(newTableID, newIndexID),
				NewTimestamp: newTimeStamp,
			})
		}
	}

	return &RewriteRules{
		Data: dataRules,
	}
}

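// A minimal usage sketch for GetRewriteRules. The identifiers below are
// hypothetical; in BR the two TableInfos would come from the backup schema
// and the freshly created table, and the timestamp is the restore TS:
//
//	rules := GetRewriteRules(createdTableInfo, backupTableInfo, restoreTS)
//	for _, rule := range rules.Data {
//		// Each rule maps an old record prefix (t{oldID}_r) or index
//		// prefix (t{oldID}_i{indexID}) to the new table's counterpart.
//		fmt.Printf("%x -> %x\n", rule.OldKeyPrefix, rule.NewKeyPrefix)
//	}
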
// GetSSTMetaFromFile compares the keys in the file, the region, and the
// rewrite rule, then returns an SSTMeta.
// The range of the returned SSTMeta is
// [max(regionRule.NewKeyPrefix, region.StartKey), min(regionRule.NewKeyPrefix + 10*0xff, region.EndKey)].
func GetSSTMetaFromFile(
	id []byte,
	file *backuppb.File,
	region *metapb.Region,
	regionRule *import_sstpb.RewriteRule,
) import_sstpb.SSTMeta {
	// Get the column family of the file by the file name.
	var cfName string
	if strings.Contains(file.GetName(), defaultCFName) {
		cfName = defaultCFName
	} else if strings.Contains(file.GetName(), writeCFName) {
		cfName = writeCFName
	}
	// Find the overlapped part between the file and the region.
	// Here we rewrite the keys so they can be compared with the keys of the region.
	rangeStart := regionRule.GetNewKeyPrefix()
	// rangeStart = max(rangeStart, region.StartKey)
	if bytes.Compare(rangeStart, region.GetStartKey()) < 0 {
		rangeStart = region.GetStartKey()
	}

	// Append 10 * 0xff to make sure rangeEnd covers all of the file's keys.
	// If we chose regionRule.NewKeyPrefix + 1 instead, it might cause a WrongPrefix error; see
	// https://github.com/tikv/tikv/blob/970a9bf2a9ea782a455ae579ad237aaf6cb1daec/
	// components/sst_importer/src/sst_importer.rs#L221
	suffix := []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
	rangeEnd := append(append([]byte{}, regionRule.GetNewKeyPrefix()...), suffix...)
	// rangeEnd = min(rangeEnd, region.EndKey)
	if len(region.GetEndKey()) > 0 && bytes.Compare(rangeEnd, region.GetEndKey()) > 0 {
		rangeEnd = region.GetEndKey()
	}

	if bytes.Compare(rangeStart, rangeEnd) > 0 {
		log.Panic("range start exceeds range end",
			logutil.File(file),
			logutil.Key("startKey", rangeStart),
			logutil.Key("endKey", rangeEnd))
	}

	log.Debug("get sstMeta",
		logutil.File(file),
		logutil.Key("startKey", rangeStart),
		logutil.Key("endKey", rangeEnd))

	return import_sstpb.SSTMeta{
		Uuid:   id,
		CfName: cfName,
		Range: &import_sstpb.Range{
			Start: rangeStart,
			End:   rangeEnd,
		},
		Length:      file.GetSize_(),
		RegionId:    region.GetId(),
		RegionEpoch: region.GetRegionEpoch(),
	}
}

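// A sketch of how the returned meta relates to its inputs (assuming a file
// that overlaps the region; the variable names are illustrative):
//
//	meta := GetSSTMetaFromFile(uuid, file, region, rule)
//	// meta.Range is the rewritten file range clamped to the region:
//	// meta.Range.Start == max(rule.NewKeyPrefix, region.StartKey)
//	// meta.Range.End   == min(rule.NewKeyPrefix + 10*0xff, region.EndKey)
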
// MakeDBPool makes a DB pool of the specified size using dbFactory.
func MakeDBPool(size uint, dbFactory func() (*DB, error)) ([]*DB, error) {
	dbPool := make([]*DB, 0, size)
	for i := uint(0); i < size; i++ {
		db, e := dbFactory()
		if e != nil {
			return dbPool, e
		}
		dbPool = append(dbPool, db)
	}
	return dbPool, nil
}

// EstimateRangeSize estimates the total number of ranges from the backup
// files, counting one range per write CF SST file.
func EstimateRangeSize(files []*backuppb.File) int {
	result := 0
	for _, f := range files {
		if strings.HasSuffix(f.GetName(), "_write.sst") {
			result++
		}
	}
	return result
}

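// For example, a backup range stored as a pair of SST files (hypothetical
// names "1_default.sst" and "1_write.sst") counts as one range here, since
// only the write CF file marks the range.
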
// MapTableToFiles makes a map from table ID to the table's backup files.
// Be aware that a file can and only can hold keys of one table.
func MapTableToFiles(files []*backuppb.File) map[int64][]*backuppb.File {
	result := map[int64][]*backuppb.File{}
	for _, file := range files {
		tableID := tablecodec.DecodeTableID(file.GetStartKey())
		tableEndID := tablecodec.DecodeTableID(file.GetEndKey())
		if tableID != tableEndID {
			log.Panic("key range of one file spans multiple tables",
				zap.String("file name", file.Name),
				logutil.Key("startKey", file.StartKey),
				logutil.Key("endKey", file.EndKey))
		}
		if tableID == 0 {
			log.Panic("invalid table key of file",
				zap.String("file name", file.Name),
				logutil.Key("startKey", file.StartKey),
				logutil.Key("endKey", file.EndKey))
		}
		result[tableID] = append(result[tableID], file)
	}
	return result
}

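// A minimal behavior sketch (hypothetical files; the table ID is decoded
// from each file's data keys, and DecodeTableID returns 0 for keys that do
// not carry the table prefix):
//
//	m := MapTableToFiles([]*backuppb.File{fileA100, fileB100, fileC101})
//	// m[100] == []*backuppb.File{fileA100, fileB100}
//	// m[101] == []*backuppb.File{fileC101}
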
// GoValidateFileRanges validates files against a stream of tables and yields
// tables with their ranges.
func GoValidateFileRanges(
	ctx context.Context,
	tableStream <-chan CreatedTable,
	fileOfTable map[int64][]*backuppb.File,
	splitSizeBytes, splitKeyCount uint64,
	errCh chan<- error,
) <-chan TableWithRange {
	// Could we have a smaller outCh size?
	outCh := make(chan TableWithRange, len(fileOfTable))
	go func() {
		defer close(outCh)
		defer log.Info("all range generated")
		for {
			select {
			case <-ctx.Done():
				errCh <- ctx.Err()
				return
			case t, ok := <-tableStream:
				if !ok {
					return
				}
				files := fileOfTable[t.OldTable.Info.ID]
				if partitions := t.OldTable.Info.Partition; partitions != nil {
					log.Debug("table partition",
						zap.Stringer("database", t.OldTable.DB.Name),
						zap.Stringer("table", t.Table.Name),
						zap.Any("partition info", partitions),
					)
					for _, partition := range partitions.Definitions {
						files = append(files, fileOfTable[partition.ID]...)
					}
				}
				for _, file := range files {
					err := ValidateFileRewriteRule(file, t.RewriteRule)
					if err != nil {
						errCh <- err
						return
					}
				}
				// Merge small ranges to reduce split and scatter regions.
				ranges, stat, err := MergeFileRanges(
					files, splitSizeBytes, splitKeyCount)
				if err != nil {
					errCh <- err
					return
				}
				log.Info("merge and validate file",
					zap.Stringer("database", t.OldTable.DB.Name),
					zap.Stringer("table", t.Table.Name),
					zap.Int("Files(total)", stat.TotalFiles),
					zap.Int("File(write)", stat.TotalWriteCFFile),
					zap.Int("File(default)", stat.TotalDefaultCFFile),
					zap.Int("Region(total)", stat.TotalRegions),
					zap.Int("Region(keys avg)", stat.RegionKeysAvg),
					zap.Int("Region(bytes avg)", stat.RegionBytesAvg),
					zap.Int("Merged(regions)", stat.MergedRegions),
					zap.Int("Merged(keys avg)", stat.MergedRegionKeysAvg),
					zap.Int("Merged(bytes avg)", stat.MergedRegionBytesAvg))

				tableWithRange := TableWithRange{
					CreatedTable: t,
					Range:        ranges,
				}
				log.Debug("sending range info",
					zap.Stringer("table", t.Table.Name),
					zap.Int("files", len(files)),
					zap.Int("range size", len(ranges)),
					zap.Int("output channel size", len(outCh)))
				outCh <- tableWithRange
			}
		}
	}()
	return outCh
}

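// A sketch of the intended pipeline wiring. The producer side is an
// assumption here; any goroutine that sends CreatedTable values and closes
// tableCh when done would do:
//
//	errCh := make(chan error, 1)
//	rangeCh := GoValidateFileRanges(ctx, tableCh, fileMap, splitSize, splitKeys, errCh)
//	for tr := range rangeCh {
//		// consume TableWithRange values; once rangeCh closes,
//		// check errCh to distinguish success from failure.
//	}
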
// ValidateFileRewriteRule uses rewrite rules to validate the ranges of a file.
func ValidateFileRewriteRule(file *backuppb.File, rewriteRules *RewriteRules) error {
	// Check if the start key has a matched rewrite key
	_, startRule := rewriteRawKey(file.GetStartKey(), rewriteRules)
	if rewriteRules != nil && startRule == nil {
		tableID := tablecodec.DecodeTableID(file.GetStartKey())
		log.Error(
			"cannot find rewrite rule for file start key",
			zap.Int64("tableID", tableID),
			logutil.File(file),
		)
		return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule")
	}
	// Check if the end key has a matched rewrite key
	_, endRule := rewriteRawKey(file.GetEndKey(), rewriteRules)
	if rewriteRules != nil && endRule == nil {
		tableID := tablecodec.DecodeTableID(file.GetEndKey())
		log.Error(
			"cannot find rewrite rule for file end key",
			zap.Int64("tableID", tableID),
			logutil.File(file),
		)
		return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule")
	}
	// The new prefix of the start rule must be less than or equal to the new prefix of the end rule.
	if bytes.Compare(startRule.GetNewKeyPrefix(), endRule.GetNewKeyPrefix()) > 0 {
		startTableID := tablecodec.DecodeTableID(file.GetStartKey())
		endTableID := tablecodec.DecodeTableID(file.GetEndKey())
		log.Error(
			"unexpected rewrite rules",
			zap.Int64("startTableID", startTableID),
			zap.Int64("endTableID", endTableID),
			zap.Stringer("startRule", startRule),
			zap.Stringer("endRule", endRule),
			logutil.File(file),
		)
		return errors.Annotate(berrors.ErrRestoreInvalidRewrite, "unexpected rewrite rules")
	}

	startID := tablecodec.DecodeTableID(file.GetStartKey())
	endID := tablecodec.DecodeTableID(file.GetEndKey())
	if startID != endID {
		log.Error("table ids mismatch",
			zap.Int64("startID", startID),
			zap.Int64("endID", endID),
			logutil.File(file))
		return errors.Annotate(berrors.ErrRestoreTableIDMismatch, "file start_key end_key table ids mismatch")
	}
	return nil
}

// rewriteRawKey rewrites a raw key using the matching rewrite rule and
// returns the encoded key, along with the rule that was applied (nil if
// none matched).
func rewriteRawKey(key []byte, rewriteRules *RewriteRules) ([]byte, *import_sstpb.RewriteRule) {
	if rewriteRules == nil {
		return codec.EncodeBytes([]byte{}, key), nil
	}
	if len(key) > 0 {
		rule := matchOldPrefix(key, rewriteRules)
		ret := bytes.Replace(key, rule.GetOldKeyPrefix(), rule.GetNewKeyPrefix(), 1)
		return codec.EncodeBytes([]byte{}, ret), rule
	}
	return nil, nil
}

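// Behavior sketch: with nil rules the key is merely encoded; with rules, the
// first rule whose old prefix matches is applied before encoding, and a nil
// rule in the second return value means no prefix matched:
//
//	encoded, rule := rewriteRawKey(key, nil)   // rule == nil, key only encoded
//	encoded, rule = rewriteRawKey(key, rules)  // rule == nil iff no prefix matched
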
func matchOldPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule {
	for _, rule := range rewriteRules.Data {
		if bytes.HasPrefix(key, rule.GetOldKeyPrefix()) {
			return rule
		}
	}
	return nil
}

func matchNewPrefix(key []byte, rewriteRules *RewriteRules) *import_sstpb.RewriteRule {
	for _, rule := range rewriteRules.Data {
		if bytes.HasPrefix(key, rule.GetNewKeyPrefix()) {
			return rule
		}
	}
	return nil
}

// truncateTS strips the 8-byte MVCC timestamp suffix from an encoded key.
func truncateTS(key []byte) []byte {
	if len(key) == 0 {
		return nil
	}
	return key[:len(key)-8]
}

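// TiKV MVCC keys append an 8-byte timestamp after the encoded user key, so
// for example (variable names are illustrative):
//
//	withTS := append(encodedKey, tsSuffix...)   // tsSuffix is 8 bytes
//	bytes.Equal(truncateTS(withTS), encodedKey) // true
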
// SplitRanges splits regions by
// 1. the data ranges after rewrite, and
// 2. the rewrite rules.
func SplitRanges(
	ctx context.Context,
	client *Client,
	ranges []rtree.Range,
	rewriteRules *RewriteRules,
	updateCh glue.Progress,
) error {
	start := time.Now()
	defer func() {
		elapsed := time.Since(start)
		summary.CollectDuration("split region", elapsed)
	}()
	splitter := NewRegionSplitter(NewSplitClient(client.GetPDClient(), client.GetTLSConfig()))

	return splitter.Split(ctx, ranges, rewriteRules, func(keys [][]byte) {
		for range keys {
			updateCh.Inc()
		}
	})
}

func rewriteFileKeys(file *backuppb.File, rewriteRules *RewriteRules) (startKey, endKey []byte, err error) {
	startID := tablecodec.DecodeTableID(file.GetStartKey())
	endID := tablecodec.DecodeTableID(file.GetEndKey())
	var rule *import_sstpb.RewriteRule
	if startID == endID {
		startKey, rule = rewriteRawKey(file.GetStartKey(), rewriteRules)
		if rewriteRules != nil && rule == nil {
			log.Error("cannot find rewrite rule",
				logutil.Key("startKey", file.GetStartKey()),
				zap.Reflect("rewrite data", rewriteRules.Data))
			err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule for start key")
			return
		}
		endKey, rule = rewriteRawKey(file.GetEndKey(), rewriteRules)
		if rewriteRules != nil && rule == nil {
			err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "cannot find rewrite rule for end key")
			return
		}
	} else {
		// Log the raw file keys here: the named return values startKey and
		// endKey have not been assigned on this branch.
		log.Error("table ids don't match",
			zap.Int64("startID", startID),
			zap.Int64("endID", endID),
			logutil.Key("startKey", file.GetStartKey()),
			logutil.Key("endKey", file.GetEndKey()))
		err = errors.Annotate(berrors.ErrRestoreInvalidRewrite, "invalid table id")
	}
	return
}

// encodeKeyPrefix encodes a key prefix so that the result is still a prefix
// of the fully encoded key. codec.EncodeBytes works on 8-byte groups, so we
// encode only the complete groups, drop the 9-byte terminator group the
// codec appends (8 padding bytes plus 1 marker byte), and append the
// remaining ungrouped tail bytes unencoded.
func encodeKeyPrefix(key []byte) []byte {
	encodedPrefix := make([]byte, 0)
	ungroupedLen := len(key) % 8
	encodedPrefix = append(encodedPrefix, codec.EncodeBytes([]byte{}, key[:len(key)-ungroupedLen])...)
	return append(encodedPrefix[:len(encodedPrefix)-9], key[len(key)-ungroupedLen:]...)
}

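// Worked example (a sketch; the lengths are what matter): for an 11-byte
// key, the first 8 bytes are encoded into one 9-byte group followed by a
// 9-byte all-padding terminator group. Dropping the terminator and appending
// the 3 leftover raw bytes yields a 12-byte result, which is a prefix of the
// full encoding of any key that starts with those 11 bytes.
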
// ZapTables makes a zap field of the tables for debugging, including table names.
func ZapTables(tables []CreatedTable) zapcore.Field {
	return logutil.AbbreviatedArray("tables", tables, func(input interface{}) []string {
		tables := input.([]CreatedTable)
		names := make([]string, 0, len(tables))
		for _, t := range tables {
			names = append(names, fmt.Sprintf("%s.%s",
				utils.EncloseName(t.OldTable.DB.Name.String()),
				utils.EncloseName(t.OldTable.Info.Name.String())))
		}
		return names
	})
}

// ParseQuoteName parses a quoted `db`.`table` name and splits it into the
// database and table parts.
func ParseQuoteName(name string) (db, table string) {
	names := quoteRegexp.FindAllStringSubmatch(name, -1)
	if len(names) != 2 {
		log.Panic("failed to parse schema name",
			zap.String("origin name", name),
			zap.Any("parsed names", names))
	}
	db = names[0][0]
	table = names[1][0]
	db = strings.ReplaceAll(unQuoteName(db), "``", "`")
	table = strings.ReplaceAll(unQuoteName(table), "``", "`")
	return db, table
}

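// Examples (escaped backticks inside a name are unescaped):
//
//	db, table := ParseQuoteName("`test`.`t1`")  // "test", "t1"
//	db, table = ParseQuoteName("`we``ird`.`t`") // "we`ird", "t"
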
func unQuoteName(name string) string {
	name = strings.TrimPrefix(name, "`")
	return strings.TrimSuffix(name, "`")
}