github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/redo/config.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package redo
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"net/url"
    20  	"os"
    21  	"path/filepath"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/pingcap/tidb/br/pkg/storage"
    26  	"github.com/pingcap/tiflow/pkg/errors"
    27  	"github.com/pingcap/tiflow/pkg/util"
    28  )
    29  
var (
	// DefaultGCIntervalInMs defines GC interval in meta manager, which can be changed in tests.
	DefaultGCIntervalInMs = 5000 // 5 seconds
	// DefaultMaxLogSize is the default max size of a log file, in megabytes
	// (multiplied by Megabyte when used; see the DefaultFlushWorkerNum comment:
	// 8 workers * 64MB = 512MB maximum allocated memory).
	// It is a variable rather than a constant so tests can change it.
	DefaultMaxLogSize = int64(64)
)
    36  
const (
	// DefaultTimeout is the default timeout for writing external storage.
	DefaultTimeout = 5 * time.Minute
	// CloseTimeout is the default timeout for closing the redo writer.
	CloseTimeout = 15 * time.Second

	// FlushWarnDuration is the warning duration for flushing external storage.
	FlushWarnDuration = time.Second * 20
	// DefaultFlushIntervalInMs is the default flush interval for redo log.
	DefaultFlushIntervalInMs = 2000
	// DefaultMetaFlushIntervalInMs is the default flush interval for redo meta.
	DefaultMetaFlushIntervalInMs = 200
	// MinFlushIntervalInMs is the minimum flush interval for redo log.
	MinFlushIntervalInMs = 50

	// DefaultEncodingWorkerNum is the default number of encoding workers.
	DefaultEncodingWorkerNum = 16
	// DefaultEncodingInputChanSize is the default size of the input channel
	// for an encoding worker.
	DefaultEncodingInputChanSize = 128
	// DefaultEncodingOutputChanSize is the default size of the output channel
	// for an encoding worker.
	DefaultEncodingOutputChanSize = 2048
	// DefaultFlushWorkerNum is the default number of flush workers.
	// Maximum allocated memory is flushWorkerNum*maxLogSize, which is
	// `8*64MB = 512MB` by default.
	DefaultFlushWorkerNum = 8

	// DefaultFileMode is the default permission mode used when creating files.
	DefaultFileMode = 0o644
	// DefaultDirMode is the default permission mode used when creating directories.
	DefaultDirMode = 0o755

	// TmpEXT is the file extension of a log file before it is safely written to disk.
	TmpEXT = ".tmp"
	// LogEXT is the file extension of a log file after it is safely written to disk.
	LogEXT = ".log"
	// MetaEXT is the file extension of a meta file after it is safely written to disk.
	MetaEXT = ".meta"
	// SortLogEXT is the extension appended to a log file once it has been sorted.
	SortLogEXT = ".sort"

	// MinSectorSize is minimum sector size used when flushing log so that log can safely
	// distinguish between torn writes and ordinary data corruption.
	MinSectorSize = 512
	// PageBytes is the alignment for flushing records to the backing Writer.
	// It should be a multiple of the minimum sector size so that log can safely
	// distinguish between torn writes and ordinary data corruption.
	PageBytes = 8 * MinSectorSize
	// Megabyte is the number of bytes in 1MB.
	Megabyte int64 = 1024 * 1024
)
    87  
// File type identifiers embedded in redo file names.
const (
	// RedoMetaFileType is the file type of a meta file.
	RedoMetaFileType = "meta"
	// RedoRowLogFileType is the file type of a row log file.
	RedoRowLogFileType = "row"
	// RedoDDLLogFileType is the file type of a ddl log file.
	RedoDDLLogFileType = "ddl"
)
    96  
// ConsistentLevelType is the level of redo log consistent level.
type ConsistentLevelType string

const (
	// ConsistentLevelNone provides no consistency guarantee: redo log is disabled.
	ConsistentLevelNone ConsistentLevelType = "none"
	// ConsistentLevelEventual provides eventual consistency via redo log.
	ConsistentLevelEventual ConsistentLevelType = "eventual"
)
   106  
   107  // IsValidConsistentLevel checks whether a given consistent level is valid
   108  func IsValidConsistentLevel(level string) bool {
   109  	switch ConsistentLevelType(level) {
   110  	case ConsistentLevelNone, ConsistentLevelEventual:
   111  		return true
   112  	default:
   113  		return false
   114  	}
   115  }
   116  
   117  // IsConsistentEnabled returns whether the consistent feature is enabled.
   118  func IsConsistentEnabled(level string) bool {
   119  	return IsValidConsistentLevel(level) && ConsistentLevelType(level) != ConsistentLevelNone
   120  }
   121  
// ConsistentStorage is the type of consistent storage.
type ConsistentStorage string

const (
	// consistentStorageBlackhole is a blackhole storage, which will discard all data.
	consistentStorageBlackhole ConsistentStorage = "blackhole"
	// consistentStorageLocal is a local storage, which will store data in local disk.
	consistentStorageLocal ConsistentStorage = "local"
	// consistentStorageNFS is an NFS storage, which will store data in NFS.
	consistentStorageNFS ConsistentStorage = "nfs"

	// consistentStorageS3 is an S3 storage, which will store data in S3.
	consistentStorageS3 ConsistentStorage = "s3"
	// consistentStorageGCS is a GCS storage, which will store data in GCS.
	consistentStorageGCS ConsistentStorage = "gcs"
	// consistentStorageGS is an alias of GCS storage.
	consistentStorageGS ConsistentStorage = "gs"
	// consistentStorageAzblob is an Azure Blob storage, which will store data in Azure Blob.
	consistentStorageAzblob ConsistentStorage = "azblob"
	// consistentStorageAzure is an alias of Azure Blob storage.
	consistentStorageAzure ConsistentStorage = "azure"
	// consistentStorageFile is an external storage based on local files and
	// will only be used for testing.
	consistentStorageFile ConsistentStorage = "file"
	// consistentStorageNoop is a noop storage, which simply discards all data.
	consistentStorageNoop ConsistentStorage = "noop"
)
   149  
   150  // IsValidConsistentStorage checks whether a give consistent storage is valid.
   151  func IsValidConsistentStorage(scheme string) bool {
   152  	return IsBlackholeStorage(scheme) ||
   153  		IsLocalStorage(scheme) ||
   154  		IsExternalStorage(scheme)
   155  }
   156  
   157  // IsExternalStorage returns whether an external storage is used.
   158  func IsExternalStorage(scheme string) bool {
   159  	switch ConsistentStorage(scheme) {
   160  	case consistentStorageS3, consistentStorageGCS, consistentStorageGS,
   161  		consistentStorageAzblob, consistentStorageAzure, consistentStorageFile,
   162  		consistentStorageNoop:
   163  		return true
   164  	default:
   165  		return false
   166  	}
   167  }
   168  
   169  // IsLocalStorage returns whether a local storage is used.
   170  func IsLocalStorage(scheme string) bool {
   171  	switch ConsistentStorage(scheme) {
   172  	case consistentStorageLocal, consistentStorageNFS:
   173  		return true
   174  	default:
   175  		return false
   176  	}
   177  }
   178  
   179  // FixLocalScheme convert local scheme to externally compatible scheme.
   180  func FixLocalScheme(uri *url.URL) {
   181  	if IsLocalStorage(uri.Scheme) {
   182  		uri.Scheme = string(consistentStorageFile)
   183  	}
   184  }
   185  
   186  // IsBlackholeStorage returns whether a blackhole storage is used.
   187  func IsBlackholeStorage(scheme string) bool {
   188  	return strings.HasPrefix(scheme, string(consistentStorageBlackhole))
   189  }
   190  
   191  // InitExternalStorage init an external storage.
   192  var InitExternalStorage = func(ctx context.Context, uri url.URL) (storage.ExternalStorage, error) {
   193  	s, err := util.GetExternalStorageWithTimeout(ctx, uri.String(), DefaultTimeout)
   194  	if err != nil {
   195  		return nil, errors.WrapError(errors.ErrStorageInitialize, err,
   196  			fmt.Sprintf("can't init external storage for %s", uri.String()))
   197  	}
   198  	return s, nil
   199  }
   200  
   201  func initExternalStorageForTest(ctx context.Context, uri url.URL) (storage.ExternalStorage, error) {
   202  	if ConsistentStorage(uri.Scheme) == consistentStorageS3 && len(uri.Host) == 0 {
   203  		// TODO: this branch is compatible with previous s3 logic and will be removed
   204  		// in the future.
   205  		return nil, errors.WrapChangefeedUnretryableErr(errors.ErrStorageInitialize,
   206  			errors.Errorf("please specify the bucket for %+v", uri))
   207  	}
   208  	s, err := util.GetExternalStorageFromURI(ctx, uri.String())
   209  	if err != nil {
   210  		return nil, errors.WrapChangefeedUnretryableErr(errors.ErrStorageInitialize, err)
   211  	}
   212  	return s, nil
   213  }
   214  
   215  // ValidateStorage validates the storage used by redo.
   216  func ValidateStorage(uri *url.URL) error {
   217  	scheme := uri.Scheme
   218  	if !IsValidConsistentStorage(scheme) {
   219  		return errors.ErrConsistentStorage.GenWithStackByArgs(scheme)
   220  	}
   221  	if IsBlackholeStorage(scheme) {
   222  		return nil
   223  	}
   224  
   225  	if IsExternalStorage(scheme) {
   226  		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
   227  		defer cancel()
   228  		_, err := initExternalStorageForTest(ctx, *uri)
   229  		return err
   230  	}
   231  
   232  	err := os.MkdirAll(uri.Path, DefaultDirMode)
   233  	if err != nil {
   234  		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
   235  			fmt.Sprintf("can't make dir for new redo log: %+v", uri)))
   236  	}
   237  
   238  	file := filepath.Join(uri.Path, "file.test")
   239  	if err := os.WriteFile(file, []byte(""), DefaultFileMode); err != nil {
   240  		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
   241  			fmt.Sprintf("can't write file for new redo log: %+v", uri)))
   242  	}
   243  
   244  	if _, err := os.ReadFile(file); err != nil {
   245  		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
   246  			fmt.Sprintf("can't read file for new redo log: %+v", uri)))
   247  	}
   248  	_ = os.Remove(file)
   249  	return nil
   250  }
   251  
const (
	// RedoLogFileFormatV1 was used before v6.1.0, which doesn't contain namespace information.
	// layout: captureID_changefeedID_fileType_maxEventCommitTs_uuid.fileExtName
	RedoLogFileFormatV1 = "%s_%s_%s_%d_%s%s"
	// RedoLogFileFormatV2 is available since v6.1.0, which contains namespace information.
	// layout: captureID_namespace_changefeedID_fileType_maxEventCommitTs_uuid.fileExtName
	RedoLogFileFormatV2 = "%s_%s_%s_%s_%d_%s%s"
	// RedoMetaFileFormat is the format of redo meta file, which contains namespace information.
	// layout: captureID_namespace_changefeedID_fileType_uuid.fileExtName
	RedoMetaFileFormat = "%s_%s_%s_%s_%s%s"
)
   263  
   264  // logFormat2ParseFormat converts redo log file name format to the space separated
   265  // format, which can be read and parsed by sscanf. Besides remove the suffix `%s`
   266  // which is used as file name extension, since we will parse extension first.
   267  func logFormat2ParseFormat(fmtStr string) string {
   268  	return strings.TrimSuffix(strings.ReplaceAll(fmtStr, "_", " "), "%s")
   269  }
   270  
   271  // ParseLogFileName extract the commitTs, fileType from log fileName
   272  func ParseLogFileName(name string) (uint64, string, error) {
   273  	ext := filepath.Ext(name)
   274  	if ext == MetaEXT {
   275  		return 0, RedoMetaFileType, nil
   276  	}
   277  
   278  	// if .sort, the name should be like
   279  	// fmt.Sprintf("%s_%s_%s_%d_%s_%d%s", w.cfg.captureID,
   280  	// w.cfg.changeFeedID.Namespace,w.cfg.changeFeedID.ID,
   281  	// w.cfg.fileType, w.commitTS.Load(), uuid, LogEXT)+SortLogEXT
   282  	if ext == SortLogEXT {
   283  		name = strings.TrimSuffix(name, SortLogEXT)
   284  		ext = filepath.Ext(name)
   285  	}
   286  	if ext != LogEXT && ext != TmpEXT {
   287  		return 0, "", nil
   288  	}
   289  
   290  	var commitTs uint64
   291  	var captureID, namespace, changefeedID, fileType, uid string
   292  	// if the namespace is not default, the log looks like:
   293  	// fmt.Sprintf("%s_%s_%s_%s_%d_%s%s", w.cfg.captureID,
   294  	// w.cfg.changeFeedID.Namespace,w.cfg.changeFeedID.ID,
   295  	// w.cfg.fileType, w.commitTS.Load(), uuid, redo.LogEXT)
   296  	// otherwise it looks like:
   297  	// fmt.Sprintf("%s_%s_%s_%d_%s%s", w.cfg.captureID,
   298  	// w.cfg.changeFeedID.ID,
   299  	// w.cfg.fileType, w.commitTS.Load(), uuid, redo.LogEXT)
   300  	var (
   301  		vars      []any
   302  		formatStr string
   303  	)
   304  	if len(strings.Split(name, "_")) == 6 {
   305  		formatStr = logFormat2ParseFormat(RedoLogFileFormatV2)
   306  		vars = []any{&captureID, &namespace, &changefeedID, &fileType, &commitTs, &uid}
   307  	} else {
   308  		formatStr = logFormat2ParseFormat(RedoLogFileFormatV1)
   309  		vars = []any{&captureID, &changefeedID, &fileType, &commitTs, &uid}
   310  	}
   311  	name = strings.ReplaceAll(name, "_", " ")
   312  	_, err := fmt.Sscanf(name, formatStr, vars...)
   313  	if err != nil {
   314  		return 0, "", errors.Annotatef(err, "bad log name: %s", name)
   315  	}
   316  	return commitTs, fileType, nil
   317  }