// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package redo

import (
	"context"
	"fmt"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/pingcap/tidb/br/pkg/storage"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/util"
)

var (
	// DefaultGCIntervalInMs defines GC interval in meta manager, which can be changed in tests.
	DefaultGCIntervalInMs = 5000 // 5 seconds
	// DefaultMaxLogSize is the default max size of log file, in megabytes
	// (see the `8*64MB` note on DefaultFlushWorkerNum below).
	DefaultMaxLogSize = int64(64)
)

const (
	// DefaultTimeout is the default timeout for writing external storage.
	DefaultTimeout = 5 * time.Minute
	// CloseTimeout is the default timeout for close redo writer.
	CloseTimeout = 15 * time.Second

	// FlushWarnDuration is the warning duration for flushing external storage.
	FlushWarnDuration = time.Second * 20
	// DefaultFlushIntervalInMs is the default flush interval for redo log.
	DefaultFlushIntervalInMs = 2000
	// DefaultMetaFlushIntervalInMs is the default flush interval for redo meta.
	DefaultMetaFlushIntervalInMs = 200
	// MinFlushIntervalInMs is the minimum flush interval for redo log.
	MinFlushIntervalInMs = 50

	// DefaultEncodingWorkerNum is the default number of encoding workers.
	DefaultEncodingWorkerNum = 16
	// DefaultEncodingInputChanSize is the default size of input channel for encoding worker.
	DefaultEncodingInputChanSize = 128
	// DefaultEncodingOutputChanSize is the default size of output channel for encoding worker.
	DefaultEncodingOutputChanSize = 2048
	// DefaultFlushWorkerNum is the default number of flush workers.
	// Maximum allocated memory is flushWorkerNum*maxLogSize, which is
	// `8*64MB = 512MB` by default.
	DefaultFlushWorkerNum = 8

	// DefaultFileMode is the default mode when operating on files.
	DefaultFileMode = 0o644
	// DefaultDirMode is the default mode when operating on directories.
	DefaultDirMode = 0o755

	// TmpEXT is the file ext of log file before safely wrote to disk.
	TmpEXT = ".tmp"
	// LogEXT is the file ext of log file after safely wrote to disk.
	LogEXT = ".log"
	// MetaEXT is the meta file ext of meta file after safely wrote to disk.
	MetaEXT = ".meta"
	// SortLogEXT is the sorted log file ext of log file after safely wrote to disk.
	SortLogEXT = ".sort"

	// MinSectorSize is minimum sector size used when flushing log so that log can safely
	// distinguish between torn writes and ordinary data corruption.
	MinSectorSize = 512
	// PageBytes is the alignment for flushing records to the backing Writer.
	// It should be a multiple of the minimum sector size so that log can safely
	// distinguish between torn writes and ordinary data corruption.
	PageBytes = 8 * MinSectorSize
	// Megabyte is the size of 1MB.
	Megabyte int64 = 1024 * 1024
)

const (
	// RedoMetaFileType is the default file type of meta file.
	RedoMetaFileType = "meta"
	// RedoRowLogFileType is the default file type of row log file.
	RedoRowLogFileType = "row"
	// RedoDDLLogFileType is the default file type of ddl log file.
	RedoDDLLogFileType = "ddl"
)

// ConsistentLevelType is the level of redo log consistent level.
98 type ConsistentLevelType string 99 100 const ( 101 // ConsistentLevelNone no consistent guarantee. 102 ConsistentLevelNone ConsistentLevelType = "none" 103 // ConsistentLevelEventual eventual consistent. 104 ConsistentLevelEventual ConsistentLevelType = "eventual" 105 ) 106 107 // IsValidConsistentLevel checks whether a given consistent level is valid 108 func IsValidConsistentLevel(level string) bool { 109 switch ConsistentLevelType(level) { 110 case ConsistentLevelNone, ConsistentLevelEventual: 111 return true 112 default: 113 return false 114 } 115 } 116 117 // IsConsistentEnabled returns whether the consistent feature is enabled. 118 func IsConsistentEnabled(level string) bool { 119 return IsValidConsistentLevel(level) && ConsistentLevelType(level) != ConsistentLevelNone 120 } 121 122 // ConsistentStorage is the type of consistent storage. 123 type ConsistentStorage string 124 125 const ( 126 // consistentStorageBlackhole is a blackhole storage, which will discard all data. 127 consistentStorageBlackhole ConsistentStorage = "blackhole" 128 // consistentStorageLocal is a local storage, which will store data in local disk. 129 consistentStorageLocal ConsistentStorage = "local" 130 // consistentStorageNFS is a NFS storage, which will store data in NFS. 131 consistentStorageNFS ConsistentStorage = "nfs" 132 133 // consistentStorageS3 is a S3 storage, which will store data in S3. 134 consistentStorageS3 ConsistentStorage = "s3" 135 // consistentStorageGCS is a GCS storage, which will store data in GCS. 136 consistentStorageGCS ConsistentStorage = "gcs" 137 // consistentStorageGS is an alias of GCS storage. 138 consistentStorageGS ConsistentStorage = "gs" 139 // consistentStorageAzblob is a Azure Blob storage, which will store data in Azure Blob. 140 consistentStorageAzblob ConsistentStorage = "azblob" 141 // consistentStorageAzure is an alias of Azure Blob storage. 
142 consistentStorageAzure ConsistentStorage = "azure" 143 // consistentStorageFile is an external storage based on local files and 144 // will only be used for testing. 145 consistentStorageFile ConsistentStorage = "file" 146 // consistentStorageNoop is a noop storage, which simply discard all data. 147 consistentStorageNoop ConsistentStorage = "noop" 148 ) 149 150 // IsValidConsistentStorage checks whether a give consistent storage is valid. 151 func IsValidConsistentStorage(scheme string) bool { 152 return IsBlackholeStorage(scheme) || 153 IsLocalStorage(scheme) || 154 IsExternalStorage(scheme) 155 } 156 157 // IsExternalStorage returns whether an external storage is used. 158 func IsExternalStorage(scheme string) bool { 159 switch ConsistentStorage(scheme) { 160 case consistentStorageS3, consistentStorageGCS, consistentStorageGS, 161 consistentStorageAzblob, consistentStorageAzure, consistentStorageFile, 162 consistentStorageNoop: 163 return true 164 default: 165 return false 166 } 167 } 168 169 // IsLocalStorage returns whether a local storage is used. 170 func IsLocalStorage(scheme string) bool { 171 switch ConsistentStorage(scheme) { 172 case consistentStorageLocal, consistentStorageNFS: 173 return true 174 default: 175 return false 176 } 177 } 178 179 // FixLocalScheme convert local scheme to externally compatible scheme. 180 func FixLocalScheme(uri *url.URL) { 181 if IsLocalStorage(uri.Scheme) { 182 uri.Scheme = string(consistentStorageFile) 183 } 184 } 185 186 // IsBlackholeStorage returns whether a blackhole storage is used. 187 func IsBlackholeStorage(scheme string) bool { 188 return strings.HasPrefix(scheme, string(consistentStorageBlackhole)) 189 } 190 191 // InitExternalStorage init an external storage. 
192 var InitExternalStorage = func(ctx context.Context, uri url.URL) (storage.ExternalStorage, error) { 193 s, err := util.GetExternalStorageWithTimeout(ctx, uri.String(), DefaultTimeout) 194 if err != nil { 195 return nil, errors.WrapError(errors.ErrStorageInitialize, err, 196 fmt.Sprintf("can't init external storage for %s", uri.String())) 197 } 198 return s, nil 199 } 200 201 func initExternalStorageForTest(ctx context.Context, uri url.URL) (storage.ExternalStorage, error) { 202 if ConsistentStorage(uri.Scheme) == consistentStorageS3 && len(uri.Host) == 0 { 203 // TODO: this branch is compatible with previous s3 logic and will be removed 204 // in the future. 205 return nil, errors.WrapChangefeedUnretryableErr(errors.ErrStorageInitialize, 206 errors.Errorf("please specify the bucket for %+v", uri)) 207 } 208 s, err := util.GetExternalStorageFromURI(ctx, uri.String()) 209 if err != nil { 210 return nil, errors.WrapChangefeedUnretryableErr(errors.ErrStorageInitialize, err) 211 } 212 return s, nil 213 } 214 215 // ValidateStorage validates the storage used by redo. 
func ValidateStorage(uri *url.URL) error {
	scheme := uri.Scheme
	if !IsValidConsistentStorage(scheme) {
		return errors.ErrConsistentStorage.GenWithStackByArgs(scheme)
	}
	// Blackhole storage discards all data, so there is nothing to validate.
	if IsBlackholeStorage(scheme) {
		return nil
	}

	if IsExternalStorage(scheme) {
		// Probe the external storage by initializing it once with a short timeout.
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()
		_, err := initExternalStorageForTest(ctx, *uri)
		return err
	}

	// Local/NFS storage: verify the directory can be created and that a probe
	// file can be written and read back.
	err := os.MkdirAll(uri.Path, DefaultDirMode)
	if err != nil {
		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
			fmt.Sprintf("can't make dir for new redo log: %+v", uri)))
	}

	file := filepath.Join(uri.Path, "file.test")
	if err := os.WriteFile(file, []byte(""), DefaultFileMode); err != nil {
		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
			fmt.Sprintf("can't write file for new redo log: %+v", uri)))
	}

	if _, err := os.ReadFile(file); err != nil {
		return errors.WrapError(errors.ErrStorageInitialize, errors.Annotate(err,
			fmt.Sprintf("can't read file for new redo log: %+v", uri)))
	}
	// Best-effort cleanup of the probe file; a leftover file is harmless.
	_ = os.Remove(file)
	return nil
}

const (
	// RedoLogFileFormatV1 was used before v6.1.0, which doesn't contain namespace information
	// layout: captureID_changefeedID_fileType_maxEventCommitTs_uuid.fileExtName
	RedoLogFileFormatV1 = "%s_%s_%s_%d_%s%s"
	// RedoLogFileFormatV2 is available since v6.1.0, which contains namespace information
	// layout: captureID_namespace_changefeedID_fileType_maxEventCommitTs_uuid.fileExtName
	RedoLogFileFormatV2 = "%s_%s_%s_%s_%d_%s%s"
	// RedoMetaFileFormat is the format of redo meta file, which contains namespace information.
	// layout: captureID_namespace_changefeedID_fileType_uuid.fileExtName
	RedoMetaFileFormat = "%s_%s_%s_%s_%s%s"
)

// logFormat2ParseFormat converts redo log file name format to the space separated
// format, which can be read and parsed by sscanf. Besides remove the suffix `%s`
// which is used as file name extension, since we will parse extension first.
func logFormat2ParseFormat(fmtStr string) string {
	return strings.TrimSuffix(strings.ReplaceAll(fmtStr, "_", " "), "%s")
}

// ParseLogFileName extract the commitTs, fileType from log fileName.
// Unknown extensions yield (0, "", nil) so callers can skip unrelated files;
// only malformed names with a known extension produce an error.
func ParseLogFileName(name string) (uint64, string, error) {
	ext := filepath.Ext(name)
	// Meta files carry no commitTs in their name.
	if ext == MetaEXT {
		return 0, RedoMetaFileType, nil
	}

	// if .sort, the name should be like
	// fmt.Sprintf("%s_%s_%s_%d_%s_%d%s", w.cfg.captureID,
	// w.cfg.changeFeedID.Namespace,w.cfg.changeFeedID.ID,
	// w.cfg.fileType, w.commitTS.Load(), uuid, LogEXT)+SortLogEXT
	// Strip the ".sort" suffix and parse the inner ".log" name instead.
	if ext == SortLogEXT {
		name = strings.TrimSuffix(name, SortLogEXT)
		ext = filepath.Ext(name)
	}
	if ext != LogEXT && ext != TmpEXT {
		return 0, "", nil
	}

	var commitTs uint64
	var captureID, namespace, changefeedID, fileType, uid string
	// if the namespace is not default, the log looks like:
	// fmt.Sprintf("%s_%s_%s_%s_%d_%s%s", w.cfg.captureID,
	// w.cfg.changeFeedID.Namespace,w.cfg.changeFeedID.ID,
	// w.cfg.fileType, w.commitTS.Load(), uuid, redo.LogEXT)
	// otherwise it looks like:
	// fmt.Sprintf("%s_%s_%s_%d_%s%s", w.cfg.captureID,
	// w.cfg.changeFeedID.ID,
	// w.cfg.fileType, w.commitTS.Load(), uuid, redo.LogEXT)
	var (
		vars      []any
		formatStr string
	)
	// Six underscore-separated fields means the V2 (namespaced) layout;
	// anything else is parsed with the legacy V1 layout.
	if len(strings.Split(name, "_")) == 6 {
		formatStr = logFormat2ParseFormat(RedoLogFileFormatV2)
		vars = []any{&captureID, &namespace, &changefeedID, &fileType, &commitTs, &uid}
	} else {
		formatStr = logFormat2ParseFormat(RedoLogFileFormatV1)
		vars = []any{&captureID, &changefeedID, &fileType, &commitTs, &uid}
	}
	// Underscores are turned into spaces to match logFormat2ParseFormat's
	// space-separated scan format.
	name = strings.ReplaceAll(name, "_", " ")
	_, err := fmt.Sscanf(name, formatStr, vars...)
	if err != nil {
		return 0, "", errors.Annotatef(err, "bad log name: %s", name)
	}
	return commitTs, fileType, nil
}