github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/relay/purger_helper.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package relay
    15  
    16  import (
    17  	"os"
    18  	"path/filepath"
    19  	"time"
    20  
    21  	"github.com/pingcap/tiflow/dm/pkg/log"
    22  	"github.com/pingcap/tiflow/dm/pkg/streamer"
    23  	"github.com/pingcap/tiflow/dm/pkg/terror"
    24  	"github.com/pingcap/tiflow/dm/pkg/utils"
    25  	"go.uber.org/zap"
    26  )
    27  
    28  // subRelayFiles represents relay log files in one subdirectory.
    29  type subRelayFiles struct {
    30  	dir    string   // subdirectory path
    31  	files  []string // path of relay log files
    32  	hasAll bool     // whether all relay log files in @dir are included in @files
    33  }
    34  
    35  // purgeRelayFilesBeforeFile purge relay log files which are older than safeRelay.
    36  func purgeRelayFilesBeforeFile(logger log.Logger, relayBaseDir string, subDirs []string, safeRelay *streamer.RelayLogInfo) error {
    37  	files, err := getRelayFilesBeforeFile(logger, relayBaseDir, subDirs, safeRelay)
    38  	if err != nil {
    39  		return terror.Annotatef(err, "get relay files from directory %s before file %+v with UUIDs %v", relayBaseDir, safeRelay, subDirs)
    40  	}
    41  
    42  	return purgeRelayFiles(logger, files)
    43  }
    44  
    45  // purgeRelayFilesBeforeFileAndTime purge relay log files which are older than safeRelay and safeTime.
    46  func purgeRelayFilesBeforeFileAndTime(logger log.Logger, relayBaseDir string, subDirs []string, safeRelay *streamer.RelayLogInfo, safeTime time.Time) error {
    47  	files, err := getRelayFilesBeforeFileAndTime(logger, relayBaseDir, subDirs, safeRelay, safeTime)
    48  	if err != nil {
    49  		return terror.Annotatef(err, "get relay files from directory %s before file %+v and time %v with UUIDs %v", relayBaseDir, safeRelay, safeTime, subDirs)
    50  	}
    51  
    52  	return purgeRelayFiles(logger, files)
    53  }
    54  
    55  // getRelayFilesBeforeFile gets a list of relay log files which are older than safeRelay.
    56  func getRelayFilesBeforeFile(logger log.Logger, relayBaseDir string, subDirs []string, safeRelay *streamer.RelayLogInfo) ([]*subRelayFiles, error) {
    57  	// discard all newer UUIDs
    58  	subDirs, err := getSubDirsOlderAndEqual(subDirs, safeRelay.SubDir)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  
    63  	zeroTime := time.Unix(0, 0)
    64  	files, err := collectRelayFilesBeforeFileAndTime(logger, relayBaseDir, subDirs, safeRelay.Filename, zeroTime)
    65  	return files, err
    66  }
    67  
    68  // getRelayFilesBeforeTime gets a list of relay log files which have modified time earlier than safeTime.
    69  func getRelayFilesBeforeFileAndTime(logger log.Logger, relayBaseDir string, subDirs []string, safeRelay *streamer.RelayLogInfo, safeTime time.Time) ([]*subRelayFiles, error) {
    70  	subDirs, err := getSubDirsOlderAndEqual(subDirs, safeRelay.SubDir)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  
    75  	return collectRelayFilesBeforeFileAndTime(logger, relayBaseDir, subDirs, safeRelay.Filename, safeTime)
    76  }
    77  
    78  // getSubDirsOlderAndEqual returns all subdirectories older than and equal to targetSubDir.
    79  func getSubDirsOlderAndEqual(subDirs []string, targetSubDir string) ([]string, error) {
    80  	endIdx := -1
    81  	for i, uuid := range subDirs {
    82  		if uuid == targetSubDir {
    83  			endIdx = i
    84  			break
    85  		}
    86  	}
    87  	if endIdx < 0 {
    88  		return nil, terror.ErrRelayTrimUUIDNotFound.Generate(targetSubDir, subDirs)
    89  	}
    90  
    91  	return subDirs[:endIdx+1], nil
    92  }
    93  
    94  // collectRelayFilesBeforeFileAndTime collects relay log files before safeFilename (and before safeTime).
    95  func collectRelayFilesBeforeFileAndTime(logger log.Logger, relayBaseDir string, subDirs []string, safeFilename string, safeTime time.Time) ([]*subRelayFiles, error) {
    96  	// NOTE: test performance when removing a large number of relay log files and decide whether need to limit files removed every time
    97  	files := make([]*subRelayFiles, 0, 1)
    98  
    99  	for i, uuid := range subDirs {
   100  		dir := filepath.Join(relayBaseDir, uuid)
   101  		var (
   102  			shortFiles []string
   103  			err        error
   104  			hasAll     bool
   105  		)
   106  		if i+1 == len(subDirs) {
   107  			// same sub dir, only collect relay files newer than safeRelay.filename
   108  			shortFiles, err = CollectBinlogFilesCmp(dir, safeFilename, FileCmpLess)
   109  			if err != nil {
   110  				return nil, terror.Annotatef(err, "dir %s", dir)
   111  			}
   112  		} else {
   113  			if !utils.IsDirExists(dir) {
   114  				logger.Warn("relay log directory not exists", zap.String("directory", dir))
   115  				continue
   116  			}
   117  			// earlier sub dir, collect all relay files
   118  			shortFiles, err = CollectAllBinlogFiles(dir)
   119  			if err != nil {
   120  				return nil, terror.Annotatef(err, "dir %s", dir)
   121  			}
   122  			hasAll = true // collected all relay files
   123  		}
   124  		if len(shortFiles) == 0 {
   125  			continue // no relay log files exist
   126  		}
   127  		fullFiles := make([]string, 0, len(shortFiles))
   128  		for _, f := range shortFiles {
   129  			fp := filepath.Join(dir, f)
   130  			if safeTime.Unix() > 0 {
   131  				// check modified time
   132  				fs, err := os.Stat(fp)
   133  				if err != nil {
   134  					return nil, terror.ErrGetRelayLogStat.Delegate(err, fp)
   135  				}
   136  				if fs.ModTime().After(safeTime) {
   137  					hasAll = false // newer found, reset to false
   138  					logger.Debug("ignore newer relay log file in dir", zap.String("file", f), zap.String("directory", dir))
   139  					break
   140  				}
   141  			}
   142  			fullFiles = append(fullFiles, fp)
   143  		}
   144  		files = append(files, &subRelayFiles{
   145  			dir:    dir,
   146  			files:  fullFiles,
   147  			hasAll: hasAll,
   148  		})
   149  
   150  		if !hasAll {
   151  			// once newer file encountered, we think later files are newer too, so stop to collect
   152  			break
   153  		}
   154  	}
   155  
   156  	return files, nil
   157  }
   158  
   159  // purgeRelayFiles purges relay log files and directories if them become empty.
   160  func purgeRelayFiles(logger log.Logger, files []*subRelayFiles) error {
   161  	startTime := time.Now()
   162  	defer func() {
   163  		logger.Info("purge relay log files", zap.Duration("cost time", time.Since(startTime)))
   164  	}()
   165  
   166  	for _, subRelay := range files {
   167  		for _, f := range subRelay.files {
   168  			logger.Info("purging relay log file", zap.String("file", f))
   169  			err := os.Remove(f)
   170  			if err != nil {
   171  				return terror.ErrRelayRemoveFileFail.Delegate(err, "file", f)
   172  			}
   173  		}
   174  		if subRelay.hasAll {
   175  			// if all relay log files removed, remove the directory and all other files (like relay.meta)
   176  			logger.Info("purging relay log directory", zap.String("directory", subRelay.dir))
   177  			err := os.RemoveAll(subRelay.dir)
   178  			if err != nil {
   179  				return terror.ErrRelayRemoveFileFail.Delegate(err, "dir", subRelay.dir)
   180  			}
   181  		}
   182  	}
   183  	return nil
   184  }