github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/relay/purge_strategy.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package relay
    15  
    16  import (
    17  	"fmt"
    18  	"strings"
    19  	"time"
    20  
    21  	"github.com/pingcap/tiflow/dm/pkg/log"
    22  	"github.com/pingcap/tiflow/dm/pkg/streamer"
    23  	"github.com/pingcap/tiflow/dm/pkg/terror"
    24  	"github.com/pingcap/tiflow/dm/pkg/utils"
    25  	"go.uber.org/atomic"
    26  	"go.uber.org/zap"
    27  )
    28  
    29  type strategyType uint32
    30  
    31  const (
    32  	strategyNone strategyType = iota
    33  	strategyInactive
    34  	strategyFilename
    35  	strategyTime
    36  	strategySpace
    37  )
    38  
    39  func (s strategyType) String() string {
    40  	switch s {
    41  	case strategyInactive:
    42  		return "inactive strategy"
    43  	case strategyFilename:
    44  		return "filename strategy"
    45  	case strategyTime:
    46  		return "time strategy"
    47  	case strategySpace:
    48  		return "space strategy"
    49  	default:
    50  		return "unknown strategy"
    51  	}
    52  }
    53  
    54  // PurgeStrategy represents a relay log purge strategy
    55  // two purge behaviors
    56  //  1. purge in the background
    57  //  2. do one time purge process
    58  //
    59  // a strategy can support both or one of them.
    60  type PurgeStrategy interface {
    61  	// Check checks whether need to do the purge in the background automatically
    62  	Check(args interface{}) (bool, error)
    63  
    64  	// Do does the purge process one time
    65  	Do(args interface{}) error
    66  
    67  	// Purging indicates whether is doing purge
    68  	Purging() bool
    69  
    70  	// Type returns the strategy type
    71  	Type() strategyType
    72  }
    73  
    74  // StrategyArgs represents args needed by purge strategy.
    75  type StrategyArgs interface {
    76  	// SetActiveRelayLog sets active relay log info in args
    77  	// this should be called before do the purging
    78  	SetActiveRelayLog(active *streamer.RelayLogInfo)
    79  }
    80  
    81  var fakeStrategyTaskName = strategyFilename.String()
    82  
    83  // filenameArgs represents args needed by filenameStrategy
    84  // NOTE: should handle master-slave switch.
    85  type filenameArgs struct {
    86  	relayBaseDir string
    87  	filename     string // specified end safe filename
    88  	subDir       string // sub dir for @filename, empty indicates latest sub dir
    89  	uuids        []string
    90  	safeRelayLog *streamer.RelayLogInfo // all relay log files prior to this should be purged
    91  }
    92  
    93  func (fa *filenameArgs) SetActiveRelayLog(active *streamer.RelayLogInfo) {
    94  	uuid := fa.subDir
    95  	if len(uuid) == 0 && len(fa.uuids) > 0 {
    96  		// no sub dir specified, use the latest one
    97  		uuid = fa.uuids[len(fa.uuids)-1]
    98  	}
    99  	_, endSuffix, _ := utils.ParseRelaySubDir(uuid)
   100  
   101  	safeRelayLog := &streamer.RelayLogInfo{
   102  		TaskName:     fakeStrategyTaskName,
   103  		SubDir:       uuid,
   104  		SubDirSuffix: endSuffix,
   105  		Filename:     fa.filename,
   106  	}
   107  
   108  	if active.Earlier(safeRelayLog) {
   109  		safeRelayLog = active
   110  	}
   111  
   112  	fa.safeRelayLog = safeRelayLog
   113  
   114  	// discard newer UUIDs
   115  	uuids := make([]string, 0, len(fa.uuids))
   116  	for _, uuid := range fa.uuids {
   117  		_, suffix, _ := utils.ParseRelaySubDir(uuid)
   118  		if suffix > endSuffix {
   119  			break
   120  		}
   121  		uuids = append(uuids, uuid)
   122  	}
   123  	fa.uuids = uuids
   124  }
   125  
   126  func (fa *filenameArgs) String() string {
   127  	return fmt.Sprintf("(RelayBaseDir: %s, Filename: %s, SubDir: %s, UUIDs: %s, SafeRelayLog: %s)",
   128  		fa.relayBaseDir, fa.filename, fa.subDir, strings.Join(fa.uuids, ";"), fa.safeRelayLog)
   129  }
   130  
   131  // filenameStrategy represents a relay purge strategy by filename
   132  // similar to `PURGE BINARY LOGS TO`.
   133  type filenameStrategy struct {
   134  	purging atomic.Bool
   135  
   136  	logger log.Logger
   137  }
   138  
   139  func newFilenameStrategy() PurgeStrategy {
   140  	return &filenameStrategy{
   141  		logger: log.With(zap.String("component", "relay purger"), zap.String("strategy", "file name")),
   142  	}
   143  }
   144  
   145  func (s *filenameStrategy) Check(args interface{}) (bool, error) {
   146  	// do not support purge in the background
   147  	return false, nil
   148  }
   149  
   150  func (s *filenameStrategy) Do(args interface{}) error {
   151  	if !s.purging.CAS(false, true) {
   152  		return terror.ErrRelayThisStrategyIsPurging.Generate()
   153  	}
   154  	defer s.purging.Store(false)
   155  
   156  	fa, ok := args.(*filenameArgs)
   157  	if !ok {
   158  		return terror.ErrRelayPurgeArgsNotValid.Generate(args, args)
   159  	}
   160  
   161  	return purgeRelayFilesBeforeFile(s.logger, fa.relayBaseDir, fa.uuids, fa.safeRelayLog)
   162  }
   163  
   164  func (s *filenameStrategy) Purging() bool {
   165  	return s.purging.Load()
   166  }
   167  
   168  func (s *filenameStrategy) Type() strategyType {
   169  	return strategyFilename
   170  }
   171  
   172  // inactiveArgs represents args needed by inactiveStrategy.
   173  type inactiveArgs struct {
   174  	relayBaseDir   string
   175  	uuids          []string
   176  	activeRelayLog *streamer.RelayLogInfo // earliest active relay log info
   177  }
   178  
   179  func (ia *inactiveArgs) SetActiveRelayLog(active *streamer.RelayLogInfo) {
   180  	ia.activeRelayLog = active
   181  }
   182  
   183  func (ia *inactiveArgs) String() string {
   184  	return fmt.Sprintf("(RelayBaseDir: %s, UUIDs: %s, ActiveRelayLog: %s)",
   185  		ia.relayBaseDir, strings.Join(ia.uuids, ";"), ia.activeRelayLog)
   186  }
   187  
   188  // inactiveStrategy represents a relay purge strategy which purge all inactive relay log files
   189  // definition of inactive relay log files:
   190  //   - not writing by relay unit
   191  //   - not reading by sync unit and will not be read by any running tasks
   192  //     TODO zxc: judge tasks are running dumper / loader
   193  type inactiveStrategy struct {
   194  	purging atomic.Bool
   195  
   196  	logger log.Logger
   197  }
   198  
   199  func newInactiveStrategy() PurgeStrategy {
   200  	return &inactiveStrategy{
   201  		logger: log.With(zap.String("component", "relay purger"), zap.String("strategy", "inactive binlog file")),
   202  	}
   203  }
   204  
   205  func (s *inactiveStrategy) Check(args interface{}) (bool, error) {
   206  	// do not support purge in the background
   207  	return false, nil
   208  }
   209  
   210  func (s *inactiveStrategy) Do(args interface{}) error {
   211  	if !s.purging.CAS(false, true) {
   212  		return terror.ErrRelayThisStrategyIsPurging.Generate()
   213  	}
   214  	defer s.purging.Store(false)
   215  
   216  	ia, ok := args.(*inactiveArgs)
   217  	if !ok {
   218  		return terror.ErrRelayPurgeArgsNotValid.Generate(args, args)
   219  	}
   220  
   221  	return purgeRelayFilesBeforeFile(s.logger, ia.relayBaseDir, ia.uuids, ia.activeRelayLog)
   222  }
   223  
   224  func (s *inactiveStrategy) Purging() bool {
   225  	return s.purging.Load()
   226  }
   227  
   228  func (s *inactiveStrategy) Type() strategyType {
   229  	return strategyInactive
   230  }
   231  
   232  // spaceArgs represents args needed by spaceStrategy.
   233  type spaceArgs struct {
   234  	relayBaseDir   string
   235  	remainSpace    int64 // if remain space (GB) in @RelayBaseDir less than this, then it can be purged
   236  	uuids          []string
   237  	activeRelayLog *streamer.RelayLogInfo // earliest active relay log info
   238  }
   239  
   240  func (sa *spaceArgs) SetActiveRelayLog(active *streamer.RelayLogInfo) {
   241  	sa.activeRelayLog = active
   242  }
   243  
   244  func (sa *spaceArgs) String() string {
   245  	return fmt.Sprintf("(RelayBaseDir: %s, AllowMinRemainSpace: %dGB, UUIDs: %s, ActiveRelayLog: %s)",
   246  		sa.relayBaseDir, sa.remainSpace, strings.Join(sa.uuids, ";"), sa.activeRelayLog)
   247  }
   248  
   249  // spaceStrategy represents a relay purge strategy by remain space in dm-worker node.
   250  type spaceStrategy struct {
   251  	purging atomic.Bool
   252  
   253  	logger log.Logger
   254  }
   255  
   256  func newSpaceStrategy() PurgeStrategy {
   257  	return &spaceStrategy{
   258  		logger: log.With(zap.String("component", "relay purger"), zap.String("strategy", "space")),
   259  	}
   260  }
   261  
   262  func (s *spaceStrategy) Check(args interface{}) (bool, error) {
   263  	sa, ok := args.(*spaceArgs)
   264  	if !ok {
   265  		return false, terror.ErrRelayPurgeArgsNotValid.Generate(args, args)
   266  	}
   267  
   268  	storageSize, err := utils.GetStorageSize(sa.relayBaseDir)
   269  	if err != nil {
   270  		return false, terror.Annotatef(err, "get storage size for directory %s", sa.relayBaseDir)
   271  	}
   272  
   273  	requiredBytes := uint64(sa.remainSpace) * 1024 * 1024 * 1024
   274  	return storageSize.Available < requiredBytes, nil
   275  }
   276  
   277  func (s *spaceStrategy) Do(args interface{}) error {
   278  	if !s.purging.CAS(false, true) {
   279  		return terror.ErrRelayThisStrategyIsPurging.Generate()
   280  	}
   281  	defer s.purging.Store(false)
   282  
   283  	sa, ok := args.(*spaceArgs)
   284  	if !ok {
   285  		return terror.ErrRelayPurgeArgsNotValid.Generate(args, args)
   286  	}
   287  
   288  	// NOTE: we purge all inactive relay log files when available space less than @remainSpace
   289  	// maybe we can refine this to purge only part of this files every time
   290  	return purgeRelayFilesBeforeFile(s.logger, sa.relayBaseDir, sa.uuids, sa.activeRelayLog)
   291  }
   292  
   293  func (s *spaceStrategy) Purging() bool {
   294  	return s.purging.Load()
   295  }
   296  
   297  func (s *spaceStrategy) Type() strategyType {
   298  	return strategySpace
   299  }
   300  
   301  // timeArgs represents args needed by timeStrategy.
   302  type timeArgs struct {
   303  	relayBaseDir   string
   304  	safeTime       time.Time // if file's modified time is older than this, then it can be purged
   305  	uuids          []string
   306  	activeRelayLog *streamer.RelayLogInfo // earliest active relay log info
   307  }
   308  
   309  func (ta *timeArgs) SetActiveRelayLog(active *streamer.RelayLogInfo) {
   310  	ta.activeRelayLog = active
   311  }
   312  
   313  func (ta *timeArgs) String() string {
   314  	return fmt.Sprintf("(RelayBaseDir: %s, SafeTime: %s, UUIDs: %s, ActiveRelayLog: %s)",
   315  		ta.relayBaseDir, ta.safeTime, strings.Join(ta.uuids, ";"), ta.activeRelayLog)
   316  }
   317  
   318  // timeStrategy represents a relay purge strategy by time
   319  // similar to `PURGE BINARY LOGS BEFORE` in MySQL.
   320  type timeStrategy struct {
   321  	purging atomic.Bool
   322  
   323  	logger log.Logger
   324  }
   325  
   326  func newTimeStrategy() PurgeStrategy {
   327  	return &timeStrategy{
   328  		logger: log.With(zap.String("component", "relay purger"), zap.String("strategy", "time")),
   329  	}
   330  }
   331  
   332  func (s *timeStrategy) Check(args interface{}) (bool, error) {
   333  	// for time strategy, we always try to do the purging
   334  	return true, nil
   335  }
   336  
   337  func (s *timeStrategy) Stop() {
   338  }
   339  
   340  func (s *timeStrategy) Do(args interface{}) error {
   341  	if !s.purging.CAS(false, true) {
   342  		return terror.ErrRelayThisStrategyIsPurging.Generate()
   343  	}
   344  	defer s.purging.Store(false)
   345  
   346  	ta, ok := args.(*timeArgs)
   347  	if !ok {
   348  		return terror.ErrRelayPurgeArgsNotValid.Generate(args, args)
   349  	}
   350  
   351  	return purgeRelayFilesBeforeFileAndTime(s.logger, ta.relayBaseDir, ta.uuids, ta.activeRelayLog, ta.safeTime)
   352  }
   353  
   354  func (s *timeStrategy) Purging() bool {
   355  	return s.purging.Load()
   356  }
   357  
   358  func (s *timeStrategy) Type() strategyType {
   359  	return strategyTime
   360  }