github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/txnutil/gc/gc_manager.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package gc
    15  
    16  import (
    17  	"context"
    18  	"time"
    19  
    20  	"github.com/pingcap/failpoint"
    21  	"github.com/pingcap/log"
    22  	"github.com/pingcap/tiflow/cdc/model"
    23  	"github.com/pingcap/tiflow/pkg/config"
    24  	cerror "github.com/pingcap/tiflow/pkg/errors"
    25  	"github.com/pingcap/tiflow/pkg/pdutil"
    26  	"github.com/tikv/client-go/v2/oracle"
    27  	pd "github.com/tikv/pd/client"
    28  	"go.uber.org/atomic"
    29  	"go.uber.org/zap"
    30  )
    31  
    32  // gcSafepointUpdateInterval is the minimum interval that CDC can update gc safepoint
    33  var gcSafepointUpdateInterval = 1 * time.Minute
    34  
    35  // Manager is an interface for gc manager
    36  type Manager interface {
    37  	// TryUpdateGCSafePoint tries to update TiCDC service GC safepoint.
    38  	// Manager may skip update when it thinks it is too frequent.
    39  	// Set `forceUpdate` to force Manager update.
    40  	TryUpdateGCSafePoint(ctx context.Context, checkpointTs model.Ts, forceUpdate bool) error
    41  	CheckStaleCheckpointTs(ctx context.Context, changefeedID model.ChangeFeedID, checkpointTs model.Ts) error
    42  }
    43  
// gcManager is the Manager implementation returned by NewManager.
type gcManager struct {
	// gcServiceID is the service ID passed to SetServiceGCSafepoint.
	gcServiceID string
	// pdClient is the PD client passed to SetServiceGCSafepoint.
	pdClient pd.Client
	// pdClock supplies the current time used by CheckStaleCheckpointTs.
	pdClock pdutil.Clock
	// gcTTL is taken from the global server config (GcTTL) and is
	// interpreted as a number of seconds.
	gcTTL int64

	// lastUpdatedTime is when TryUpdateGCSafePoint last attempted an update
	// (set before the attempt, whether or not it succeeds).
	// NOTE(review): this field and lastSucceededTime are plain values with no
	// synchronization; presumably TryUpdateGCSafePoint is only called from a
	// single goroutine — confirm against callers.
	lastUpdatedTime time.Time
	// lastSucceededTime is when an update last succeeded; NewManager
	// initializes it to the creation time.
	lastSucceededTime time.Time
	// lastSafePointTs is the safepoint returned by the most recent successful
	// SetServiceGCSafepoint call.
	lastSafePointTs atomic.Uint64
	// isTiCDCBlockGC is true when the safepoint returned by the most recent
	// successful update was equal to the checkpointTs that was submitted.
	isTiCDCBlockGC atomic.Bool
}
    55  
    56  // NewManager creates a new Manager.
    57  func NewManager(gcServiceID string, pdClient pd.Client, pdClock pdutil.Clock) Manager {
    58  	serverConfig := config.GetGlobalServerConfig()
    59  	failpoint.Inject("InjectGcSafepointUpdateInterval", func(val failpoint.Value) {
    60  		gcSafepointUpdateInterval = time.Duration(val.(int) * int(time.Millisecond))
    61  	})
    62  	return &gcManager{
    63  		gcServiceID:       gcServiceID,
    64  		pdClient:          pdClient,
    65  		pdClock:           pdClock,
    66  		lastSucceededTime: time.Now(),
    67  		gcTTL:             serverConfig.GcTTL,
    68  	}
    69  }
    70  
    71  func (m *gcManager) TryUpdateGCSafePoint(
    72  	ctx context.Context, checkpointTs model.Ts, forceUpdate bool,
    73  ) error {
    74  	if time.Since(m.lastUpdatedTime) < gcSafepointUpdateInterval && !forceUpdate {
    75  		return nil
    76  	}
    77  	m.lastUpdatedTime = time.Now()
    78  
    79  	actual, err := SetServiceGCSafepoint(
    80  		ctx, m.pdClient, m.gcServiceID, m.gcTTL, checkpointTs)
    81  	if err != nil {
    82  		log.Warn("updateGCSafePoint failed",
    83  			zap.Uint64("safePointTs", checkpointTs),
    84  			zap.Error(err))
    85  		if time.Since(m.lastSucceededTime) >= time.Second*time.Duration(m.gcTTL) {
    86  			return cerror.ErrUpdateServiceSafepointFailed.Wrap(err)
    87  		}
    88  		return nil
    89  	}
    90  	failpoint.Inject("InjectActualGCSafePoint", func(val failpoint.Value) {
    91  		actual = uint64(val.(int))
    92  	})
    93  	if actual == checkpointTs {
    94  		log.Info("update gc safe point success", zap.Uint64("gcSafePointTs", checkpointTs))
    95  	}
    96  	if actual > checkpointTs {
    97  		log.Warn("update gc safe point failed, the gc safe point is larger than checkpointTs",
    98  			zap.Uint64("actual", actual), zap.Uint64("checkpointTs", checkpointTs))
    99  	}
   100  	// if the min checkpoint ts is equal to the current gc safe point, it
   101  	// means that the service gc safe point set by TiCDC is the min service
   102  	// gc safe point
   103  	m.isTiCDCBlockGC.Store(actual == checkpointTs)
   104  	m.lastSafePointTs.Store(actual)
   105  	m.lastSucceededTime = time.Now()
   106  	minServiceGCSafePointGauge.Set(float64(oracle.ExtractPhysical(actual)))
   107  	cdcGCSafePointGauge.Set(float64(oracle.ExtractPhysical(checkpointTs)))
   108  	return nil
   109  }
   110  
   111  func (m *gcManager) CheckStaleCheckpointTs(
   112  	ctx context.Context, changefeedID model.ChangeFeedID, checkpointTs model.Ts,
   113  ) error {
   114  	gcSafepointUpperBound := checkpointTs - 1
   115  	if m.isTiCDCBlockGC.Load() {
   116  		pdTime := m.pdClock.CurrentTime()
   117  		if pdTime.Sub(
   118  			oracle.GetTimeFromTS(gcSafepointUpperBound),
   119  		) > time.Duration(m.gcTTL)*time.Second {
   120  			return cerror.ErrGCTTLExceeded.
   121  				GenWithStackByArgs(
   122  					checkpointTs,
   123  					changefeedID,
   124  				)
   125  		}
   126  	} else {
   127  		// if `isTiCDCBlockGC` is false, it means there is another service gc
   128  		// point less than the min checkpoint ts.
   129  		if gcSafepointUpperBound < m.lastSafePointTs.Load() {
   130  			return cerror.ErrSnapshotLostByGC.
   131  				GenWithStackByArgs(
   132  					checkpointTs,
   133  					m.lastSafePointTs.Load(),
   134  				)
   135  		}
   136  	}
   137  	return nil
   138  }