github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/kv/regionlock/region_range_lock.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package regionlock
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/hex"
    20  	"fmt"
    21  	"math"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/google/btree"
    27  	"github.com/pingcap/log"
    28  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    29  	"github.com/pingcap/tiflow/pkg/spanz"
    30  	"go.uber.org/zap"
    31  )
    32  
    33  const (
    34  	// LockRangeStatusSuccess means a LockRange operation succeeded.
    35  	LockRangeStatusSuccess = 0
    36  	// LockRangeStatusWait means a LockRange operation is blocked and should wait for it being finished.
    37  	LockRangeStatusWait = 1
    38  	// LockRangeStatusStale means a LockRange operation is rejected because of the range's version is stale.
    39  	LockRangeStatusStale = 2
    40  	// LockRangeStatusCancel means a LockRange operation is cancelled.
    41  	LockRangeStatusCancel = 3
    42  )
    43  
// LockRangeResult represents the result of LockRange method of RangeLock.
// Which fields are meaningful depends on Status:
//
// If Status is LockRangeStatusSuccess:
//   - LockedRangeState is for recording real-time state changes of the locked range.
//     Its ResolvedTs is initialized to the minimum resolvedTs of the range.
//
// If Status is LockRangeStatusWait, it means the lock cannot be acquired immediately. WaitFn must be invoked to
// continue waiting and acquiring the lock. WaitFn is only set for this status, and it returns the final
// (non-wait) result of the lock attempt.
//
// If Status is LockRangeStatusStale, it means the LockRange request is stale because there's already an overlapping
// locked range, whose version is greater than or equal to the requested one.
//
// If Status is LockRangeStatusCancel, the request was abandoned, either because the RangeLock has been stopped
// or because the context passed to LockRange was done while waiting in WaitFn.
type LockRangeResult struct {
	Status           int
	LockedRangeState *LockedRangeState
	WaitFn           func() LockRangeResult

	// RetryRanges is only used when Status is LockRangeStatusStale.
	// It contains the ranges that should be retried to lock, i.e. the parts of the
	// requested range that are not covered by the overlapping locked ranges.
	RetryRanges []tablepb.Span
}
    63  
// LockedRangeState is used to access the real-time state changes of a locked range.
//
//   - ResolvedTs is the resolvedTs of the range. When the range gets locked it is seeded with
//     the minimum resolvedTs recorded for that range while it was unlocked.
//   - Initialzied is a flag that is only read (never written) in this file; it is reported as
//     the Initialized field of the statistics returned by IterAll.
//     NOTE(review): the misspelled name is part of the exported API and is kept as is;
//     presumably the owner of the region sets it once the region is initialized — confirm.
//   - Created is the wall-clock time at which the range was locked.
type LockedRangeState struct {
	ResolvedTs  atomic.Uint64
	Initialzied atomic.Bool
	Created     time.Time
}
    70  
// rangeLockEntry represents a locked range that defined by [startKey, endKey).
// Entries are stored in a btree ordered by startKey only (see rangeLockEntryLess), and are
// additionally indexed by regionID. regionVersion is the version the range was locked with;
// it is compared against the version of later lock requests to detect stale requests.
type rangeLockEntry struct {
	startKey      []byte
	endKey        []byte
	regionID      uint64
	regionVersion uint64
	// lockedRangeState is used to record the real-time state changes of this locked range.
	// A pointer to it is handed out to the caller that acquired the lock.
	lockedRangeState LockedRangeState
	// waiterSignalChs is a list of channels that are used to
	// notify the waiter of this lock entry that the lock is released.
	// One value is sent on every channel when the entry is unlocked.
	waiterSignalChs []chan<- interface{}
}
    83  
    84  func rangeLockEntryWithKey(key []byte) *rangeLockEntry {
    85  	return &rangeLockEntry{
    86  		startKey: key,
    87  	}
    88  }
    89  
    90  func rangeLockEntryLess(a, b *rangeLockEntry) bool {
    91  	return bytes.Compare(a.startKey, b.startKey) < 0
    92  }
    93  
    94  func (e *rangeLockEntry) String() string {
    95  	return fmt.Sprintf("region %v [%v, %v), version %v, %d waiters",
    96  		e.regionID,
    97  		hex.EncodeToString(e.startKey),
    98  		hex.EncodeToString(e.endKey),
    99  		e.regionVersion,
   100  		len(e.waiterSignalChs))
   101  }
   102  
// RangeLock is used to ensure that a table's same range is only requested once at a time.
// Before sending a region request to TiKV, the client should lock the region's range to avoid sending another
// request to the same region. After stopping the table or removing the region, the client should unlock the range.
// It also helps calculate the resolvedTs of the table it belongs to.
// Note(dongmen): A table has one RangeLock, and within that RangeLock, there are multiple regionLocks for each region.
//
// Once stopped is set (by Stop), further lock attempts are answered with LockRangeStatusCancel.
type RangeLock struct {
	// ID to identify different RangeLock instances, so logs of different instances can be distinguished.
	id uint64
	// totalSpan is the total range of the table, totalSpan = unlockedRanges + lockedRanges
	totalSpan tablepb.Span
	// changefeed is used to identify the changefeed which the RangeLock belongs to.
	changefeed string

	// mu is held by every method of RangeLock while it reads or modifies the fields below.
	mu sync.RWMutex
	// unlockedRanges is used to store the resolvedTs of unlocked ranges.
	unlockedRanges *rangeTsMap
	// lockedRanges is a btree that stores all locked ranges.
	lockedRanges *btree.BTreeG[*rangeLockEntry]
	// regionIDToLockedRanges is used to quickly locate the lock entry by regionID.
	regionIDToLockedRanges map[uint64]*rangeLockEntry
	stopped                bool
}
   125  
   126  // NewRangeLock creates a new RangeLock.
   127  func NewRangeLock(
   128  	id uint64,
   129  	startKey, endKey []byte, startTs uint64, changefeedLogInfo string,
   130  ) *RangeLock {
   131  	return &RangeLock{
   132  		id:                     id,
   133  		totalSpan:              tablepb.Span{StartKey: startKey, EndKey: endKey},
   134  		changefeed:             changefeedLogInfo,
   135  		unlockedRanges:         newRangeTsMap(startKey, endKey, startTs),
   136  		lockedRanges:           btree.NewG(16, rangeLockEntryLess),
   137  		regionIDToLockedRanges: make(map[uint64]*rangeLockEntry),
   138  	}
   139  }
   140  
   141  // LockRange locks a range with specified version.
   142  func (l *RangeLock) LockRange(
   143  	ctx context.Context, startKey, endKey []byte, regionID, version uint64,
   144  ) LockRangeResult {
   145  	res, signalChs := l.tryLockRange(startKey, endKey, regionID, version)
   146  
   147  	if res.Status != LockRangeStatusWait {
   148  		return res
   149  	}
   150  
   151  	res.WaitFn = func() LockRangeResult {
   152  		signalChs1 := signalChs
   153  		var res1 LockRangeResult
   154  		for {
   155  			for _, ch := range signalChs1 {
   156  				select {
   157  				case <-ctx.Done():
   158  					return LockRangeResult{Status: LockRangeStatusCancel}
   159  				case <-ch:
   160  				}
   161  			}
   162  			res1, signalChs1 = l.tryLockRange(startKey, endKey, regionID, version)
   163  			if res1.Status != LockRangeStatusWait {
   164  				return res1
   165  			}
   166  		}
   167  	}
   168  
   169  	return res
   170  }
   171  
   172  // UnlockRange unlocks a range and update resolvedTs of the range to specified value.
   173  // If it returns true it means it is stopped and all ranges are unlocked correctly.
   174  func (l *RangeLock) UnlockRange(
   175  	startKey, endKey []byte, regionID, version uint64,
   176  	resolvedTs ...uint64,
   177  ) (drained bool) {
   178  	l.mu.Lock()
   179  	defer l.mu.Unlock()
   180  
   181  	entry, ok := l.lockedRanges.Get(rangeLockEntryWithKey(startKey))
   182  	if !ok {
   183  		log.Panic("unlocking a not locked range",
   184  			zap.String("changefeed", l.changefeed),
   185  			zap.Uint64("regionID", regionID),
   186  			zap.String("startKey", hex.EncodeToString(startKey)),
   187  			zap.String("endKey", hex.EncodeToString(endKey)),
   188  			zap.Uint64("version", version))
   189  	}
   190  	if entry.regionID != regionID {
   191  		log.Panic("unlocked a range but regionID mismatch",
   192  			zap.String("changefeed", l.changefeed),
   193  			zap.Uint64("expectedRegionID", regionID),
   194  			zap.Uint64("foundRegionID", entry.regionID),
   195  			zap.String("startKey", hex.EncodeToString(startKey)),
   196  			zap.String("endKey", hex.EncodeToString(endKey)))
   197  	}
   198  	if entry != l.regionIDToLockedRanges[regionID] {
   199  		log.Panic("range lock and region id lock mismatch when trying to unlock",
   200  			zap.String("changefeed", l.changefeed),
   201  			zap.Uint64("unlockingRegionID", regionID),
   202  			zap.String("rangeLockEntry", entry.String()),
   203  			zap.String("regionIDLockEntry", l.regionIDToLockedRanges[regionID].String()))
   204  	}
   205  	delete(l.regionIDToLockedRanges, regionID)
   206  	drained = l.stopped && len(l.regionIDToLockedRanges) == 0
   207  
   208  	if entry.regionVersion != version || !bytes.Equal(entry.endKey, endKey) {
   209  		log.Panic("unlocking region doesn't match the locked region",
   210  			zap.String("changefeed", l.changefeed),
   211  			zap.Uint64("regionID", regionID),
   212  			zap.String("startKey", hex.EncodeToString(startKey)),
   213  			zap.String("endKey", hex.EncodeToString(endKey)),
   214  			zap.Uint64("version", version),
   215  			zap.String("foundLockEntry", entry.String()))
   216  	}
   217  
   218  	for _, ch := range entry.waiterSignalChs {
   219  		ch <- nil
   220  	}
   221  
   222  	if entry, ok = l.lockedRanges.Delete(entry); !ok {
   223  		panic("unreachable")
   224  	}
   225  
   226  	var newResolvedTs uint64
   227  	if len(resolvedTs) > 0 {
   228  		newResolvedTs = resolvedTs[0]
   229  	} else {
   230  		newResolvedTs = entry.lockedRangeState.ResolvedTs.Load()
   231  	}
   232  
   233  	l.unlockedRanges.set(startKey, endKey, newResolvedTs)
   234  	log.Debug("unlocked range",
   235  		zap.String("changefeed", l.changefeed),
   236  		zap.Uint64("lockID", l.id), zap.Uint64("regionID", entry.regionID),
   237  		zap.Uint64("resolvedTs", newResolvedTs),
   238  		zap.String("startKey", hex.EncodeToString(startKey)),
   239  		zap.String("endKey", hex.EncodeToString(endKey)))
   240  	return
   241  }
   242  
   243  // Len returns len of locked ranges.
   244  func (l *RangeLock) Len() int {
   245  	l.mu.RLock()
   246  	defer l.mu.RUnlock()
   247  	return l.lockedRanges.Len()
   248  }
   249  
   250  // ResolvedTs calculates and returns the minimum resolvedTs
   251  // of all ranges in the RangeLock.
   252  func (l *RangeLock) ResolvedTs() uint64 {
   253  	l.mu.RLock()
   254  	defer l.mu.RUnlock()
   255  
   256  	var minTs uint64 = math.MaxUint64
   257  	l.lockedRanges.Ascend(func(item *rangeLockEntry) bool {
   258  		ts := item.lockedRangeState.ResolvedTs.Load()
   259  		if ts < minTs {
   260  			minTs = ts
   261  		}
   262  		return true
   263  	})
   264  
   265  	unlockedMinTs := l.unlockedRanges.getMinTs()
   266  	if unlockedMinTs < minTs {
   267  		minTs = unlockedMinTs
   268  	}
   269  
   270  	return minTs
   271  }
   272  
// RangeLockStatistics represents some statistics of a RangeLock.
// It is a snapshot produced by IterAll.
type RangeLockStatistics struct {
	// LockedRegionCount is the number of locked ranges at the time of the snapshot.
	LockedRegionCount int
	// UnLockedRanges represents the unlocked ranges in the table.
	// If UnLockedRanges isn't empty, it implies that some regions were not captured at the time.
	// These regions could have been split, merged, transferred, or temporarily unavailable.
	UnLockedRanges []UnLockRangeStatistic

	// FastestRegion is the locked range with the greatest resolvedTs and SlowestRegion the one
	// with the smallest. When nothing is locked, FastestRegion.ResolvedTs stays 0 and
	// SlowestRegion.ResolvedTs stays math.MaxUint64, with all other fields left at zero values.
	FastestRegion LockedRangeStatistic
	SlowestRegion LockedRangeStatistic
}
   284  
// LockedRangeStatistic represents a locked range.
// It is a point-in-time copy of the region ID and of the LockedRangeState of one
// locked range: its resolvedTs, its initialized flag and the time it was locked.
type LockedRangeStatistic struct {
	RegionID    uint64
	ResolvedTs  uint64
	Initialized bool
	Created     time.Time
}
   292  
// UnLockRangeStatistic represents a range that is unlocked.
// Span is the gap between two neighbouring locked ranges (or between a locked range and
// a boundary of the table span), and ResolvedTs is the minimum resolvedTs recorded for
// the unlocked ranges within that gap.
type UnLockRangeStatistic struct {
	Span       tablepb.Span
	ResolvedTs uint64
}
   298  
   299  // IterAll iterates all locked ranges in the RangeLock and performs the action on each locked range.
   300  // It also returns some statistics of the RangeLock.
   301  func (l *RangeLock) IterAll(
   302  	action func(regionID uint64, state *LockedRangeState),
   303  ) (r RangeLockStatistics) {
   304  	l.mu.RLock()
   305  	defer l.mu.RUnlock()
   306  	r.LockedRegionCount = l.lockedRanges.Len()
   307  	r.FastestRegion.ResolvedTs = 0
   308  	r.SlowestRegion.ResolvedTs = math.MaxUint64
   309  
   310  	lastEnd := l.totalSpan.StartKey
   311  	l.lockedRanges.Ascend(func(item *rangeLockEntry) bool {
   312  		if action != nil {
   313  			action(item.regionID, &item.lockedRangeState)
   314  		}
   315  
   316  		if spanz.EndCompare(lastEnd, item.startKey) < 0 {
   317  			span := tablepb.Span{StartKey: lastEnd, EndKey: item.startKey}
   318  			ts := l.unlockedRanges.getMinTsInRange(lastEnd, item.startKey)
   319  			r.UnLockedRanges = append(r.UnLockedRanges, UnLockRangeStatistic{Span: span, ResolvedTs: ts})
   320  		}
   321  		resolvedTs := item.lockedRangeState.ResolvedTs.Load()
   322  		if resolvedTs > r.FastestRegion.ResolvedTs {
   323  			r.FastestRegion.RegionID = item.regionID
   324  			r.FastestRegion.ResolvedTs = resolvedTs
   325  			r.FastestRegion.Initialized = item.lockedRangeState.Initialzied.Load()
   326  			r.FastestRegion.Created = item.lockedRangeState.Created
   327  		}
   328  		if resolvedTs < r.SlowestRegion.ResolvedTs {
   329  			r.SlowestRegion.RegionID = item.regionID
   330  			r.SlowestRegion.ResolvedTs = resolvedTs
   331  			r.SlowestRegion.Initialized = item.lockedRangeState.Initialzied.Load()
   332  			r.SlowestRegion.Created = item.lockedRangeState.Created
   333  		}
   334  		lastEnd = item.endKey
   335  		return true
   336  	})
   337  	if spanz.EndCompare(lastEnd, l.totalSpan.EndKey) < 0 {
   338  		span := tablepb.Span{StartKey: lastEnd, EndKey: l.totalSpan.EndKey}
   339  		ts := l.unlockedRanges.getMinTsInRange(lastEnd, l.totalSpan.EndKey)
   340  		r.UnLockedRanges = append(r.UnLockedRanges, UnLockRangeStatistic{Span: span, ResolvedTs: ts})
   341  	}
   342  	return
   343  }
   344  
   345  // IterForTest iterates all locked ranges in the RangeLock and performs the action on each locked range.
   346  // It is used for testing only.
   347  func (l *RangeLock) IterForTest(
   348  	action func(regionID, version uint64, state *LockedRangeState, span tablepb.Span),
   349  ) {
   350  	l.mu.RLock()
   351  	defer l.mu.RUnlock()
   352  	l.lockedRanges.Ascend(func(item *rangeLockEntry) bool {
   353  		if action != nil {
   354  			span := tablepb.Span{StartKey: item.startKey, EndKey: item.endKey}
   355  			action(item.regionID, item.regionVersion, &item.lockedRangeState, span)
   356  		}
   357  		return true
   358  	})
   359  }
   360  
   361  // Stop stops the instance.
   362  func (l *RangeLock) Stop() (drained bool) {
   363  	l.mu.Lock()
   364  	defer l.mu.Unlock()
   365  	l.stopped = true
   366  	return l.stopped && len(l.regionIDToLockedRanges) == 0
   367  }
   368  
   369  func (l *RangeLock) getOverlappedLockEntries(startKey, endKey []byte, regionID uint64) []*rangeLockEntry {
   370  	regionIDFound := false
   371  
   372  	overlappedLocks := make([]*rangeLockEntry, 0)
   373  	l.lockedRanges.DescendLessOrEqual(rangeLockEntryWithKey(startKey),
   374  		func(entry *rangeLockEntry) bool {
   375  			if bytes.Compare(entry.startKey, startKey) < 0 &&
   376  				bytes.Compare(startKey, entry.endKey) < 0 {
   377  				overlappedLocks = append(overlappedLocks, entry)
   378  				if entry.regionID == regionID {
   379  					regionIDFound = true
   380  				}
   381  			}
   382  			return false
   383  		})
   384  	l.lockedRanges.AscendRange(rangeLockEntryWithKey(startKey), rangeLockEntryWithKey(endKey),
   385  		func(entry *rangeLockEntry) bool {
   386  			overlappedLocks = append(overlappedLocks, entry)
   387  			if entry.regionID == regionID {
   388  				regionIDFound = true
   389  			}
   390  			return true
   391  		})
   392  
   393  	// The entry with the same regionID should also be checked.
   394  	if !regionIDFound {
   395  		entry, ok := l.regionIDToLockedRanges[regionID]
   396  		if ok {
   397  			overlappedLocks = append(overlappedLocks, entry)
   398  		}
   399  	}
   400  
   401  	return overlappedLocks
   402  }
   403  
   404  // tryLockRange works in this way:
   405  // 1. If the range is totally disjointed with all locked ranges, it will be directly locked.
   406  // 2. If the range is overlapping with some locked ranges:
   407  //   - If the current region's version is stale, it will return LockRangeStatusStale and the overlapping ranges to the caller.
   408  //   - If the current region's version is not stale, it will return LockRangeStatusWait and the overlapping ranges to the caller,
   409  //     and the caller should wait for the overlapping ranges to be released and retry to lock the rest of the range.
   410  func (l *RangeLock) tryLockRange(startKey, endKey []byte, regionID, regionVersion uint64) (LockRangeResult, []<-chan interface{}) {
   411  	l.mu.Lock()
   412  	defer l.mu.Unlock()
   413  	if l.stopped {
   414  		return LockRangeResult{Status: LockRangeStatusCancel}, nil
   415  	}
   416  
   417  	overlappedRangeLocks := l.getOverlappedLockEntries(startKey, endKey, regionID)
   418  
   419  	// 1. If the range is totally disjointed with all locked ranges, it will be directly locked.
   420  	if len(overlappedRangeLocks) == 0 {
   421  		resolvedTs := l.unlockedRanges.getMinTsInRange(startKey, endKey)
   422  		newEntry := &rangeLockEntry{
   423  			startKey:      startKey,
   424  			endKey:        endKey,
   425  			regionID:      regionID,
   426  			regionVersion: regionVersion,
   427  		}
   428  		newEntry.lockedRangeState.ResolvedTs.Store(resolvedTs)
   429  		newEntry.lockedRangeState.Created = time.Now()
   430  		l.lockedRanges.ReplaceOrInsert(newEntry)
   431  		l.regionIDToLockedRanges[regionID] = newEntry
   432  
   433  		l.unlockedRanges.unset(startKey, endKey)
   434  		log.Debug("range locked",
   435  			zap.String("changefeed", l.changefeed),
   436  			zap.Uint64("lockID", l.id),
   437  			zap.Uint64("regionID", regionID),
   438  			zap.Uint64("version", regionVersion),
   439  			zap.Uint64("resolvedTs", resolvedTs),
   440  			zap.String("startKey", hex.EncodeToString(startKey)),
   441  			zap.String("endKey", hex.EncodeToString(endKey)))
   442  
   443  		return LockRangeResult{
   444  			Status:           LockRangeStatusSuccess,
   445  			LockedRangeState: &newEntry.lockedRangeState,
   446  		}, nil
   447  	}
   448  
   449  	// Format overlapping ranges for printing log
   450  	var overlapStr []string
   451  	for _, r := range overlappedRangeLocks {
   452  		overlapStr = append(overlapStr, fmt.Sprintf("regionID: %v, ver: %v, start: %v, end: %v",
   453  			r.regionID, r.regionVersion, hex.EncodeToString(r.startKey), hex.EncodeToString(r.endKey))) // DEBUG
   454  	}
   455  
   456  	// Check if the current acuqiring range is stale,
   457  	// which means there's already a locked range with a equal or greater version.
   458  	isStale := false
   459  	for _, rangeLock := range overlappedRangeLocks {
   460  		if rangeLock.regionVersion >= regionVersion {
   461  			isStale = true
   462  			break
   463  		}
   464  	}
   465  	// If the range is stale, we should return the overlapping ranges to the caller,
   466  	// so that the caller can retry to lock the rest of the range.
   467  	if isStale {
   468  		retryRanges := make([]tablepb.Span, 0)
   469  		currentRangeStartKey := startKey
   470  
   471  		log.Info("try lock range staled",
   472  			zap.String("changefeed", l.changefeed),
   473  			zap.Uint64("lockID", l.id), zap.Uint64("regionID", regionID),
   474  			zap.String("startKey", hex.EncodeToString(startKey)),
   475  			zap.String("endKey", hex.EncodeToString(endKey)),
   476  			zap.Strings("allOverlapping", overlapStr)) // DEBUG
   477  
   478  		for _, r := range overlappedRangeLocks {
   479  			// Ignore the totally-disjointed range which may be added to the list because of
   480  			// searching by regionID.
   481  			if bytes.Compare(r.endKey, startKey) <= 0 || bytes.Compare(endKey, r.startKey) <= 0 {
   482  				continue
   483  			}
   484  			// The rest should come from range searching and is sorted in increasing order, and they
   485  			// must intersect with the current given range.
   486  			if bytes.Compare(currentRangeStartKey, r.startKey) < 0 {
   487  				retryRanges = append(retryRanges,
   488  					tablepb.Span{StartKey: currentRangeStartKey, EndKey: r.startKey})
   489  			}
   490  			currentRangeStartKey = r.endKey
   491  		}
   492  
   493  		if bytes.Compare(currentRangeStartKey, endKey) < 0 {
   494  			retryRanges = append(retryRanges,
   495  				tablepb.Span{StartKey: currentRangeStartKey, EndKey: endKey})
   496  		}
   497  
   498  		return LockRangeResult{
   499  			Status:      LockRangeStatusStale,
   500  			RetryRanges: retryRanges,
   501  		}, nil
   502  	}
   503  
   504  	var lockReleaseSignalChs []<-chan interface{}
   505  
   506  	for _, r := range overlappedRangeLocks {
   507  		ch := make(chan interface{}, 1)
   508  		lockReleaseSignalChs = append(lockReleaseSignalChs, ch)
   509  		r.waiterSignalChs = append(r.waiterSignalChs, ch)
   510  	}
   511  
   512  	log.Info("lock range blocked",
   513  		zap.String("changefeed", l.changefeed),
   514  		zap.Uint64("lockID", l.id), zap.Uint64("regionID", regionID),
   515  		zap.String("startKey", hex.EncodeToString(startKey)),
   516  		zap.String("endKey", hex.EncodeToString(endKey)),
   517  		zap.Strings("blockedBy", overlapStr)) // DEBUG
   518  
   519  	return LockRangeResult{
   520  		Status: LockRangeStatusWait,
   521  	}, lockReleaseSignalChs
   522  }