github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/kv/matcher.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package kv
    15  
    16  import (
    17  	"github.com/pingcap/kvproto/pkg/cdcpb"
    18  	"github.com/pingcap/log"
    19  	"go.uber.org/zap"
    20  )
    21  
    22  type matchKey struct {
    23  	startTs uint64
    24  	key     string
    25  }
    26  
    27  func newMatchKey(row *cdcpb.Event_Row) matchKey {
    28  	return matchKey{startTs: row.GetStartTs(), key: string(row.GetKey())}
    29  }
    30  
    31  type matcher struct {
    32  	// TODO : clear the single prewrite
    33  	unmatchedValue map[matchKey]*cdcpb.Event_Row
    34  	cachedCommit   []*cdcpb.Event_Row
    35  	cachedRollback []*cdcpb.Event_Row
    36  }
    37  
    38  func newMatcher() *matcher {
    39  	return &matcher{
    40  		unmatchedValue: make(map[matchKey]*cdcpb.Event_Row),
    41  	}
    42  }
    43  
    44  func (m *matcher) putPrewriteRow(row *cdcpb.Event_Row) {
    45  	key := newMatchKey(row)
    46  	// tikv may send a fake prewrite event with empty value caused by txn heartbeat.
    47  	// here we need to avoid the fake prewrite event overwrite the prewrite value.
    48  
    49  	// when the old-value is disabled, the value of the fake prewrite event is empty.
    50  	// when the old-value is enabled, the value of the fake prewrite event is also empty,
    51  	// but the old value of the fake prewrite event is not empty.
    52  	// We can distinguish fake prewrite events by whether the value is empty,
    53  	// no matter the old-value is enabled or disabled
    54  	if _, exist := m.unmatchedValue[key]; exist && len(row.GetValue()) == 0 {
    55  		return
    56  	}
    57  	m.unmatchedValue[key] = row
    58  }
    59  
    60  // matchRow matches the commit event with the cached prewrite event
    61  // the Value and OldValue will be assigned if a matched prewrite event exists.
    62  func (m *matcher) matchRow(row *cdcpb.Event_Row, initialized bool) bool {
    63  	if value, exist := m.unmatchedValue[newMatchKey(row)]; exist {
    64  		// TiKV may send a fake prewrite event with empty value caused by txn heartbeat.
    65  		//
    66  		// We need to skip match if the region is not initialized,
    67  		// as prewrite events may be sent out of order.
    68  		if !initialized && len(value.GetValue()) == 0 {
    69  			return false
    70  		}
    71  		row.Value = value.GetValue()
    72  		row.OldValue = value.GetOldValue()
    73  		delete(m.unmatchedValue, newMatchKey(row))
    74  		return true
    75  	}
    76  	return false
    77  }
    78  
    79  func (m *matcher) cacheCommitRow(row *cdcpb.Event_Row) {
    80  	m.cachedCommit = append(m.cachedCommit, row)
    81  }
    82  
    83  //nolint:unparam
    84  func (m *matcher) matchCachedRow(initialized bool) []*cdcpb.Event_Row {
    85  	if !initialized {
    86  		log.Panic("must be initialized before match cahced rows")
    87  	}
    88  	cachedCommit := m.cachedCommit
    89  	m.cachedCommit = nil
    90  	top := 0
    91  	for i := 0; i < len(cachedCommit); i++ {
    92  		cacheEntry := cachedCommit[i]
    93  		ok := m.matchRow(cacheEntry, true)
    94  		if !ok {
    95  			// when cdc receives a commit log without a corresponding
    96  			// prewrite log before initialized, a committed log  with
    97  			// the same key and start-ts must have been received.
    98  			log.Info("ignore commit event without prewrite",
    99  				zap.Binary("key", cacheEntry.GetKey()),
   100  				zap.Uint64("startTs", cacheEntry.GetStartTs()))
   101  			continue
   102  		}
   103  		cachedCommit[top] = cacheEntry
   104  		top++
   105  	}
   106  	return cachedCommit[:top]
   107  }
   108  
   109  func (m *matcher) rollbackRow(row *cdcpb.Event_Row) {
   110  	delete(m.unmatchedValue, newMatchKey(row))
   111  }
   112  
   113  func (m *matcher) cacheRollbackRow(row *cdcpb.Event_Row) {
   114  	m.cachedRollback = append(m.cachedRollback, row)
   115  }
   116  
   117  //nolint:unparam
   118  func (m *matcher) matchCachedRollbackRow(initialized bool) {
   119  	if !initialized {
   120  		log.Panic("must be initialized before match cahced rollback rows")
   121  	}
   122  	rollback := m.cachedRollback
   123  	m.cachedRollback = nil
   124  	for i := 0; i < len(rollback); i++ {
   125  		cacheEntry := rollback[i]
   126  		m.rollbackRow(cacheEntry)
   127  	}
   128  }