github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/processor/sourcemanager/sorter/engine.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package sorter
    15  
    16  import (
    17  	"github.com/pingcap/tiflow/cdc/model"
    18  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    19  )
    20  
// SortEngine is a storage engine to store and sort CDC events.
// Every changefeed will have one SortEngine instance.
// NOTE: All interfaces are thread-safe.
type SortEngine interface {
	// IsTableBased tells whether the sort engine is based on table or not.
	// If it is table based, fetching events by table is preferred.
	IsTableBased() bool

	// AddTable adds the table into the engine.
	AddTable(span tablepb.Span, startTs model.Ts)

	// RemoveTable removes the table from the engine.
	RemoveTable(span tablepb.Span)

	// Add adds the given events into the sort engine.
	//
	// NOTE: this is an asynchronous interface. To be notified when events
	// become available for fetching, register a hook with OnResolve.
	Add(span tablepb.Span, events ...*model.PolymorphicEvent)

	// OnResolve pushes action into the SortEngine's hook list. The hooks
	// are called after any events are resolved.
	OnResolve(action func(tablepb.Span, model.Ts))

	// FetchByTable creates an iterator to fetch events from the given table.
	// lowerBound is inclusive and only resolved events can be retrieved.
	//
	// NOTE: FetchByTable is always available even if IsTableBased returns false.
	// NOTE(review): whether upperBound is inclusive is not stated here —
	// confirm against the engine implementations.
	FetchByTable(span tablepb.Span, lowerBound, upperBound Position) EventIterator

	// FetchAllTables creates an iterator to fetch events from all tables.
	// lowerBound is inclusive and only resolved events can be retrieved.
	//
	// NOTE: It's only available if IsTableBased returns false.
	FetchAllTables(lowerBound Position) EventIterator

	// CleanByTable tells the engine that events of the given table in the
	// range (unlimited, upperBound] are committed and no longer needed.
	// The SortEngine instance can GC them later.
	//
	// NOTE: CleanByTable is always available even if IsTableBased returns false.
	CleanByTable(span tablepb.Span, upperBound Position) error

	// CleanAllTables tells the engine that events of all tables in the
	// range (unlimited, upperBound] are committed and no longer needed.
	// The SortEngine instance can GC them later.
	//
	// NOTE: It's only available if IsTableBased returns false.
	CleanAllTables(upperBound Position) error

	// GetStatsByTable gets the statistics of the given table.
	GetStatsByTable(span tablepb.Span) TableStats

	// Close closes the engine. All data written by this instance can be deleted.
	//
	// NOTE: closing an engine that still has active iterators leads to
	// undefined behavior.
	Close() error

	// SlotsAndHasher returns the number of slots the engine contains, and
	// a hasher for table spans.
	// The hasher should return a slot index for the given table span.
	// NOTE(review): the hasher's int argument is presumably the slot count —
	// confirm against the engine implementations.
	SlotsAndHasher() (slotCount int, hasher func(tablepb.Span, int) int)
}
    84  
// EventIterator is an iterator to fetch events from SortEngine.
// Implementations are not required to be thread-safe.
type EventIterator interface {
	// Next fetches one event. A nil event indicates that the iterator has
	// reached its stop point.
	//
	// txnFinished indicates whether all events in the current transaction
	// have been fetched. Users should keep fetching events until
	// txnFinished.Valid() returns true.
	//
	// NOTE: event.IsResolved() will always be false.
	Next() (event *model.PolymorphicEvent, txnFinished Position, err error)

	// Close closes the iterator.
	Close() error
}
   100  
// Position is used to
//  1. fetch or clear events from an engine, for example, see SortEngine.FetchByTable.
//  2. calculate the next position with method Next.
//
// Positions are ordered by CommitTs first and by StartTs second (see Compare).
type Position struct {
	// StartTs is the secondary ordering key; it breaks ties between
	// positions that share the same CommitTs.
	StartTs  model.Ts
	// CommitTs is the primary ordering key. A zero CommitTs marks the
	// Position as invalid (see Valid).
	CommitTs model.Ts
}
   108  
   109  // GenCommitFence generates a Position which is a commit fence.
   110  // CommitFence indicates all transactions with same CommitTs are less than the position.
   111  func GenCommitFence(commitTs model.Ts) Position {
   112  	return Position{
   113  		StartTs:  commitTs - 1,
   114  		CommitTs: commitTs,
   115  	}
   116  }
   117  
   118  // Valid indicates whether the position is valid or not.
   119  func (p Position) Valid() bool {
   120  	return p.CommitTs != 0
   121  }
   122  
   123  // Next can only be called on a valid Position.
   124  func (p Position) Next() Position {
   125  	return Position{
   126  		StartTs:  p.StartTs + 1, // it will never overflow.
   127  		CommitTs: p.CommitTs,
   128  	}
   129  }
   130  
   131  // Prev can only be called on a valid Position.
   132  func (p Position) Prev() Position {
   133  	if p.StartTs == 0 {
   134  		return Position{
   135  			StartTs:  p.CommitTs - 2,
   136  			CommitTs: p.CommitTs - 1,
   137  		}
   138  	}
   139  	return Position{
   140  		StartTs:  p.StartTs - 1,
   141  		CommitTs: p.CommitTs,
   142  	}
   143  }
   144  
   145  // Compare compares 2 Position, just like strcmp in C.
   146  func (p Position) Compare(q Position) int {
   147  	if p.CommitTs < q.CommitTs {
   148  		return -1
   149  	} else if p.CommitTs == q.CommitTs {
   150  		if p.StartTs < q.StartTs {
   151  			return -1
   152  		} else if p.StartTs == q.StartTs {
   153  			return 0
   154  		} else {
   155  			return 1
   156  		}
   157  	} else {
   158  		return 1
   159  	}
   160  }
   161  
   162  // IsCommitFence indicates all transactions with same CommitTs are less than the position.
   163  func (p Position) IsCommitFence() bool {
   164  	// NOTE: currently p.StartTs will always less than p.CommitTs.
   165  	// But maybe we will allow p.StartTs == p.CommitTs later.
   166  	return p.CommitTs > 0 && p.StartTs+1 >= p.CommitTs
   167  }
   168  
// TableStats holds the statistics of one table in a sort engine, as returned
// by SortEngine.GetStatsByTable.
//
// NOTE(review): the field meanings below are inferred from their names —
// confirm against the engine implementations.
type TableStats struct {
	// ReceivedMaxCommitTs is presumably the largest CommitTs among the
	// events received for the table.
	ReceivedMaxCommitTs   model.Ts
	// ReceivedMaxResolvedTs is presumably the largest resolved timestamp
	// received for the table.
	ReceivedMaxResolvedTs model.Ts
}