github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdcv2/metadata/metadata.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package metadata
    15  
    16  import (
    17  	"context"
    18  
    19  	"github.com/pingcap/tiflow/cdc/model"
    20  	"github.com/pingcap/tiflow/pkg/election"
    21  	"github.com/pingcap/tiflow/pkg/errors"
    22  )
    23  
// Querier is used to query information from metadata storage.
type Querier interface {
	// GetChangefeed queries some or all changefeeds.
	GetChangefeed(...ChangefeedUUID) ([]*ChangefeedInfo, error)
	// GetChangefeedState queries some or all changefeed states.
	GetChangefeedState(...ChangefeedUUID) ([]*ChangefeedState, error)
	// GetChangefeedProgress queries some or all changefeed progresses.
	// The result is keyed by changefeed UUID.
	GetChangefeedProgress(...ChangefeedUUID) (map[ChangefeedUUID]ChangefeedProgress, error)
}
    33  
    34  // -------------------- About owner schedule -------------------- //
    35  // 1. ControllerObservation.SetOwner puts an owner on a given capture;
    36  // 2. ControllerObservation.SetOwner can also stop an owner;
    37  // 3. Capture fetches owner launch/stop events with CaptureObservation.OwnerChanges;
// 4. Capture calls CaptureObservation.PostOwnerRemoved when the owner exits;
// 5. After controller confirms the old owner exits, it can reschedule it.
    40  // -------------------------------------------------------------- //
    41  
    42  // ---------- About changefeed processor captures schedule ---------- //
    43  // 1. ControllerObservation.SetProcessors attaches some captures to a changefeed;
    44  // 2. ControllerObservation.SetProcessors can also detach captures from a changefeed;
    45  // 3. Owner calls OwnerObservation.ProcessorChanges to know processors are created;
    46  // 4. Capture fetches processor launch/stop events with CaptureObservation.ProcessorChanges;
    47  // 5. How to rolling-update a changefeed with only one worker capture:
    48  //    * controller needs only to attach more captures to the changefeed;
    49  //    * it's owner's responsibility to evict tables between captures.
// 6. What if owner knows processors are created before captures?
    51  //    * table schedule should be robust enough.
    52  // ------------------------------------------------------------------ //
    53  
    54  // ---------------- About keep-alive and heartbeat ---------------- //
    55  // 1. Capture updates heartbeats to metadata by calling CaptureObservation.Heartbeat,
    56  //    with a given timeout, for example, 1s;
    57  // 2. On a capture, controller, owners and processors share one same Context, which is
    58  //    associated with deadline 10s. CaptureObservation.Heartbeat will refresh the deadline.
// 3. Controller is bound to a lease of (10+1+1)s, covering the deadline, the
//    heartbeat time elapsed and the network clock skew.
// 4. Controller needs to consider rescheduling owners and processors from a capture,
//    if the capture has been partitioned from metadata storage for more than lease+5s.
    63  // ---------------------------------------------------------------- //
    64  
// CaptureObservation is for observing and updating metadata on a CAPTURE instance.
//
// All interfaces are thread-safe and share one same Context.
type CaptureObservation interface {
	Elector

	// Run runs the `elector.RunElection` and other background tasks.
	// controllerCallback will be called when the capture campaigns as the controller.
	Run(
		ctx context.Context,
		controllerCallback func(context.Context, ControllerObservation) error,
	) error

	// Advance advances some changefeed progresses that are collected from processors.
	Advance(cp CaptureProgress) error

	// OwnerChanges fetches owner modifications.
	OwnerChanges() <-chan ScheduledChangefeed

	// OnOwnerLaunched creates an owner observation for a changefeed owner.
	OnOwnerLaunched(cf ChangefeedUUID) OwnerObservation

	// PostOwnerRemoved informs the metadata storage when an owner exits.
	PostOwnerRemoved(cf ChangefeedUUID, taskPosition ChangefeedProgress) error
}
    91  
// ControllerObservation is for observing and updating meta by Controller.
//
// All interfaces are thread-safe and share one same Context.
type ControllerObservation interface {
	// CreateChangefeed creates a changefeed, UUID will be filled into the input ChangefeedInfo.
	CreateChangefeed(cf *ChangefeedInfo, up *model.UpstreamInfo) (ChangefeedIdent, error)

	// RemoveChangefeed removes a changefeed, will mark it as removed and stop the owner and processors asynchronously.
	RemoveChangefeed(cf ChangefeedUUID) error

	// CleanupChangefeed cleans up a changefeed, will delete info, schedule and state metadata.
	CleanupChangefeed(cf ChangefeedUUID) error

	// RefreshCaptures fetches the latest capture list in the TiCDC cluster.
	RefreshCaptures() (captures []*model.CaptureInfo, changed bool)

	// SetOwner schedules a changefeed owner to a given target.
	// Notes:
	//   * the target capture can fetch the event by `OwnerChanges`.
	//   * target state can only be `SchedLaunched` or `SchedRemoving`.
	SetOwner(target ScheduledChangefeed) error

	// GetChangefeedSchedule gets the current schedule of the given changefeed.
	GetChangefeedSchedule(cf ChangefeedUUID) (ScheduledChangefeed, error)

	// ScheduleSnapshot gets a snapshot of the current schedule of all changefeeds.
	ScheduleSnapshot() ([]ScheduledChangefeed, []*model.CaptureInfo, error)
}
   120  
// OwnerObservation is for observing and updating running status of a changefeed.
//
// All interfaces are thread-safe and share one same Context.
type OwnerObservation interface {
	// Self returns the changefeed UUID of the owner.
	Self() ChangefeedUUID

	// UpdateChangefeed updates changefeed metadata, must be called on a paused one.
	UpdateChangefeed(*ChangefeedInfo) error

	// PauseChangefeed pauses a changefeed.
	PauseChangefeed() error

	// ResumeChangefeed resumes a changefeed.
	ResumeChangefeed() error

	// SetChangefeedFinished sets the changefeed to state finished.
	SetChangefeedFinished() error

	// SetChangefeedRemoved sets the changefeed to state removed.
	SetChangefeedRemoved() error

	// SetChangefeedFailed sets the changefeed to state failed.
	SetChangefeedFailed(err *model.RunningError) error

	// SetChangefeedWarning sets the changefeed to state warning.
	SetChangefeedWarning(warn *model.RunningError) error

	// SetChangefeedPending sets the changefeed to state pending.
	SetChangefeedPending(err *model.RunningError) error
}
   152  
// Elector is used to campaign for capture controller.
type Elector interface {
	// Self tells the caller who am I.
	Self() *model.CaptureInfo

	// RunElection runs the elector to continuously campaign for leadership
	// until the context is canceled.
	// onTakeControl will be called when the capture campaigns as the controller.
	RunElection(ctx context.Context, onTakeControl func(ctx context.Context) error) error

	// GetController returns the last observed controller whose lease is still valid.
	GetController() (*model.CaptureInfo, error)

	// GetCaptures queries some or all captures.
	GetCaptures(...model.CaptureID) ([]*model.CaptureInfo, error)
}
   169  
   170  // NewElector creates a new elector.
   171  func NewElector(
   172  	selfInfo *model.CaptureInfo,
   173  	storage election.Storage,
   174  ) Elector {
   175  	return &electorImpl{
   176  		selfInfo: selfInfo,
   177  		config: election.Config{
   178  			ID:              selfInfo.ID,
   179  			Name:            selfInfo.Version, /* TODO: refine this filed */
   180  			Address:         selfInfo.AdvertiseAddr,
   181  			Storage:         storage,
   182  			ExitOnRenewFail: true,
   183  		},
   184  	}
   185  }
   186  
// electorImpl is the Elector implementation returned by NewElector.
type electorImpl struct {
	// selfInfo describes the local capture; it is what Self returns.
	selfInfo *model.CaptureInfo

	// config is the election configuration assembled in NewElector;
	// RunElection sets its LeaderCallback. elector is only assigned by
	// RunElection, so it holds its zero value before that call.
	config  election.Config
	elector election.Elector
}
   193  
   194  func (e *electorImpl) Self() *model.CaptureInfo {
   195  	return e.selfInfo
   196  }
   197  
   198  func (e *electorImpl) RunElection(
   199  	ctx context.Context, onTakeControl func(ctx context.Context) error,
   200  ) (err error) {
   201  	e.config.LeaderCallback = onTakeControl
   202  	e.elector, err = election.NewElector(e.config)
   203  	if err != nil {
   204  		return errors.Trace(err)
   205  	}
   206  	return e.elector.RunElection(ctx)
   207  }
   208  
   209  func (e *electorImpl) GetController() (*model.CaptureInfo, error) {
   210  	leader, ok := e.elector.GetLeader()
   211  	if !ok {
   212  		return nil, errors.ErrOwnerNotFound.GenWithStackByArgs()
   213  	}
   214  
   215  	return &model.CaptureInfo{
   216  		ID:            leader.ID,
   217  		AdvertiseAddr: leader.Address,
   218  		Version:       leader.Name,
   219  	}, nil
   220  }
   221  
   222  func (e *electorImpl) GetCaptures(captureIDs ...model.CaptureID) ([]*model.CaptureInfo, error) {
   223  	captureIDSet := make(map[string]struct{}, len(captureIDs))
   224  	for _, cp := range captureIDs {
   225  		captureIDSet[cp] = struct{}{}
   226  	}
   227  
   228  	members := e.elector.GetMembers()
   229  	captureInfos := make([]*model.CaptureInfo, 0, len(members))
   230  	for _, m := range members {
   231  		if _, ok := captureIDSet[m.ID]; ok || len(captureIDs) == 0 {
   232  			captureInfos = append(captureInfos, &model.CaptureInfo{
   233  				ID:            m.ID,
   234  				AdvertiseAddr: m.Address,
   235  				Version:       m.Name,
   236  			})
   237  		}
   238  	}
   239  	return captureInfos, nil
   240  }