github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/processor/pipeline/table.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pipeline
    15  
    16  import (
    17  	"context"
    18  	"time"
    19  
    20  	"github.com/pingcap/log"
    21  	"github.com/pingcap/ticdc/cdc/entry"
    22  	"github.com/pingcap/ticdc/cdc/model"
    23  	"github.com/pingcap/ticdc/cdc/sink"
    24  	"github.com/pingcap/ticdc/cdc/sink/common"
    25  	serverConfig "github.com/pingcap/ticdc/pkg/config"
    26  	cdcContext "github.com/pingcap/ticdc/pkg/context"
    27  	cerror "github.com/pingcap/ticdc/pkg/errors"
    28  	"github.com/pingcap/ticdc/pkg/pipeline"
    29  	"go.uber.org/zap"
    30  )
    31  
const (
	// resolvedTsInterpolateInterval is a fixed 200ms interval that is part of
	// the sink performance optimization.
	// NOTE(review): the consumer of this constant is not visible in this
	// file; presumably it is used by another file of this package — confirm.
	// TODO determine a reasonable default value
	resolvedTsInterpolateInterval = 200 * time.Millisecond
)
    37  
// TablePipeline is a pipeline which captures the change log of a single table
// from TiKV.
type TablePipeline interface {
	// ID returns the ID of the source table and the ID of its mark table.
	ID() (tableID, markTableID int64)
	// Name returns the quoted schema and table name.
	Name() string
	// ResolvedTs returns the resolved ts in this table pipeline.
	ResolvedTs() model.Ts
	// CheckpointTs returns the checkpoint ts in this table pipeline.
	CheckpointTs() model.Ts
	// UpdateBarrierTs updates the barrier ts in this table pipeline.
	UpdateBarrierTs(ts model.Ts)
	// AsyncStop tells the pipeline to stop. It returns true if the stop
	// command was accepted or the pipeline is already closed, and false if
	// the command could not be sent and the caller has to try again.
	AsyncStop(targetTs model.Ts) bool
	// Workload returns the workload of this table.
	Workload() model.WorkloadInfo
	// Status returns the status of this table pipeline.
	Status() TableStatus
	// Cancel stops this table pipeline immediately and destroys all
	// resources created by this table pipeline.
	Cancel()
	// Wait waits for the table pipeline to be destroyed.
	Wait()
}
    61  
// tablePipelineImpl is the TablePipeline implementation that
// NewTablePipeline builds and returns.
type tablePipelineImpl struct {
	// p is the underlying pipeline to which the table's nodes are appended.
	p *pipeline.Pipeline

	tableID     int64
	markTableID int64
	tableName   string // quoted schema and table, used in metrics only

	sinkNode *sinkNode          // node appended last to p; backs ResolvedTs, CheckpointTs and Status
	cancel   context.CancelFunc // cancels the context derived in NewTablePipeline
}
    72  
// tableFlowController is the flow-control surface used by the table pipeline.
// We use an interface here for ease in unit testing.
// TODO find a better name or avoid using an interface
//
// NOTE(review): the method semantics are not visible in this file; the
// per-method notes below are inferred from names and signatures only and
// must be confirmed against the concrete controller.
type tableFlowController interface {
	// Consume presumably accounts size bytes for an event at commitTs and
	// invokes blockCallBack when the caller would have to block — confirm.
	Consume(commitTs uint64, size uint64, blockCallBack func() error) error
	// Release presumably frees the quota held by events up to resolvedTs — confirm.
	Release(resolvedTs uint64)
	// Abort presumably cancels pending Consume calls — confirm.
	Abort()
	// GetConsumption presumably reports the quota currently in use — confirm.
	GetConsumption() uint64
}
    81  
// ResolvedTs returns the resolved ts in this table pipeline.
// The value is read from the pipeline's sink node.
func (t *tablePipelineImpl) ResolvedTs() model.Ts {
	return t.sinkNode.ResolvedTs()
}
    86  
// CheckpointTs returns the checkpoint ts in this table pipeline.
// The value is read from the pipeline's sink node.
func (t *tablePipelineImpl) CheckpointTs() model.Ts {
	return t.sinkNode.CheckpointTs()
}
    91  
    92  // UpdateBarrierTs updates the barrier ts in this table pipeline
    93  func (t *tablePipelineImpl) UpdateBarrierTs(ts model.Ts) {
    94  	err := t.p.SendToFirstNode(pipeline.BarrierMessage(ts))
    95  	if err != nil && !cerror.ErrSendToClosedPipeline.Equal(err) && !cerror.ErrPipelineTryAgain.Equal(err) {
    96  		log.Panic("unexpect error from send to first node", zap.Error(err))
    97  	}
    98  }
    99  
   100  // AsyncStop tells the pipeline to stop, and returns true is the pipeline is already stopped.
   101  func (t *tablePipelineImpl) AsyncStop(targetTs model.Ts) bool {
   102  	err := t.p.SendToFirstNode(pipeline.CommandMessage(&pipeline.Command{
   103  		Tp: pipeline.CommandTypeStop,
   104  	}))
   105  	log.Info("send async stop signal to table", zap.Int64("tableID", t.tableID), zap.Uint64("targetTs", targetTs))
   106  	if err != nil {
   107  		if cerror.ErrPipelineTryAgain.Equal(err) {
   108  			return false
   109  		}
   110  		if cerror.ErrSendToClosedPipeline.Equal(err) {
   111  			return true
   112  		}
   113  		log.Panic("unexpect error from send to first node", zap.Error(err))
   114  	}
   115  	return true
   116  }
   117  
// workload is the fixed workload value (1) returned by Workload.
var workload = model.WorkloadInfo{Workload: 1}

// Workload returns the workload of this table.
// It currently returns the same package-level value, with Workload set to 1,
// regardless of the receiver.
func (t *tablePipelineImpl) Workload() model.WorkloadInfo {
	// TODO(leoppro) calculate the workload of this table
	// We temporarily set the value to constant 1
	return workload
}
   126  
// Status returns the status of this table pipeline.
// The value is read from the pipeline's sink node.
func (t *tablePipelineImpl) Status() TableStatus {
	return t.sinkNode.Status()
}
   131  
// ID returns the ID of the source table and the ID of the mark table, as
// stored on the pipeline when it was constructed.
func (t *tablePipelineImpl) ID() (tableID, markTableID int64) {
	return t.tableID, t.markTableID
}
   136  
// Name returns the quoted schema and table name stored on the pipeline.
func (t *tablePipelineImpl) Name() string {
	return t.tableName
}
   141  
// Cancel stops this table pipeline immediately and destroys all resources
// created by this table pipeline. It does so by invoking the stored cancel
// function; it does not itself wait for completion (see Wait).
func (t *tablePipelineImpl) Cancel() {
	t.cancel()
}
   146  
// Wait waits for the table pipeline to be destroyed by delegating to the
// underlying pipeline's Wait.
func (t *tablePipelineImpl) Wait() {
	t.p.Wait()
}
   151  
// defaultOutputChannelSize is the output channel size that NewTablePipeline
// passes to pipeline.NewPipeline.
//
// Assuming 1KB per row in upstream TiDB, replicating 1024 tables takes about
// 250 MB (1024*4*64 KB) in the worst case.
const defaultOutputChannelSize = 64
   155  
// defaultRunnersSize is the runner count that NewTablePipeline passes to
// pipeline.NewPipeline when cyclic replication is disabled; it is incremented
// by one when cyclic replication is enabled.
//
// There are 5 or 6 runners in a table pipeline: header, puller, sorter,
// mounter, sink, and cyclic if cyclic replication is enabled.
const defaultRunnersSize = 5
   159  
   160  // NewTablePipeline creates a table pipeline
   161  // TODO(leoppro): implement a mock kvclient to test the table pipeline
   162  func NewTablePipeline(ctx cdcContext.Context,
   163  	mounter entry.Mounter,
   164  	tableID model.TableID,
   165  	tableName string,
   166  	replicaInfo *model.TableReplicaInfo,
   167  	sink sink.Sink,
   168  	targetTs model.Ts) TablePipeline {
   169  	ctx, cancel := cdcContext.WithCancel(ctx)
   170  	tablePipeline := &tablePipelineImpl{
   171  		tableID:     tableID,
   172  		markTableID: replicaInfo.MarkTableID,
   173  		tableName:   tableName,
   174  		cancel:      cancel,
   175  	}
   176  
   177  	perTableMemoryQuota := serverConfig.GetGlobalServerConfig().PerTableMemoryQuota
   178  	log.Debug("creating table flow controller",
   179  		zap.String("changefeed-id", ctx.ChangefeedVars().ID),
   180  		zap.String("table-name", tableName),
   181  		zap.Int64("table-id", tableID),
   182  		zap.Uint64("quota", perTableMemoryQuota))
   183  	flowController := common.NewTableFlowController(perTableMemoryQuota)
   184  	config := ctx.ChangefeedVars().Info.Config
   185  	cyclicEnabled := config.Cyclic != nil && config.Cyclic.IsEnabled()
   186  	runnerSize := defaultRunnersSize
   187  	if cyclicEnabled {
   188  		runnerSize++
   189  	}
   190  	p := pipeline.NewPipeline(ctx, 500*time.Millisecond, runnerSize, defaultOutputChannelSize)
   191  	p.AppendNode(ctx, "puller", newPullerNode(tableID, replicaInfo, tableName))
   192  	p.AppendNode(ctx, "sorter", newSorterNode(tableName, tableID, flowController, mounter))
   193  	p.AppendNode(ctx, "mounter", newMounterNode())
   194  	if cyclicEnabled {
   195  		p.AppendNode(ctx, "cyclic", newCyclicMarkNode(replicaInfo.MarkTableID))
   196  	}
   197  	tablePipeline.sinkNode = newSinkNode(sink, replicaInfo.StartTs, targetTs, flowController)
   198  	p.AppendNode(ctx, "sink", tablePipeline.sinkNode)
   199  	tablePipeline.p = p
   200  	return tablePipeline
   201  }