github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/processor/sinkmanager/tasks.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package sinkmanager 15 16 import ( 17 "time" 18 19 "github.com/pingcap/log" 20 "github.com/pingcap/tiflow/cdc/model" 21 "github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter" 22 "github.com/pingcap/tiflow/cdc/processor/tablepb" 23 "github.com/tikv/client-go/v2/oracle" 24 "go.uber.org/zap" 25 ) 26 27 const ( 28 // defaultRequestMemSize is the default memory usage for a request. 29 defaultRequestMemSize = uint64(1024 * 1024) // 1MB 30 // Avoid update resolved ts too frequently, if there are too many small transactions. 31 defaultMaxUpdateIntervalSize = uint64(1024 * 256) // 256KB 32 // bufferSize is the size of the buffer used to store the events. 33 bufferSize = 1024 34 ) 35 36 // Make these values be variables, so that we can mock them in unit tests. 37 var ( 38 requestMemSize = defaultRequestMemSize 39 maxUpdateIntervalSize = defaultMaxUpdateIntervalSize 40 41 // Sink manager schedules table tasks based on lag. Limit the max task range 42 // can be helpful to reduce changefeed latency for large initial data. 43 maxTaskTimeRange = 30 * time.Minute 44 ) 45 46 // Used to record the progress of the table. 47 type writeSuccessCallback func(lastWrittenPos sorter.Position) 48 49 // Used to get an upper bound. 50 type upperBoundGetter func(tableSinkUpperBoundTs model.Ts) sorter.Position 51 52 // Used to abort the task processing of the table. 53 type isCanceled func() bool 54 55 // sinkTask is a task for a table sink. 56 // It only considers how to control the table sink. 57 type sinkTask struct { 58 span tablepb.Span 59 // lowerBound indicates the lower bound of the task. 60 // It is a closed interval. 61 lowerBound sorter.Position 62 // getUpperBound is used to get the upper bound of the task. 63 // It is a closed interval. 64 // Use a method to get the latest value, because the upper bound may change(only can increase). 65 getUpperBound upperBoundGetter 66 tableSink *tableSinkWrapper 67 callback writeSuccessCallback 68 isCanceled isCanceled 69 } 70 71 // redoTask is a task for the redo log. 72 type redoTask struct { 73 span tablepb.Span 74 lowerBound sorter.Position 75 getUpperBound upperBoundGetter 76 tableSink *tableSinkWrapper 77 callback writeSuccessCallback 78 isCanceled isCanceled 79 } 80 81 func validateAndAdjustBound( 82 changefeedID model.ChangeFeedID, 83 span *tablepb.Span, 84 lowerBound, upperBound sorter.Position, 85 ) (sorter.Position, sorter.Position) { 86 lowerPhs := oracle.GetTimeFromTS(lowerBound.CommitTs) 87 upperPhs := oracle.GetTimeFromTS(upperBound.CommitTs) 88 // The time range of a task should not exceed maxTaskTimeRange. 89 // This would help for reduce changefeed latency. 90 if upperPhs.Sub(lowerPhs) > maxTaskTimeRange { 91 newUpperCommitTs := oracle.GoTimeToTS(lowerPhs.Add(maxTaskTimeRange)) 92 upperBound = sorter.GenCommitFence(newUpperCommitTs) 93 } 94 95 if !upperBound.IsCommitFence() { 96 log.Panic("Task upperbound must be a ResolvedTs", 97 zap.String("namespace", changefeedID.Namespace), 98 zap.String("changefeed", changefeedID.ID), 99 zap.Stringer("span", span), 100 zap.Any("upperBound", upperBound)) 101 } 102 return lowerBound, upperBound 103 }