github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/sink/dmlsink/txn/worker.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package txn

import (
	"context"
	"fmt"
	"time"

	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/sink/metrics/txn"
	"github.com/pingcap/tiflow/cdc/sink/tablesink/state"
	"github.com/pingcap/tiflow/pkg/causality"
	"github.com/prometheus/client_golang/prometheus"
	"go.uber.org/zap"
)

type worker struct {
	ctx         context.Context
	changefeed  string
	workerCount int

	ID      int
	backend backend

	// Metrics.
	metricConflictDetectDuration prometheus.Observer
	metricQueueDuration          prometheus.Observer
	metricTxnWorkerFlushDuration prometheus.Observer
	metricTxnWorkerTotalDuration prometheus.Observer
	metricTxnWorkerHandledRows   prometheus.Counter

	// Fields only used in the background loop.
	flushInterval            time.Duration
	hasPending               bool
	postTxnExecutedCallbacks []func()
}

func newWorker(ctx context.Context, changefeedID model.ChangeFeedID,
	ID int, backend backend, workerCount int,
) *worker {
	wid := fmt.Sprintf("%d", ID)

	return &worker{
		ctx:         ctx,
		changefeed:  fmt.Sprintf("%s.%s", changefeedID.Namespace, changefeedID.ID),
		workerCount: workerCount,

		ID:      ID,
		backend: backend,

		metricConflictDetectDuration: txn.ConflictDetectDuration.WithLabelValues(changefeedID.Namespace, changefeedID.ID),
		metricQueueDuration:          txn.QueueDuration.WithLabelValues(changefeedID.Namespace, changefeedID.ID),
		metricTxnWorkerFlushDuration: txn.WorkerFlushDuration.WithLabelValues(changefeedID.Namespace, changefeedID.ID, wid),
		metricTxnWorkerTotalDuration: txn.WorkerTotalDuration.WithLabelValues(changefeedID.Namespace, changefeedID.ID, wid),
		metricTxnWorkerHandledRows:   txn.WorkerHandledRows.WithLabelValues(changefeedID.Namespace, changefeedID.ID, wid),

		flushInterval:            backend.MaxFlushInterval(),
		hasPending:               false,
		postTxnExecutedCallbacks: make([]func(), 0, 1024),
	}
}
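
// The helper below is an illustrative sketch, not part of the original file.
// It shows how a caller might pair each worker with its own backend and the
// matching output channel of the conflict detector, running every loop in its
// own goroutine. The names spawnWorkers, backends, and txnChs are assumptions
// made for this example; the real wiring lives elsewhere in the sink.
func spawnWorkers(
	ctx context.Context, changefeedID model.ChangeFeedID,
	backends []backend, txnChs []<-chan causality.TxnWithNotifier[*txnEvent],
) {
	for i := range backends {
		// Each worker owns one backend: runLoop closes it on exit, so a
		// backend must not be shared between workers.
		w := newWorker(ctx, changefeedID, i, backends[i], len(backends))
		ch := txnChs[i]
		go func() {
			if err := w.runLoop(ch); err != nil {
				log.Error("transaction dmlSink worker stopped with error",
					zap.String("changefeedID", w.changefeed),
					zap.Int("workerID", w.ID),
					zap.Error(err))
			}
		}()
	}
}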

// runLoop receives transactions from txnCh and flushes them to the backend.
func (w *worker) runLoop(txnCh <-chan causality.TxnWithNotifier[*txnEvent]) error {
	defer func() {
		if err := w.backend.Close(); err != nil {
			log.Info("Transaction dmlSink backend close fail",
				zap.String("changefeedID", w.changefeed),
				zap.Int("workerID", w.ID),
				zap.Error(err))
		}
	}()
	log.Info("Transaction dmlSink worker starts",
		zap.String("changefeedID", w.changefeed),
		zap.Int("workerID", w.ID))

	start := time.Now()
	for {
		select {
		case <-w.ctx.Done():
			log.Info("Transaction dmlSink worker exits as canceled",
				zap.String("changefeedID", w.changefeed),
				zap.Int("workerID", w.ID))
			return nil
		case txn := <-txnCh:
			// Keep draining txnCh until there is no more data or the backend
			// reports that it needs a flush. If neither happens, wait up to
			// flushInterval and then flush anyway, to avoid issuing many
			// flushes that each carry only a small number of txns.
			if txn.TxnEvent != nil {
				needFlush := w.onEvent(txn.TxnEvent, txn.PostTxnExecuted)
				if !needFlush {
					delay := time.NewTimer(w.flushInterval)
					for !needFlush {
						select {
						case txn := <-txnCh:
							needFlush = w.onEvent(txn.TxnEvent, txn.PostTxnExecuted)
						case <-delay.C:
							needFlush = true
						}
					}
					// Release timer resources promptly.
					if !delay.Stop() {
						select {
						case <-delay.C:
						default:
						}
					}
				}
				// needFlush must be true here, so we can flush.
				if err := w.doFlush(); err != nil {
					log.Error("Transaction dmlSink worker exits unexpectedly",
						zap.String("changefeedID", w.changefeed),
						zap.Int("workerID", w.ID),
						zap.Error(err))
					return err
				}
				// Record the total time to calculate the worker busy ratio.
				// The total time is observed after flushing so that flush time
				// and total time are measured over the same window.
				w.metricTxnWorkerTotalDuration.Observe(time.Since(start).Seconds())
				start = time.Now()
			}
		}
	}
}

// onEvent is called when a new event is received.
// It returns true if the worker should flush the backend immediately.
func (w *worker) onEvent(txn *txnEvent, postTxnExecuted func()) bool {
	w.hasPending = true

	if txn.GetTableSinkState() != state.TableSinkSinking {
		// The table the event belongs to is stopping, so it's safe
		// to drop the event directly.
		txn.Callback()
		// It is still necessary to append the callback to the pending list,
		// so the conflict detector is notified on the next flush.
		w.postTxnExecutedCallbacks = append(w.postTxnExecutedCallbacks, postTxnExecuted)
		return false
	}

	w.metricConflictDetectDuration.Observe(txn.conflictResolved.Sub(txn.start).Seconds())
	w.metricQueueDuration.Observe(time.Since(txn.start).Seconds())
	w.metricTxnWorkerHandledRows.Add(float64(len(txn.Event.Rows)))
	w.postTxnExecutedCallbacks = append(w.postTxnExecutedCallbacks, postTxnExecuted)
	return w.backend.OnTxnEvent(txn.TxnCallbackableEvent)
}

// doFlush flushes the backend.
func (w *worker) doFlush() error {
	if w.hasPending {
		start := time.Now()
		defer func() {
			w.metricTxnWorkerFlushDuration.Observe(time.Since(start).Seconds())
		}()
		if err := w.backend.Flush(w.ctx); err != nil {
			log.Warn("Transaction dmlSink backend flush fail",
				zap.String("changefeedID", w.changefeed),
				zap.Int("workerID", w.ID),
				zap.Error(err))
			return err
		}
		// Flushed successfully; invoke the callbacks to notify the conflict detector.
		for _, postTxnExecuted := range w.postTxnExecutedCallbacks {
			postTxnExecuted()
		}
		w.postTxnExecutedCallbacks = w.postTxnExecutedCallbacks[:0]
		if cap(w.postTxnExecutedCallbacks) > 1024 {
			// Shrink the buffer if it has grown too large.
			w.postTxnExecutedCallbacks = make([]func(), 0, 1024)
		}
	}

	w.hasPending = false
	return nil
}
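
// mockBackend below is an illustrative sketch, not part of the original file.
// It implements the backend interface this worker relies on (OnTxnEvent,
// Flush, MaxFlushInterval, Close) with an in-memory buffer, making the flush
// contract explicit: OnTxnEvent reports whether the worker should flush now,
// and a successful Flush completes every buffered event. It assumes an extra
// import, "github.com/pingcap/tiflow/cdc/sink/dmlsink", for the event type,
// and the batch size of 256 is an arbitrary choice for the example.
type mockBackend struct {
	buffered []*dmlsink.TxnCallbackableEvent
}

func (b *mockBackend) OnTxnEvent(e *dmlsink.TxnCallbackableEvent) bool {
	b.buffered = append(b.buffered, e)
	// Ask the worker to flush once enough events have accumulated.
	return len(b.buffered) >= 256
}

func (b *mockBackend) Flush(ctx context.Context) error {
	// Complete the buffered events, then reuse the buffer.
	for _, e := range b.buffered {
		if e.Callback != nil {
			e.Callback()
		}
	}
	b.buffered = b.buffered[:0]
	return nil
}

func (b *mockBackend) MaxFlushInterval() time.Duration { return 10 * time.Millisecond }

func (b *mockBackend) Close() error { return nil }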