github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/compactor.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package syncer

import (
	"fmt"
	"strconv"
	"time"

	"github.com/pingcap/failpoint"
	"github.com/pingcap/tiflow/dm/pkg/log"
	"github.com/pingcap/tiflow/dm/syncer/metrics"
	"github.com/pingcap/tiflow/pkg/sqlmodel"
	"go.uber.org/zap"
)

// compactor compacts multiple statements into one statement.
type compactor struct {
	inCh       chan *job
	outCh      chan *job
	bufferSize int
	logger     log.Logger
	safeMode   bool

	keyMap map[string]map[string]int // table -> key (pk or (uk + not null)) -> index in buffer
	buffer []*job

	// labels and proxies for metrics reporting
	task               string
	source             string
	metricProxies      *metrics.Proxies
	updateJobMetricsFn func(bool, string, *job)
}

// compactorWrap creates and runs a compactor instance.
func compactorWrap(inCh chan *job, syncer *Syncer) chan *job {
	// We could use a larger compact buffer size, but then a user who runs
	// pause-task/stop-task may have to wait longer for all buffered jobs to flush.
	// TODO: implement ping-pong buffer.
	bufferSize := syncer.cfg.QueueSize * syncer.cfg.WorkerCount / 4
	compactor := &compactor{
		inCh:               inCh,
		outCh:              make(chan *job, bufferSize),
		bufferSize:         bufferSize,
		logger:             syncer.tctx.Logger.WithFields(zap.String("component", "compactor")),
		keyMap:             make(map[string]map[string]int),
		buffer:             make([]*job, 0, bufferSize),
		task:               syncer.cfg.Name,
		source:             syncer.cfg.SourceID,
		metricProxies:      syncer.metricsProxies,
		updateJobMetricsFn: syncer.updateJobMetrics,
	}
	go func() {
		compactor.run()
		compactor.close()
	}()
	return compactor.outCh
}
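
// For a concrete sense of the sizing above (illustrative values, not project
// defaults): with syncer.cfg.QueueSize = 1024 and syncer.cfg.WorkerCount = 16,
// bufferSize = 1024 * 16 / 4 = 4096, so the compactor holds at most 4096 jobs
// before it must flush, and outCh can buffer the same number downstream.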

// run runs a compactor instance.
func (c *compactor) run() {
	for {
		select {
		case j, ok := <-c.inCh:
			if !ok {
				return
			}
			c.metricProxies.QueueSizeGauge.WithLabelValues(c.task, "compactor_input", c.source).Set(float64(len(c.inCh)))

			if j.tp == flush || j.tp == asyncFlush {
				c.flushBuffer()
				c.outCh <- j
				continue
			}

			if j.tp == gc {
				c.outCh <- j
				continue
			}

			// set safeMode when receiving the first job
			if len(c.buffer) == 0 {
				c.safeMode = j.safeMode
			}
			// if the dml has no PK/NOT NULL UK, do not compact it
			if !j.dml.HasNotNullUniqueIdx() {
				c.buffer = append(c.buffer, j)
				continue
			}

			// if an update job updates its identity keys, turn it into delete + insert
			if j.dml.IsIdentityUpdated() {
				delDML, insertDML := j.dml.SplitUpdate()
				delJob := j.clone()
				delJob.dml = delDML

				insertJob := j.clone()
				insertJob.dml = insertDML

				c.compactJob(delJob)
				c.compactJob(insertJob)
			} else {
				c.compactJob(j)
			}

			failpoint.Inject("SkipFlushCompactor", func() {
				failpoint.Continue()
			})
			// if the outer channel is drained or the buffer is full, flush the buffer
			if len(c.outCh) == 0 || len(c.buffer) >= c.bufferSize {
				c.flushBuffer()
			}
		// if no inner job arrives within waitTime, flush the buffer
		case <-time.After(waitTime):
			failpoint.Inject("SkipFlushCompactor", func() {
				failpoint.Continue()
			})
			c.flushBuffer()
		}
	}
}

// close closes the outer channel.
func (c *compactor) close() {
	close(c.outCh)
}

// flushBuffer flushes the buffer and resets the compactor.
func (c *compactor) flushBuffer() {
	for _, j := range c.buffer {
		if j != nil {
			// inherit safe mode from the first job in the buffer, or keep the
			// job's own safe mode (delete + insert compacts to an insert that
			// must run in safe mode)
			j.safeMode = c.safeMode || j.safeMode
			c.outCh <- j
		}
	}
	c.keyMap = make(map[string]map[string]int)
	c.buffer = c.buffer[:0]
}
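
// A worked trace of the algebra implemented by compactJob below (illustrative
// values, not taken from a real task): suppose three binlog events arrive for
// the same identity key id=1 within one buffer window:
//
//	INSERT(id=1, a=1) -> UPDATE(id=1, a=2) -> DELETE(id=1)
//
// The UPDATE folds into the INSERT (INSERT + UPDATE => INSERT carrying a=2),
// and the DELETE then supersedes it (anything + DELETE => DELETE), so only a
// single DELETE(id=1) is sent downstream when the buffer flushes.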

// compactJob compacts jobs.
// INSERT + INSERT => X            ‾|
// UPDATE + INSERT => X             |=> DELETE + INSERT => INSERT ON DUPLICATE KEY UPDATE(REPLACE)
// DELETE + INSERT => REPLACE      _|
// INSERT + DELETE => DELETE       ‾|
// UPDATE + DELETE => DELETE        |=> anything + DELETE => DELETE
// DELETE + DELETE => X            _|
// INSERT + UPDATE => INSERT       ‾|
// UPDATE + UPDATE => UPDATE        |=> INSERT + UPDATE => INSERT, UPDATE + UPDATE => UPDATE
// DELETE + UPDATE => X            _|
// .
func (c *compactor) compactJob(j *job) {
	tableName := j.dml.TargetTableID()
	tableKeyMap, ok := c.keyMap[tableName]
	if !ok {
		// do not allocate a large map up front; otherwise, when downstream
		// latency is low, the compactor constantly flushes the buffer and the
		// resulting Go GC pressure hurts performance
		c.keyMap[tableName] = make(map[string]int)
		tableKeyMap = c.keyMap[tableName]
	}

	key := j.dml.IdentityKey()

	failpoint.Inject("DownstreamIdentifyKeyCheckInCompact", func(v failpoint.Value) {
		value, err := strconv.Atoi(key)
		upper := v.(int)
		if err != nil || value > upper {
			panic(fmt.Sprintf("downstream identifyKey check failed. key value %v should be less than %v", value, upper))
		}
	})

	prevPos, ok := tableKeyMap[key]
	// if there is no such key in the buffer, add the job
	if !ok {
		tableKeyMap[key] = len(c.buffer)
		c.buffer = append(c.buffer, j)
		return
	}

	prevJob := c.buffer[prevPos]
	c.logger.Debug("start to compact", zap.Stringer("previous dml", prevJob.dml), zap.Stringer("current dml", j.dml))

	// adjust safe mode
	adjustSafeMode(j, prevJob)
	if !shouldSkipReduce(j, prevJob) {
		j.dml.Reduce(prevJob.dml)
	}

	// mark the previous job as compacted (nil), add the new job
	c.buffer[prevPos] = nil
	tableKeyMap[key] = len(c.buffer)
	c.buffer = append(c.buffer, j)
	c.logger.Debug("finish to compact", zap.Stringer("dml", j.dml))
	c.updateJobMetricsFn(true, adminQueueName, newCompactJob(prevJob.targetTable))
}

func shouldSkipReduce(j, prevJob *job) bool {
	return j.dml.Type() == sqlmodel.RowChangeInsert &&
		prevJob.dml.Type() == sqlmodel.RowChangeDelete
}

func adjustSafeMode(j, prevJob *job) {
	switch j.dml.Type() {
	case sqlmodel.RowChangeUpdate:
		if prevJob.dml.Type() == sqlmodel.RowChangeInsert {
			// DELETE + INSERT + UPDATE => INSERT with safe mode
			j.safeMode = prevJob.safeMode
		}
	case sqlmodel.RowChangeInsert:
		if prevJob.dml.Type() == sqlmodel.RowChangeDelete {
			j.safeMode = true
		}
	}
}
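
// The sketch below is an editor-added, self-contained illustration (not part
// of the upstream file) of the bookkeeping pattern compactJob uses: a position
// map from identity key to buffer index, where a superseded entry is
// tombstoned in place so that only the latest change per key survives a flush.
// Real jobs also merge DML content via Reduce; this sketch models only the
// key/position bookkeeping, and compactByKeyExample is a hypothetical name.
// For example, compactByKeyExample([]string{"a", "b", "a", "c", "b"}) returns
// []string{"a", "c", "b"}.
func compactByKeyExample(keys []string) []string {
	pos := make(map[string]int)         // identity key -> index in buf
	buf := make([]string, 0, len(keys)) // append-only buffer with tombstones
	for _, k := range keys {
		if p, ok := pos[k]; ok {
			buf[p] = "" // tombstone the superseded entry, like c.buffer[prevPos] = nil
		}
		pos[k] = len(buf)
		buf = append(buf, k)
	}
	// "flush": keep only non-tombstoned entries, preserving buffer order
	out := make([]string, 0, len(buf))
	for _, k := range buf {
		if k != "" {
			out = append(out, k)
		}
	}
	return out
}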