github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/sink/cdclog/utils.go

// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package cdclog

import (
	"context"
	"encoding/json"
	"fmt"
	"sync"
	"time"

	"github.com/pingcap/br/pkg/storage"
	"github.com/pingcap/log"
	"github.com/pingcap/ticdc/cdc/model"
	"github.com/pingcap/ticdc/cdc/sink/codec"
	"github.com/pingcap/ticdc/pkg/quotes"
	"github.com/uber-go/atomic"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

const (
	tablePrefix = "t_"
	logMetaFile = "log.meta"

	ddlEventsDir    = "ddls"
	ddlEventsPrefix = "ddl"

	maxUint64 = ^uint64(0)
)

// logUnit is a per-table buffer that the logSink fills with row events and
// flushes to backend storage.
type logUnit interface {
	TableID() int64
	Events() *atomic.Int64
	Size() *atomic.Int64

	dataChan() chan *model.RowChangedEvent

	isEmpty() bool
	shouldFlush() bool
	// flush data to storage.
	flush(ctx context.Context, sink *logSink) error
}

type logSink struct {
	notifyChan     chan []logUnit
	notifyWaitChan chan struct{}

	encoder func() codec.EventBatchEncoder
	units   []logUnit

	// used by the file sink
	rootPath string
	// used by the s3 sink
	storagePath storage.ExternalStorage

	hashMap sync.Map
}

func newLogSink(root string, storage storage.ExternalStorage) *logSink {
	return &logSink{
		notifyChan:     make(chan []logUnit),
		notifyWaitChan: make(chan struct{}),
		encoder: func() codec.EventBatchEncoder {
			ret := codec.NewJSONEventBatchEncoder()
			ret.(*codec.JSONEventBatchEncoder).SetMixedBuildSupport(true)
			return ret
		},
		units:       make([]logUnit, 0),
		rootPath:    root,
		storagePath: storage,
	}
}

// the s3 sink needs this
func (l *logSink) storage() storage.ExternalStorage {
	return l.storagePath
}

// the file sink needs this
func (l *logSink) root() string {
	return l.rootPath
}

// startFlush runs the flush loop until ctx is canceled: it flushes units on
// demand when they arrive on notifyChan, and periodically flushes any unit
// that reports it should be flushed.
func (l *logSink) startFlush(ctx context.Context) error {
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			log.Info("[startFlush] log sink stopped")
			return ctx.Err()
		case needFlushedUnits := <-l.notifyChan:
			// flush only the units handed over by the caller
			eg, ectx := errgroup.WithContext(ctx)
			for _, u := range needFlushedUnits {
				uReplica := u
				eg.Go(func() error {
					log.Info("start Flush asynchronously to storage by caller",
						zap.Int64("table id", uReplica.TableID()),
						zap.Int64("size", uReplica.Size().Load()),
						zap.Int64("event count", uReplica.Events().Load()),
					)
					return uReplica.flush(ectx, l)
				})
			}
			if err := eg.Wait(); err != nil {
				return err
			}
			// notify the waiting caller that this round of flushing has finished
			l.notifyWaitChan <- struct{}{}

		case <-ticker.C:
			// periodic pass: flush every unit that reports it should be flushed
			eg, ectx := errgroup.WithContext(ctx)
			for _, u := range l.units {
				uReplica := u
				if u.shouldFlush() {
					eg.Go(func() error {
						log.Info("start Flush asynchronously to storage",
							zap.Int64("table id", uReplica.TableID()),
							zap.Int64("size", uReplica.Size().Load()),
							zap.Int64("event count", uReplica.Events().Load()),
						)
						return uReplica.flush(ectx, l)
					})
				}
			}
			if err := eg.Wait(); err != nil {
				return err
			}
		}
	}
}

func (l *logSink) emitRowChangedEvents(ctx context.Context, newUnit func(int64) logUnit, rows ...*model.RowChangedEvent) error {
	for _, row := range rows {
		// dispatch each row event to its unit by tableID
		tableID := row.Table.GetTableID()
		var (
			ok   bool
			item interface{}
			hash int
		)
		if item, ok = l.hashMap.Load(tableID); !ok {
			// a new tableID: create a unit for it and remember its index
			l.units = append(l.units, newUnit(tableID))
			hash = len(l.units) - 1
			l.hashMap.Store(tableID, hash)
		} else {
			hash = item.(int)
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case l.units[hash].dataChan() <- row:
			l.units[hash].Size().Add(row.ApproximateSize)
			l.units[hash].Events().Inc()
		}
	}
	return nil
}

func (l *logSink) flushRowChangedEvents(ctx context.Context, resolvedTs uint64) (uint64, error) {
	// TODO update flush policy with size
	select {
	case <-ctx.Done():
		return 0, ctx.Err()

	default:
		needFlushedUnits := make([]logUnit, 0, len(l.units))
		for _, u := range l.units {
			if !u.isEmpty() {
				needFlushedUnits = append(needFlushedUnits, u)
			}
		}
		if len(needFlushedUnits) > 0 {
			select {
			case <-ctx.Done():
				return 0, ctx.Err()

			case <-time.After(defaultFlushRowChangedEventDuration):
				// not enough row events accumulated within
				// defaultFlushRowChangedEventDuration; ask the flush
				// worker to flush what we have
				l.notifyChan <- needFlushedUnits
				// wait until the flush worker has finished
				<-l.notifyWaitChan
			}
		}
	}
	return resolvedTs, nil
}

type logMeta struct {
	Names            map[int64]string `json:"names"`
	GlobalResolvedTS uint64           `json:"global_resolved_ts"`
}

func newLogMeta() *logMeta {
	return &logMeta{
		Names: make(map[int64]string),
	}
}

// Marshal serializes the logMeta into JSON.
func (l *logMeta) Marshal() ([]byte, error) {
	return json.Marshal(l)
}

func makeTableDirectoryName(tableID int64) string {
	return fmt.Sprintf("%s%d", tablePrefix, tableID)
}

func makeTableFileObject(tableID int64, commitTS uint64) string {
	return fmt.Sprintf("%s%d/%s", tablePrefix, tableID, makeTableFileName(commitTS))
}

func makeTableFileName(commitTS uint64) string {
	return fmt.Sprintf("cdclog.%d", commitTS)
}

func makeLogMetaContent(tableInfos []*model.SimpleTableInfo) *logMeta {
	meta := new(logMeta)
	names := make(map[int64]string)
	for _, table := range tableInfos {
		if table != nil {
			log.Info("[makeLogMetaContent]", zap.Reflect("table", table))
			names[table.TableID] = quotes.QuoteSchema(table.Schema, table.Table)
		}
	}
	meta.Names = names
	return meta
}

func makeDDLFileObject(commitTS uint64) string {
	return fmt.Sprintf("%s/%s", ddlEventsDir, makeDDLFileName(commitTS))
}

// makeDDLFileName uses maxUint64-commitTS as the suffix so that file names
// sort in reverse commitTS order, i.e. the newest DDL event lists first.
func makeDDLFileName(commitTS uint64) string {
	return fmt.Sprintf("%s.%d", ddlEventsPrefix, maxUint64-commitTS)
}
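
// The sketch below illustrates the contract a concrete logUnit must satisfy
// and how it cooperates with logSink. memUnit, newMemUnit, the channel buffer
// size, and the flush threshold are all hypothetical, invented for
// illustration only; the real implementations are the file and s3 table
// buffers elsewhere in this sink.

type memUnit struct {
	tableID int64
	events  atomic.Int64
	size    atomic.Int64
	rows    chan *model.RowChangedEvent
}

// newMemUnit matches the newUnit callback expected by emitRowChangedEvents.
func newMemUnit(tableID int64) logUnit {
	return &memUnit{
		tableID: tableID,
		rows:    make(chan *model.RowChangedEvent, 128), // assumed buffer size
	}
}

func (m *memUnit) TableID() int64        { return m.tableID }
func (m *memUnit) Events() *atomic.Int64 { return &m.events }
func (m *memUnit) Size() *atomic.Int64   { return &m.size }

func (m *memUnit) dataChan() chan *model.RowChangedEvent { return m.rows }

func (m *memUnit) isEmpty() bool     { return m.events.Load() == 0 }
func (m *memUnit) shouldFlush() bool { return m.size.Load() >= 64<<20 } // assumed 64 MiB threshold

// flush drains the buffered rows; a real implementation would encode them
// via sink.encoder() and persist them through sink.storage() (s3) or under
// sink.root() (local files).
func (m *memUnit) flush(ctx context.Context, sink *logSink) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case row := <-m.rows:
			_ = row // encode the row here, e.g. with sink.encoder()
		default:
			// all buffered rows consumed; reset the counters
			m.size.Store(0)
			m.events.Store(0)
			return nil
		}
	}
}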