github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/sink/cdclog/utils.go

// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package cdclog

import (
	"context"
	"encoding/json"
	"fmt"
	"sync"
	"time"

	"github.com/pingcap/br/pkg/storage"
	"github.com/pingcap/log"
	"github.com/pingcap/ticdc/cdc/model"
	"github.com/pingcap/ticdc/cdc/sink/codec"
	"github.com/pingcap/ticdc/pkg/quotes"
	"github.com/uber-go/atomic"
	"go.uber.org/zap"
	"golang.org/x/sync/errgroup"
)

const (
	tablePrefix = "t_"
	logMetaFile = "log.meta"

	ddlEventsDir    = "ddls"
	ddlEventsPrefix = "ddl"
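
	// maxUint64 is the largest uint64 value; makeDDLFileName subtracts a
	// commit timestamp from it to derive reverse-ordered DDL file names.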
	maxUint64 = ^uint64(0)
)
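
// logUnit is a per-table buffer of row changed events that knows how to
// flush itself to storage; the file and s3 sinks in this package provide
// the concrete implementations.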
type logUnit interface {
	TableID() int64
	Events() *atomic.Int64
	Size() *atomic.Int64

	dataChan() chan *model.RowChangedEvent

	isEmpty() bool
	shouldFlush() bool
	// flush data to storage.
	flush(ctx context.Context, sink *logSink) error
}
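
// logSink is the common core of the cdclog sinks. It dispatches row changed
// events into per-table logUnits and runs the background loop that flushes
// them to a local directory (file sink) or to external storage (s3 sink).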
type logSink struct {
	notifyChan     chan []logUnit
	notifyWaitChan chan struct{}

	encoder func() codec.EventBatchEncoder
	units   []logUnit

	// rootPath is used by the file sink.
	rootPath string
	// storagePath is used by the s3 sink.
	storagePath storage.ExternalStorage

	hashMap sync.Map
}
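
// newLogSink creates a logSink that encodes events with the JSON event batch
// encoder in mixed-build mode. Only one of root (file sink) or storage
// (s3 sink) is expected to be in use.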
func newLogSink(root string, storage storage.ExternalStorage) *logSink {
	return &logSink{
		notifyChan:     make(chan []logUnit),
		notifyWaitChan: make(chan struct{}),
		encoder: func() codec.EventBatchEncoder {
			ret := codec.NewJSONEventBatchEncoder()
			ret.(*codec.JSONEventBatchEncoder).SetMixedBuildSupport(true)
			return ret
		},
		units:       make([]logUnit, 0),
		rootPath:    root,
		storagePath: storage,
	}
}

// storage returns the external storage; the s3 sink needs this.
func (l *logSink) storage() storage.ExternalStorage {
	return l.storagePath
}

// root returns the root path; the file sink needs this.
func (l *logSink) root() string {
	return l.rootPath
}
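
// startFlush runs the flush loop until ctx is canceled. Units are flushed on
// two triggers: batches handed over through notifyChan (completion is
// signaled back on notifyWaitChan), and a 500ms ticker that flushes every
// unit whose shouldFlush() reports true.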
func (l *logSink) startFlush(ctx context.Context) error {
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			log.Info("[startFlush] log sink stopped")
			return ctx.Err()
		case needFlushedUnits := <-l.notifyChan:
			// flush only the buffers specified by the caller
			eg, ectx := errgroup.WithContext(ctx)
			for _, u := range needFlushedUnits {
				uReplica := u
				eg.Go(func() error {
					log.Info("start Flush asynchronously to storage by caller",
						zap.Int64("table id", uReplica.TableID()),
						zap.Int64("size", uReplica.Size().Load()),
						zap.Int64("event count", uReplica.Events().Load()),
					)
					return uReplica.flush(ectx, l)
				})
			}
			if err := eg.Wait(); err != nil {
				return err
			}
			// tell the caller that this round of flushing has finished
			l.notifyWaitChan <- struct{}{}

		case <-ticker.C:
			// periodically try all table buffers
			eg, ectx := errgroup.WithContext(ctx)
			for _, u := range l.units {
				uReplica := u
				if u.shouldFlush() {
					eg.Go(func() error {
						log.Info("start Flush asynchronously to storage",
							zap.Int64("table id", uReplica.TableID()),
							zap.Int64("size", uReplica.Size().Load()),
							zap.Int64("event count", uReplica.Events().Load()),
						)
						return uReplica.flush(ectx, l)
					})
				}
			}
			if err := eg.Wait(); err != nil {
				return err
			}
		}
	}
}
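
// emitRowChangedEvents dispatches rows to their per-table logUnit, creating
// a unit via newUnit the first time a tableID is seen. hashMap caches the
// tableID-to-unit index so later rows for the same table reuse the unit.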
func (l *logSink) emitRowChangedEvents(ctx context.Context, newUnit func(int64) logUnit, rows ...*model.RowChangedEvent) error {
	for _, row := range rows {
		// dispatch the row event by tableID
		tableID := row.Table.GetTableID()
		var (
			ok   bool
			item interface{}
			hash int
		)
		if item, ok = l.hashMap.Load(tableID); !ok {
			// a new tableID: create a unit for it and remember its index
			l.units = append(l.units, newUnit(tableID))
			hash = len(l.units) - 1
			l.hashMap.Store(tableID, hash)
		} else {
			hash = item.(int)
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case l.units[hash].dataChan() <- row:
			l.units[hash].Size().Add(row.ApproximateSize)
			l.units[hash].Events().Inc()
		}
	}
	return nil
}
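
// flushRowChangedEvents collects the non-empty units and, after waiting
// defaultFlushRowChangedEventDuration, hands them to the flush worker via
// notifyChan, then blocks until the worker confirms on notifyWaitChan.
// It returns resolvedTs unchanged.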
func (l *logSink) flushRowChangedEvents(ctx context.Context, resolvedTs uint64) (uint64, error) {
	// TODO update flush policy with size
	select {
	case <-ctx.Done():
		return 0, ctx.Err()

	default:
		needFlushedUnits := make([]logUnit, 0, len(l.units))
		for _, u := range l.units {
			if !u.isEmpty() {
				needFlushedUnits = append(needFlushedUnits, u)
			}
		}
		if len(needFlushedUnits) > 0 {
			select {
			case <-ctx.Done():
				return 0, ctx.Err()

			case <-time.After(defaultFlushRowChangedEventDuration):
				// not enough row events accumulated within five seconds;
				// ask the flush worker to flush them now
				l.notifyChan <- needFlushedUnits
				// wait until the flush worker has finished
				<-l.notifyWaitChan
			}
		}
	}
	return resolvedTs, nil
}
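
// logMeta records the quoted schema.table name of every replicated table
// together with the global resolved timestamp; it is persisted as the
// log.meta file (see logMetaFile).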
type logMeta struct {
	Names            map[int64]string `json:"names"`
	GlobalResolvedTS uint64           `json:"global_resolved_ts"`
}
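
// newLogMeta returns an empty logMeta with an initialized Names map.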
func newLogMeta() *logMeta {
	return &logMeta{
		Names: make(map[int64]string),
	}
}

// Marshal serializes logMeta into JSON.
func (l *logMeta) Marshal() ([]byte, error) {
	return json.Marshal(l)
}
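
// makeTableDirectoryName composes a table's directory name,
// e.g. makeTableDirectoryName(42) == "t_42".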
func makeTableDirectoryName(tableID int64) string {
	return fmt.Sprintf("%s%d", tablePrefix, tableID)
}
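
// makeTableFileObject composes the object path of a table's log file,
// e.g. makeTableFileObject(42, 100) == "t_42/cdclog.100".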
func makeTableFileObject(tableID int64, commitTS uint64) string {
	return fmt.Sprintf("%s%d/%s", tablePrefix, tableID, makeTableFileName(commitTS))
}
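
// makeTableFileName composes a log file name from the commit timestamp,
// e.g. makeTableFileName(100) == "cdclog.100".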
func makeTableFileName(commitTS uint64) string {
	return fmt.Sprintf("cdclog.%d", commitTS)
}
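
// makeLogMetaContent builds a logMeta that maps each table ID to its quoted
// schema.table name; nil entries in tableInfos are skipped.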
func makeLogMetaContent(tableInfos []*model.SimpleTableInfo) *logMeta {
	meta := new(logMeta)
	names := make(map[int64]string)
	for _, table := range tableInfos {
		if table != nil {
			log.Info("[makeLogMetaContent]", zap.Reflect("table", table))
			names[table.TableID] = quotes.QuoteSchema(table.Schema, table.Table)
		}
	}
	meta.Names = names
	return meta
}
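
// makeDDLFileObject composes the object path of a DDL file under the "ddls"
// directory.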
func makeDDLFileObject(commitTS uint64) string {
	return fmt.Sprintf("%s/%s", ddlEventsDir, makeDDLFileName(commitTS))
}
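
// makeDDLFileName composes a DDL file name. Subtracting commitTS from
// maxUint64 makes a larger commit timestamp yield a smaller suffix, so that
// newer DDL files sort ahead of older ones when objects are listed by name.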
func makeDDLFileName(commitTS uint64) string {
	return fmt.Sprintf("%s.%d", ddlEventsPrefix, maxUint64-commitTS)
}