github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/processor/sourcemanager/sorter/pebble/db.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package pebble
    15  
    16  import (
    17  	"fmt"
    18  	"math"
    19  	"os"
    20  	"path/filepath"
    21  	"strconv"
    22  
    23  	"github.com/cockroachdb/pebble"
    24  	"github.com/cockroachdb/pebble/bloom"
    25  	"github.com/pingcap/log"
    26  	"github.com/pingcap/tiflow/cdc/model"
    27  	"github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter"
    28  	"github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter/pebble/encoding"
    29  	"github.com/pingcap/tiflow/pkg/config"
    30  	"go.uber.org/zap"
    31  )
    32  
    33  const (
    34  	minTableCRTsLabel      string = "minCRTs"
    35  	maxTableCRTsLabel      string = "maxCRTs"
    36  	tableCRTsCollectorName string = "table-crts-collector"
    37  )
    38  
    39  type tableCRTsCollector struct {
    40  	minTs uint64
    41  	maxTs uint64
    42  }
    43  
    44  func (t *tableCRTsCollector) Add(key pebble.InternalKey, value []byte) error {
    45  	crts := encoding.DecodeCRTs(key.UserKey)
    46  	if crts > t.maxTs {
    47  		t.maxTs = crts
    48  	}
    49  	if crts < t.minTs {
    50  		t.minTs = crts
    51  	}
    52  	return nil
    53  }
    54  
    55  func (t *tableCRTsCollector) Finish(userProps map[string]string) error {
    56  	userProps[minTableCRTsLabel] = fmt.Sprintf("%d", t.minTs)
    57  	userProps[maxTableCRTsLabel] = fmt.Sprintf("%d", t.maxTs)
    58  	return nil
    59  }
    60  
    61  func (t *tableCRTsCollector) Name() string {
    62  	return tableCRTsCollectorName
    63  }
    64  
    65  // NOTE: both lowerBound and upperBound are included.
    66  func iterTable(
    67  	db *pebble.DB,
    68  	uniqueID uint32, tableID model.TableID,
    69  	lowerBound, upperBound sorter.Position,
    70  ) *pebble.Iterator {
    71  	// Pebble's iterator range is left-included but right-excluded.
    72  	upperBoundNext := upperBound.Next()
    73  	start := encoding.EncodeTsKey(uniqueID, uint64(tableID), lowerBound.CommitTs, lowerBound.StartTs)
    74  	end := encoding.EncodeTsKey(uniqueID, uint64(tableID), upperBoundNext.CommitTs, upperBoundNext.StartTs)
    75  
    76  	iter, err := db.NewIter(&pebble.IterOptions{
    77  		LowerBound: start,
    78  		UpperBound: end,
    79  		TableFilter: func(userProps map[string]string) bool {
    80  			tableMinCRTs, _ := strconv.Atoi(userProps[minTableCRTsLabel])
    81  			tableMaxCRTs, _ := strconv.Atoi(userProps[maxTableCRTsLabel])
    82  			return uint64(tableMaxCRTs) >= lowerBound.CommitTs && uint64(tableMinCRTs) <= upperBound.CommitTs
    83  		},
    84  		UseL6Filters: true,
    85  	})
    86  	if err != nil {
    87  		log.Panic("fail to create iterator")
    88  		return nil
    89  	}
    90  	iter.First()
    91  	return iter
    92  }
    93  
    94  // OpenPebble opens a pebble.
    95  func OpenPebble(
    96  	id int, path string, cfg *config.DBConfig,
    97  	cache *pebble.Cache,
    98  	tableCache *pebble.TableCache,
    99  	adjusts ...func(*pebble.Options),
   100  ) (db *pebble.DB, err error) {
   101  	dbDir := filepath.Join(path, fmt.Sprintf("%04d", id))
   102  	if err = os.RemoveAll(dbDir); err != nil {
   103  		log.Warn("clean data dir fails", zap.String("dir", dbDir), zap.Error(err))
   104  		return
   105  	}
   106  
   107  	opts := buildPebbleOption(cfg)
   108  	opts.Cache = cache
   109  	opts.TableCache = tableCache
   110  	for _, adjust := range adjusts {
   111  		adjust(opts)
   112  	}
   113  
   114  	db, err = pebble.Open(dbDir, opts)
   115  	return
   116  }
   117  
   118  func buildPebbleOption(cfg *config.DBConfig) (opts *pebble.Options) {
   119  	opts = new(pebble.Options)
   120  	opts.ErrorIfExists = true
   121  	opts.DisableWAL = false // Delete range requires WAL.
   122  	opts.MaxOpenFiles = cfg.MaxOpenFiles / cfg.Count
   123  	opts.MaxConcurrentCompactions = func() int { return 6 }
   124  	opts.L0CompactionThreshold = 4 // Default for PebbleDB.
   125  	opts.L0CompactionFileThreshold = cfg.CompactionL0Trigger
   126  	opts.L0StopWritesThreshold = cfg.WriteL0PauseTrigger
   127  	opts.LBaseMaxBytes = 64 << 20 // 64 MB
   128  	opts.MemTableSize = uint64(cfg.WriterBufferSize)
   129  	opts.MemTableStopWritesThreshold = 4
   130  	opts.Levels = make([]pebble.LevelOptions, 7)
   131  	opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
   132  		func() pebble.TablePropertyCollector {
   133  			return &tableCRTsCollector{minTs: math.MaxUint64, maxTs: 0}
   134  		},
   135  	)
   136  
   137  	for i := 0; i < len(opts.Levels); i++ {
   138  		l := &opts.Levels[i]
   139  		l.BlockSize = cfg.BlockSize
   140  		l.IndexBlockSize = 256 << 10 // 256 KB
   141  		l.FilterPolicy = bloom.FilterPolicy(10)
   142  		l.FilterType = pebble.TableFilter
   143  		// 8M is large enough because generally Sorter won't carry too much data.
   144  		// Avoiding large targe file is helpful to reduce write-amplification.
   145  		l.TargetFileSize = 8 << 20 // 8 MB
   146  		switch cfg.Compression {
   147  		case "none":
   148  			l.Compression = pebble.NoCompression
   149  		case "snappy":
   150  			l.Compression = pebble.SnappyCompression
   151  		}
   152  		l.EnsureDefaults()
   153  	}
   154  	opts.Levels[6].FilterPolicy = nil
   155  	opts.FlushSplitBytes = opts.Levels[0].TargetFileSize
   156  	opts.EnsureDefaults()
   157  	return
   158  }