github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/processor/sourcemanager/sorter/pebble/db.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package pebble 15 16 import ( 17 "fmt" 18 "math" 19 "os" 20 "path/filepath" 21 "strconv" 22 23 "github.com/cockroachdb/pebble" 24 "github.com/cockroachdb/pebble/bloom" 25 "github.com/pingcap/log" 26 "github.com/pingcap/tiflow/cdc/model" 27 "github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter" 28 "github.com/pingcap/tiflow/cdc/processor/sourcemanager/sorter/pebble/encoding" 29 "github.com/pingcap/tiflow/pkg/config" 30 "go.uber.org/zap" 31 ) 32 33 const ( 34 minTableCRTsLabel string = "minCRTs" 35 maxTableCRTsLabel string = "maxCRTs" 36 tableCRTsCollectorName string = "table-crts-collector" 37 ) 38 39 type tableCRTsCollector struct { 40 minTs uint64 41 maxTs uint64 42 } 43 44 func (t *tableCRTsCollector) Add(key pebble.InternalKey, value []byte) error { 45 crts := encoding.DecodeCRTs(key.UserKey) 46 if crts > t.maxTs { 47 t.maxTs = crts 48 } 49 if crts < t.minTs { 50 t.minTs = crts 51 } 52 return nil 53 } 54 55 func (t *tableCRTsCollector) Finish(userProps map[string]string) error { 56 userProps[minTableCRTsLabel] = fmt.Sprintf("%d", t.minTs) 57 userProps[maxTableCRTsLabel] = fmt.Sprintf("%d", t.maxTs) 58 return nil 59 } 60 61 func (t *tableCRTsCollector) Name() string { 62 return tableCRTsCollectorName 63 } 64 65 // NOTE: both lowerBound and upperBound are included. 66 func iterTable( 67 db *pebble.DB, 68 uniqueID uint32, tableID model.TableID, 69 lowerBound, upperBound sorter.Position, 70 ) *pebble.Iterator { 71 // Pebble's iterator range is left-included but right-excluded. 72 upperBoundNext := upperBound.Next() 73 start := encoding.EncodeTsKey(uniqueID, uint64(tableID), lowerBound.CommitTs, lowerBound.StartTs) 74 end := encoding.EncodeTsKey(uniqueID, uint64(tableID), upperBoundNext.CommitTs, upperBoundNext.StartTs) 75 76 iter, err := db.NewIter(&pebble.IterOptions{ 77 LowerBound: start, 78 UpperBound: end, 79 TableFilter: func(userProps map[string]string) bool { 80 tableMinCRTs, _ := strconv.Atoi(userProps[minTableCRTsLabel]) 81 tableMaxCRTs, _ := strconv.Atoi(userProps[maxTableCRTsLabel]) 82 return uint64(tableMaxCRTs) >= lowerBound.CommitTs && uint64(tableMinCRTs) <= upperBound.CommitTs 83 }, 84 UseL6Filters: true, 85 }) 86 if err != nil { 87 log.Panic("fail to create iterator") 88 return nil 89 } 90 iter.First() 91 return iter 92 } 93 94 // OpenPebble opens a pebble. 95 func OpenPebble( 96 id int, path string, cfg *config.DBConfig, 97 cache *pebble.Cache, 98 tableCache *pebble.TableCache, 99 adjusts ...func(*pebble.Options), 100 ) (db *pebble.DB, err error) { 101 dbDir := filepath.Join(path, fmt.Sprintf("%04d", id)) 102 if err = os.RemoveAll(dbDir); err != nil { 103 log.Warn("clean data dir fails", zap.String("dir", dbDir), zap.Error(err)) 104 return 105 } 106 107 opts := buildPebbleOption(cfg) 108 opts.Cache = cache 109 opts.TableCache = tableCache 110 for _, adjust := range adjusts { 111 adjust(opts) 112 } 113 114 db, err = pebble.Open(dbDir, opts) 115 return 116 } 117 118 func buildPebbleOption(cfg *config.DBConfig) (opts *pebble.Options) { 119 opts = new(pebble.Options) 120 opts.ErrorIfExists = true 121 opts.DisableWAL = false // Delete range requires WAL. 122 opts.MaxOpenFiles = cfg.MaxOpenFiles / cfg.Count 123 opts.MaxConcurrentCompactions = func() int { return 6 } 124 opts.L0CompactionThreshold = 4 // Default for PebbleDB. 125 opts.L0CompactionFileThreshold = cfg.CompactionL0Trigger 126 opts.L0StopWritesThreshold = cfg.WriteL0PauseTrigger 127 opts.LBaseMaxBytes = 64 << 20 // 64 MB 128 opts.MemTableSize = uint64(cfg.WriterBufferSize) 129 opts.MemTableStopWritesThreshold = 4 130 opts.Levels = make([]pebble.LevelOptions, 7) 131 opts.TablePropertyCollectors = append(opts.TablePropertyCollectors, 132 func() pebble.TablePropertyCollector { 133 return &tableCRTsCollector{minTs: math.MaxUint64, maxTs: 0} 134 }, 135 ) 136 137 for i := 0; i < len(opts.Levels); i++ { 138 l := &opts.Levels[i] 139 l.BlockSize = cfg.BlockSize 140 l.IndexBlockSize = 256 << 10 // 256 KB 141 l.FilterPolicy = bloom.FilterPolicy(10) 142 l.FilterType = pebble.TableFilter 143 // 8M is large enough because generally Sorter won't carry too much data. 144 // Avoiding large targe file is helpful to reduce write-amplification. 145 l.TargetFileSize = 8 << 20 // 8 MB 146 switch cfg.Compression { 147 case "none": 148 l.Compression = pebble.NoCompression 149 case "snappy": 150 l.Compression = pebble.SnappyCompression 151 } 152 l.EnsureDefaults() 153 } 154 opts.Levels[6].FilterPolicy = nil 155 opts.FlushSplitBytes = opts.Levels[0].TargetFileSize 156 opts.EnsureDefaults() 157 return 158 }