github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/local/iterator.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package local 15 16 import ( 17 "bytes" 18 "context" 19 20 "github.com/cockroachdb/pebble" 21 sst "github.com/pingcap/kvproto/pkg/import_sstpb" 22 "github.com/pingcap/tidb/util/codec" 23 "go.uber.org/multierr" 24 25 "github.com/pingcap/br/pkg/kv" 26 "github.com/pingcap/br/pkg/lightning/log" 27 "github.com/pingcap/br/pkg/logutil" 28 ) 29 30 type pebbleIter struct { 31 *pebble.Iterator 32 } 33 34 func (p pebbleIter) Seek(key []byte) bool { 35 return p.SeekGE(key) 36 } 37 38 func (p pebbleIter) OpType() sst.Pair_OP { 39 return sst.Pair_Put 40 } 41 42 var _ kv.Iter = pebbleIter{} 43 44 const maxDuplicateBatchSize = 4 << 20 45 46 type duplicateIter struct { 47 ctx context.Context 48 iter *pebble.Iterator 49 curKey []byte 50 curRawKey []byte 51 curVal []byte 52 nextKey []byte 53 err error 54 55 engineFile *File 56 keyAdapter KeyAdapter 57 writeBatch *pebble.Batch 58 writeBatchSize int64 59 } 60 61 func (d *duplicateIter) Seek(key []byte) bool { 62 encodedKey := d.keyAdapter.Encode(nil, key, 0, 0) 63 if d.err != nil || !d.iter.SeekGE(encodedKey) { 64 return false 65 } 66 d.fill() 67 return d.err == nil 68 } 69 70 func (d *duplicateIter) First() bool { 71 if d.err != nil || !d.iter.First() { 72 return false 73 } 74 d.fill() 75 return d.err == nil 76 } 77 78 func (d *duplicateIter) Last() bool { 79 if d.err != nil || !d.iter.Last() { 80 return false 81 } 82 d.fill() 83 return d.err == nil 84 } 85 86 func (d *duplicateIter) fill() { 87 d.curKey, _, _, d.err = d.keyAdapter.Decode(d.curKey[:0], d.iter.Key()) 88 d.curRawKey = append(d.curRawKey[:0], d.iter.Key()...) 89 d.curVal = append(d.curVal[:0], d.iter.Value()...) 90 } 91 92 func (d *duplicateIter) flush() { 93 d.err = d.writeBatch.Commit(pebble.Sync) 94 d.writeBatch.Reset() 95 d.writeBatchSize = 0 96 } 97 98 func (d *duplicateIter) record(key []byte, val []byte) { 99 d.engineFile.Duplicates.Inc() 100 d.err = d.writeBatch.Set(key, val, nil) 101 if d.err != nil { 102 return 103 } 104 d.writeBatchSize += int64(len(key) + len(val)) 105 if d.writeBatchSize >= maxDuplicateBatchSize { 106 d.flush() 107 } 108 } 109 110 func (d *duplicateIter) Next() bool { 111 recordFirst := false 112 for d.err == nil && d.ctx.Err() == nil && d.iter.Next() { 113 d.nextKey, _, _, d.err = d.keyAdapter.Decode(d.nextKey[:0], d.iter.Key()) 114 if d.err != nil { 115 return false 116 } 117 if !bytes.Equal(d.nextKey, d.curKey) { 118 d.curKey, d.nextKey = d.nextKey, d.curKey[:0] 119 d.curRawKey = append(d.curRawKey[:0], d.iter.Key()...) 120 d.curVal = append(d.curVal[:0], d.iter.Value()...) 121 return true 122 } 123 log.L().Debug("duplicate key detected", logutil.Key("key", d.curKey)) 124 if !recordFirst { 125 d.record(d.curRawKey, d.curVal) 126 recordFirst = true 127 } 128 d.record(d.iter.Key(), d.iter.Value()) 129 } 130 if d.err == nil { 131 d.err = d.ctx.Err() 132 } 133 return false 134 } 135 136 func (d *duplicateIter) Key() []byte { 137 return d.curKey 138 } 139 140 func (d *duplicateIter) Value() []byte { 141 return d.curVal 142 } 143 144 func (d *duplicateIter) Valid() bool { 145 return d.err == nil && d.iter.Valid() 146 } 147 148 func (d *duplicateIter) Error() error { 149 return multierr.Combine(d.iter.Error(), d.err) 150 } 151 152 func (d *duplicateIter) Close() error { 153 if d.err == nil { 154 d.flush() 155 } 156 d.writeBatch.Close() 157 return d.iter.Close() 158 } 159 160 func (d *duplicateIter) OpType() sst.Pair_OP { 161 return sst.Pair_Put 162 } 163 164 var _ kv.Iter = &duplicateIter{} 165 166 func newDuplicateIter(ctx context.Context, engineFile *File, opts *pebble.IterOptions) kv.Iter { 167 newOpts := &pebble.IterOptions{TableFilter: opts.TableFilter} 168 if len(opts.LowerBound) > 0 { 169 newOpts.LowerBound = codec.EncodeBytes(nil, opts.LowerBound) 170 } 171 if len(opts.UpperBound) > 0 { 172 newOpts.UpperBound = codec.EncodeBytes(nil, opts.UpperBound) 173 } 174 return &duplicateIter{ 175 ctx: ctx, 176 iter: engineFile.db.NewIter(newOpts), 177 engineFile: engineFile, 178 keyAdapter: engineFile.keyAdapter, 179 writeBatch: engineFile.duplicateDB.NewBatch(), 180 } 181 } 182 183 func newKeyIter(ctx context.Context, engineFile *File, opts *pebble.IterOptions) kv.Iter { 184 if bytes.Compare(opts.LowerBound, normalIterStartKey) < 0 { 185 newOpts := *opts 186 newOpts.LowerBound = normalIterStartKey 187 opts = &newOpts 188 } 189 if !engineFile.duplicateDetection { 190 return pebbleIter{Iterator: engineFile.db.NewIter(opts)} 191 } 192 return newDuplicateIter(ctx, engineFile, opts) 193 }