github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/local/iterator.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package local
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  
    20  	"github.com/cockroachdb/pebble"
    21  	sst "github.com/pingcap/kvproto/pkg/import_sstpb"
    22  	"github.com/pingcap/tidb/util/codec"
    23  	"go.uber.org/multierr"
    24  
    25  	"github.com/pingcap/br/pkg/kv"
    26  	"github.com/pingcap/br/pkg/lightning/log"
    27  	"github.com/pingcap/br/pkg/logutil"
    28  )
    29  
// pebbleIter wraps a *pebble.Iterator by embedding it, so the embedded
// iterator's methods are promoted, and adds the Seek and OpType methods
// needed to satisfy kv.Iter.
type pebbleIter struct {
	*pebble.Iterator
}
    33  
    34  func (p pebbleIter) Seek(key []byte) bool {
    35  	return p.SeekGE(key)
    36  }
    37  
    38  func (p pebbleIter) OpType() sst.Pair_OP {
    39  	return sst.Pair_Put
    40  }
    41  
// Compile-time check that pebbleIter (by value) implements kv.Iter.
var _ kv.Iter = pebbleIter{}
    43  
// maxDuplicateBatchSize is the threshold (4 << 20 = 4 MiB of accumulated
// key+value bytes) at which duplicateIter.record flushes its write batch.
const maxDuplicateBatchSize = 4 << 20
    45  
// duplicateIter iterates over a pebble iterator whose keys are decoded with
// keyAdapter. Consecutive entries that decode to the same key are treated as
// duplicates: Next skips them and writes them into writeBatch.
type duplicateIter struct {
	// ctx is polled in Next; once it is done, iteration stops and its error
	// is stored in err.
	ctx context.Context
	// iter is the underlying pebble iterator.
	iter *pebble.Iterator
	// curKey is the decoded key of the current entry, returned by Key.
	curKey []byte
	// curRawKey is a copy of the underlying iterator's (undecoded) key for
	// the current entry; it is what gets recorded if a duplicate shows up.
	curRawKey []byte
	// curVal is a copy of the current entry's value, returned by Value.
	curVal []byte
	// nextKey is a scratch buffer holding the decoded key of the entry Next
	// is currently examining.
	nextKey []byte
	// err holds the most recent decode, batch, or context error; it is
	// reported by Error.
	err error

	// engineFile supplies the Duplicates counter incremented by record.
	engineFile *File
	// keyAdapter encodes seek keys and decodes the underlying iterator's keys.
	keyAdapter KeyAdapter
	// writeBatch accumulates duplicate entries until flush commits it.
	writeBatch *pebble.Batch
	// writeBatchSize is the number of key+value bytes added to writeBatch
	// since the last flush.
	writeBatchSize int64
}
    60  
    61  func (d *duplicateIter) Seek(key []byte) bool {
    62  	encodedKey := d.keyAdapter.Encode(nil, key, 0, 0)
    63  	if d.err != nil || !d.iter.SeekGE(encodedKey) {
    64  		return false
    65  	}
    66  	d.fill()
    67  	return d.err == nil
    68  }
    69  
    70  func (d *duplicateIter) First() bool {
    71  	if d.err != nil || !d.iter.First() {
    72  		return false
    73  	}
    74  	d.fill()
    75  	return d.err == nil
    76  }
    77  
    78  func (d *duplicateIter) Last() bool {
    79  	if d.err != nil || !d.iter.Last() {
    80  		return false
    81  	}
    82  	d.fill()
    83  	return d.err == nil
    84  }
    85  
    86  func (d *duplicateIter) fill() {
    87  	d.curKey, _, _, d.err = d.keyAdapter.Decode(d.curKey[:0], d.iter.Key())
    88  	d.curRawKey = append(d.curRawKey[:0], d.iter.Key()...)
    89  	d.curVal = append(d.curVal[:0], d.iter.Value()...)
    90  }
    91  
    92  func (d *duplicateIter) flush() {
    93  	d.err = d.writeBatch.Commit(pebble.Sync)
    94  	d.writeBatch.Reset()
    95  	d.writeBatchSize = 0
    96  }
    97  
    98  func (d *duplicateIter) record(key []byte, val []byte) {
    99  	d.engineFile.Duplicates.Inc()
   100  	d.err = d.writeBatch.Set(key, val, nil)
   101  	if d.err != nil {
   102  		return
   103  	}
   104  	d.writeBatchSize += int64(len(key) + len(val))
   105  	if d.writeBatchSize >= maxDuplicateBatchSize {
   106  		d.flush()
   107  	}
   108  }
   109  
   110  func (d *duplicateIter) Next() bool {
   111  	recordFirst := false
   112  	for d.err == nil && d.ctx.Err() == nil && d.iter.Next() {
   113  		d.nextKey, _, _, d.err = d.keyAdapter.Decode(d.nextKey[:0], d.iter.Key())
   114  		if d.err != nil {
   115  			return false
   116  		}
   117  		if !bytes.Equal(d.nextKey, d.curKey) {
   118  			d.curKey, d.nextKey = d.nextKey, d.curKey[:0]
   119  			d.curRawKey = append(d.curRawKey[:0], d.iter.Key()...)
   120  			d.curVal = append(d.curVal[:0], d.iter.Value()...)
   121  			return true
   122  		}
   123  		log.L().Debug("duplicate key detected", logutil.Key("key", d.curKey))
   124  		if !recordFirst {
   125  			d.record(d.curRawKey, d.curVal)
   126  			recordFirst = true
   127  		}
   128  		d.record(d.iter.Key(), d.iter.Value())
   129  	}
   130  	if d.err == nil {
   131  		d.err = d.ctx.Err()
   132  	}
   133  	return false
   134  }
   135  
   136  func (d *duplicateIter) Key() []byte {
   137  	return d.curKey
   138  }
   139  
   140  func (d *duplicateIter) Value() []byte {
   141  	return d.curVal
   142  }
   143  
   144  func (d *duplicateIter) Valid() bool {
   145  	return d.err == nil && d.iter.Valid()
   146  }
   147  
   148  func (d *duplicateIter) Error() error {
   149  	return multierr.Combine(d.iter.Error(), d.err)
   150  }
   151  
   152  func (d *duplicateIter) Close() error {
   153  	if d.err == nil {
   154  		d.flush()
   155  	}
   156  	d.writeBatch.Close()
   157  	return d.iter.Close()
   158  }
   159  
   160  func (d *duplicateIter) OpType() sst.Pair_OP {
   161  	return sst.Pair_Put
   162  }
   163  
// Compile-time check that *duplicateIter implements kv.Iter.
var _ kv.Iter = &duplicateIter{}
   165  
   166  func newDuplicateIter(ctx context.Context, engineFile *File, opts *pebble.IterOptions) kv.Iter {
   167  	newOpts := &pebble.IterOptions{TableFilter: opts.TableFilter}
   168  	if len(opts.LowerBound) > 0 {
   169  		newOpts.LowerBound = codec.EncodeBytes(nil, opts.LowerBound)
   170  	}
   171  	if len(opts.UpperBound) > 0 {
   172  		newOpts.UpperBound = codec.EncodeBytes(nil, opts.UpperBound)
   173  	}
   174  	return &duplicateIter{
   175  		ctx:        ctx,
   176  		iter:       engineFile.db.NewIter(newOpts),
   177  		engineFile: engineFile,
   178  		keyAdapter: engineFile.keyAdapter,
   179  		writeBatch: engineFile.duplicateDB.NewBatch(),
   180  	}
   181  }
   182  
   183  func newKeyIter(ctx context.Context, engineFile *File, opts *pebble.IterOptions) kv.Iter {
   184  	if bytes.Compare(opts.LowerBound, normalIterStartKey) < 0 {
   185  		newOpts := *opts
   186  		newOpts.LowerBound = normalIterStartKey
   187  		opts = &newOpts
   188  	}
   189  	if !engineFile.duplicateDetection {
   190  		return pebbleIter{Iterator: engineFile.db.NewIter(opts)}
   191  	}
   192  	return newDuplicateIter(ctx, engineFile, opts)
   193  }