github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/local/iterator_test.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package local

import (
	"bytes"
	"context"
	"math/rand"
	"path/filepath"
	"sort"
	"time"

	"github.com/cockroachdb/pebble"
	. "github.com/pingcap/check"

	"github.com/pingcap/br/pkg/lightning/common"
)

type iteratorSuite struct{}

var _ = Suite(&iteratorSuite{})

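// TestDuplicateIterator builds a mix of unique keys, keys duplicated twice and
// keys duplicated three times, then checks that the duplicate iterator yields
// each distinct key exactly once and records every duplicated pair into duplicateDB.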
func (s *iteratorSuite) TestDuplicateIterator(c *C) {
	var pairs []common.KvPair
	prevRowMax := int64(0)
	// Unique pairs.
	for i := 0; i < 20; i++ {
		pairs = append(pairs, common.KvPair{
			Key:    randBytes(32),
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1234),
		})
		prevRowMax++
	}
	// Duplicate pairs which repeat the same key twice.
	for i := 20; i < 40; i++ {
		key := randBytes(32)
		pairs = append(pairs, common.KvPair{
			Key:    key,
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1234),
		})
		prevRowMax++
		pairs = append(pairs, common.KvPair{
			Key:    key,
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1235),
		})
		prevRowMax++
	}
	// Duplicate pairs which repeat the same key three times.
	for i := 40; i < 50; i++ {
		key := randBytes(32)
		pairs = append(pairs, common.KvPair{
			Key:    key,
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1234),
		})
		prevRowMax++
		pairs = append(pairs, common.KvPair{
			Key:    key,
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1235),
		})
		prevRowMax++
		pairs = append(pairs, common.KvPair{
			Key:    key,
			Val:    randBytes(128),
			RowID:  prevRowMax,
			Offset: int64(i * 1236),
		})
		prevRowMax++
	}

	// Find duplicates from the generated pairs.
	var duplicatePairs []common.KvPair
	sort.Slice(pairs, func(i, j int) bool {
		return bytes.Compare(pairs[i].Key, pairs[j].Key) < 0
	})
	uniqueKeys := make([][]byte, 0)
	for i := 0; i < len(pairs); {
		j := i + 1
		for j < len(pairs) && bytes.Equal(pairs[j-1].Key, pairs[j].Key) {
			j++
		}
		uniqueKeys = append(uniqueKeys, pairs[i].Key)
		if i+1 == j {
			i++
			continue
		}
		for k := i; k < j; k++ {
			duplicatePairs = append(duplicatePairs, pairs[k])
		}
		i = j
	}

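	// duplicateKeyAdapter folds the row ID and offset into the physical key, so
	// pairs sharing the same user key can still be stored as distinct entries in
	// pebble and told apart later.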
	keyAdapter := duplicateKeyAdapter{}

	// Write pairs to db after shuffling the pairs.
	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
	rnd.Shuffle(len(pairs), func(i, j int) {
		pairs[i], pairs[j] = pairs[j], pairs[i]
	})
	storeDir := c.MkDir()
	db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{})
	c.Assert(err, IsNil)
	wb := db.NewBatch()
	for _, p := range pairs {
		key := keyAdapter.Encode(nil, p.Key, p.RowID, p.Offset)
		c.Assert(wb.Set(key, p.Val, nil), IsNil)
	}
	c.Assert(wb.Commit(pebble.Sync), IsNil)

	duplicateDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{})
	c.Assert(err, IsNil)
	engineFile := &File{
		ctx:         context.Background(),
		db:          db,
		keyAdapter:  keyAdapter,
		duplicateDB: duplicateDB,
	}
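	// The duplicate iterator walks the engine in key order, surfaces each distinct
	// user key once, and is expected to record every duplicated entry it encounters
	// into duplicateDB.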
	iter := newDuplicateIter(context.Background(), engineFile, &pebble.IterOptions{})
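	// Sort the expected pairs by their encoded key so they line up with the
	// iteration order of the underlying pebble db.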
	sort.Slice(pairs, func(i, j int) bool {
		key1 := keyAdapter.Encode(nil, pairs[i].Key, pairs[i].RowID, pairs[i].Offset)
		key2 := keyAdapter.Encode(nil, pairs[j].Key, pairs[j].RowID, pairs[j].Offset)
		return bytes.Compare(key1, key2) < 0
	})

	// Verify first pair.
	c.Assert(iter.First(), IsTrue)
	c.Assert(iter.Valid(), IsTrue)
	c.Assert(iter.Key(), BytesEquals, pairs[0].Key)
	c.Assert(iter.Value(), BytesEquals, pairs[0].Val)

	// Verify last pair.
	c.Assert(iter.Last(), IsTrue)
	c.Assert(iter.Valid(), IsTrue)
	c.Assert(iter.Key(), BytesEquals, pairs[len(pairs)-1].Key)
	c.Assert(iter.Value(), BytesEquals, pairs[len(pairs)-1].Val)

	// Iterate all keys and check that they match the expected unique keys in order.
	for iter.First(); iter.Valid(); iter.Next() {
		c.Assert(iter.Key(), BytesEquals, uniqueKeys[0])
		uniqueKeys = uniqueKeys[1:]
	}
	c.Assert(iter.Error(), IsNil)
	c.Assert(len(uniqueKeys), Equals, 0)
	c.Assert(iter.Close(), IsNil)
	c.Assert(engineFile.Close(), IsNil)

	// Check duplicates detected by duplicate iterator.
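	// duplicateDB stores entries under their encoded (key, rowID, offset) form, so
	// decode each entry to recover the original key before comparing.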
	iter = pebbleIter{Iterator: duplicateDB.NewIter(&pebble.IterOptions{})}
	var detectedPairs []common.KvPair
	for iter.First(); iter.Valid(); iter.Next() {
		key, _, _, err := keyAdapter.Decode(nil, iter.Key())
		c.Assert(err, IsNil)
		detectedPairs = append(detectedPairs, common.KvPair{
			Key: key,
			Val: append([]byte{}, iter.Value()...),
		})
	}
	c.Assert(iter.Error(), IsNil)
	c.Assert(iter.Close(), IsNil)
	c.Assert(duplicateDB.Close(), IsNil)
	c.Assert(len(detectedPairs), Equals, len(duplicatePairs))

	sort.Slice(duplicatePairs, func(i, j int) bool {
		keyCmp := bytes.Compare(duplicatePairs[i].Key, duplicatePairs[j].Key)
		return keyCmp < 0 || keyCmp == 0 && bytes.Compare(duplicatePairs[i].Val, duplicatePairs[j].Val) < 0
	})
	sort.Slice(detectedPairs, func(i, j int) bool {
		keyCmp := bytes.Compare(detectedPairs[i].Key, detectedPairs[j].Key)
		return keyCmp < 0 || keyCmp == 0 && bytes.Compare(detectedPairs[i].Val, detectedPairs[j].Val) < 0
	})
	for i := 0; i < len(detectedPairs); i++ {
		c.Assert(detectedPairs[i].Key, BytesEquals, duplicatePairs[i].Key)
		c.Assert(detectedPairs[i].Val, BytesEquals, duplicatePairs[i].Val)
	}
}

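// TestDuplicateIterSeek checks how the duplicate iterator positions itself when
// seeking into a run of duplicated keys.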
func (s *iteratorSuite) TestDuplicateIterSeek(c *C) {
	pairs := []common.KvPair{
		{
			Key:    []byte{1, 2, 3, 0},
			Val:    randBytes(128),
			RowID:  1,
			Offset: 0,
		},
		{
			Key:    []byte{1, 2, 3, 1},
			Val:    randBytes(128),
			RowID:  2,
			Offset: 100,
		},
		{
			Key:    []byte{1, 2, 3, 1},
			Val:    randBytes(128),
			RowID:  3,
			Offset: 200,
		},
		{
			Key:    []byte{1, 2, 3, 2},
			Val:    randBytes(128),
			RowID:  4,
			Offset: 300,
		},
	}

	storeDir := c.MkDir()
	db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{})
	c.Assert(err, IsNil)

	keyAdapter := duplicateKeyAdapter{}
	wb := db.NewBatch()
	for _, p := range pairs {
		key := keyAdapter.Encode(nil, p.Key, p.RowID, p.Offset)
		c.Assert(wb.Set(key, p.Val, nil), IsNil)
	}
	c.Assert(wb.Commit(pebble.Sync), IsNil)

	duplicateDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{})
	c.Assert(err, IsNil)
	engineFile := &File{
		ctx:         context.Background(),
		db:          db,
		keyAdapter:  keyAdapter,
		duplicateDB: duplicateDB,
	}
	iter := newDuplicateIter(context.Background(), engineFile, &pebble.IterOptions{})

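	// Seeking to the duplicated key {1, 2, 3, 1} should land on its first value,
	// and Next should skip the remaining duplicate and move on to {1, 2, 3, 2}.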
	c.Assert(iter.Seek([]byte{1, 2, 3, 1}), IsTrue)
	c.Assert(iter.Value(), BytesEquals, pairs[1].Val)
	c.Assert(iter.Next(), IsTrue)
	c.Assert(iter.Value(), BytesEquals, pairs[3].Val)
	c.Assert(iter.Close(), IsNil)
	c.Assert(engineFile.Close(), IsNil)
	c.Assert(duplicateDB.Close(), IsNil)
}