github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/local/iterator_test.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package local 15 16 import ( 17 "bytes" 18 "context" 19 "math/rand" 20 "path/filepath" 21 "sort" 22 "time" 23 24 "github.com/cockroachdb/pebble" 25 . "github.com/pingcap/check" 26 27 "github.com/pingcap/br/pkg/lightning/common" 28 ) 29 30 type iteratorSuite struct{} 31 32 var _ = Suite(&iteratorSuite{}) 33 34 func (s *iteratorSuite) TestDuplicateIterator(c *C) { 35 var pairs []common.KvPair 36 prevRowMax := int64(0) 37 // Unique pairs. 38 for i := 0; i < 20; i++ { 39 pairs = append(pairs, common.KvPair{ 40 Key: randBytes(32), 41 Val: randBytes(128), 42 RowID: prevRowMax, 43 Offset: int64(i * 1234), 44 }) 45 prevRowMax++ 46 } 47 // Duplicate pairs which repeat the same key twice. 48 for i := 20; i < 40; i++ { 49 key := randBytes(32) 50 pairs = append(pairs, common.KvPair{ 51 Key: key, 52 Val: randBytes(128), 53 RowID: prevRowMax, 54 Offset: int64(i * 1234), 55 }) 56 prevRowMax++ 57 pairs = append(pairs, common.KvPair{ 58 Key: key, 59 Val: randBytes(128), 60 RowID: prevRowMax, 61 Offset: int64(i * 1235), 62 }) 63 prevRowMax++ 64 } 65 // Duplicate pairs which repeat the same key three times. 66 for i := 40; i < 50; i++ { 67 key := randBytes(32) 68 pairs = append(pairs, common.KvPair{ 69 Key: key, 70 Val: randBytes(128), 71 RowID: prevRowMax, 72 Offset: int64(i * 1234), 73 }) 74 prevRowMax++ 75 pairs = append(pairs, common.KvPair{ 76 Key: key, 77 Val: randBytes(128), 78 RowID: prevRowMax, 79 Offset: int64(i * 1235), 80 }) 81 prevRowMax++ 82 pairs = append(pairs, common.KvPair{ 83 Key: key, 84 Val: randBytes(128), 85 RowID: prevRowMax, 86 Offset: int64(i * 1236), 87 }) 88 prevRowMax++ 89 } 90 91 // Find duplicates from the generated pairs. 92 var duplicatePairs []common.KvPair 93 sort.Slice(pairs, func(i, j int) bool { 94 return bytes.Compare(pairs[i].Key, pairs[j].Key) < 0 95 }) 96 uniqueKeys := make([][]byte, 0) 97 for i := 0; i < len(pairs); { 98 j := i + 1 99 for j < len(pairs) && bytes.Equal(pairs[j-1].Key, pairs[j].Key) { 100 j++ 101 } 102 uniqueKeys = append(uniqueKeys, pairs[i].Key) 103 if i+1 == j { 104 i++ 105 continue 106 } 107 for k := i; k < j; k++ { 108 duplicatePairs = append(duplicatePairs, pairs[k]) 109 } 110 i = j 111 } 112 113 keyAdapter := duplicateKeyAdapter{} 114 115 // Write pairs to db after shuffling the pairs. 116 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 117 rnd.Shuffle(len(pairs), func(i, j int) { 118 pairs[i], pairs[j] = pairs[j], pairs[i] 119 }) 120 storeDir := c.MkDir() 121 db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{}) 122 c.Assert(err, IsNil) 123 wb := db.NewBatch() 124 for _, p := range pairs { 125 key := keyAdapter.Encode(nil, p.Key, 1, p.Offset) 126 c.Assert(wb.Set(key, p.Val, nil), IsNil) 127 } 128 c.Assert(wb.Commit(pebble.Sync), IsNil) 129 130 duplicateDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{}) 131 c.Assert(err, IsNil) 132 engineFile := &File{ 133 ctx: context.Background(), 134 db: db, 135 keyAdapter: keyAdapter, 136 duplicateDB: duplicateDB, 137 } 138 iter := newDuplicateIter(context.Background(), engineFile, &pebble.IterOptions{}) 139 sort.Slice(pairs, func(i, j int) bool { 140 key1 := keyAdapter.Encode(nil, pairs[i].Key, pairs[i].RowID, pairs[i].Offset) 141 key2 := keyAdapter.Encode(nil, pairs[j].Key, pairs[j].RowID, pairs[j].Offset) 142 return bytes.Compare(key1, key2) < 0 143 }) 144 145 // Verify first pair. 146 c.Assert(iter.First(), IsTrue) 147 c.Assert(iter.Valid(), IsTrue) 148 c.Assert(iter.Key(), BytesEquals, pairs[0].Key) 149 c.Assert(iter.Value(), BytesEquals, pairs[0].Val) 150 151 // Verify last pair. 152 c.Assert(iter.Last(), IsTrue) 153 c.Assert(iter.Valid(), IsTrue) 154 c.Assert(iter.Key(), BytesEquals, pairs[len(pairs)-1].Key) 155 c.Assert(iter.Value(), BytesEquals, pairs[len(pairs)-1].Val) 156 157 // Iterate all keys and check the count of unique keys. 158 for iter.First(); iter.Valid(); iter.Next() { 159 c.Assert(iter.Key(), BytesEquals, uniqueKeys[0]) 160 uniqueKeys = uniqueKeys[1:] 161 } 162 c.Assert(iter.Error(), IsNil) 163 c.Assert(len(uniqueKeys), Equals, 0) 164 c.Assert(iter.Close(), IsNil) 165 c.Assert(engineFile.Close(), IsNil) 166 167 // Check duplicates detected by duplicate iterator. 168 iter = pebbleIter{Iterator: duplicateDB.NewIter(&pebble.IterOptions{})} 169 var detectedPairs []common.KvPair 170 for iter.First(); iter.Valid(); iter.Next() { 171 key, _, _, err := keyAdapter.Decode(nil, iter.Key()) 172 c.Assert(err, IsNil) 173 detectedPairs = append(detectedPairs, common.KvPair{ 174 Key: key, 175 Val: append([]byte{}, iter.Value()...), 176 }) 177 } 178 c.Assert(iter.Error(), IsNil) 179 c.Assert(iter.Close(), IsNil) 180 c.Assert(duplicateDB.Close(), IsNil) 181 c.Assert(len(detectedPairs), Equals, len(duplicatePairs)) 182 183 sort.Slice(duplicatePairs, func(i, j int) bool { 184 keyCmp := bytes.Compare(duplicatePairs[i].Key, duplicatePairs[j].Key) 185 return keyCmp < 0 || keyCmp == 0 && bytes.Compare(duplicatePairs[i].Val, duplicatePairs[j].Val) < 0 186 }) 187 sort.Slice(detectedPairs, func(i, j int) bool { 188 keyCmp := bytes.Compare(detectedPairs[i].Key, detectedPairs[j].Key) 189 return keyCmp < 0 || keyCmp == 0 && bytes.Compare(detectedPairs[i].Val, detectedPairs[j].Val) < 0 190 }) 191 for i := 0; i < len(detectedPairs); i++ { 192 c.Assert(detectedPairs[i].Key, BytesEquals, duplicatePairs[i].Key) 193 c.Assert(detectedPairs[i].Val, BytesEquals, duplicatePairs[i].Val) 194 } 195 } 196 197 func (s *iteratorSuite) TestDuplicateIterSeek(c *C) { 198 pairs := []common.KvPair{ 199 { 200 Key: []byte{1, 2, 3, 0}, 201 Val: randBytes(128), 202 RowID: 1, 203 Offset: 0, 204 }, 205 { 206 Key: []byte{1, 2, 3, 1}, 207 Val: randBytes(128), 208 RowID: 2, 209 Offset: 100, 210 }, 211 { 212 Key: []byte{1, 2, 3, 1}, 213 Val: randBytes(128), 214 RowID: 3, 215 Offset: 200, 216 }, 217 { 218 Key: []byte{1, 2, 3, 2}, 219 Val: randBytes(128), 220 RowID: 4, 221 Offset: 300, 222 }, 223 } 224 225 storeDir := c.MkDir() 226 db, err := pebble.Open(filepath.Join(storeDir, "kv"), &pebble.Options{}) 227 c.Assert(err, IsNil) 228 229 keyAdapter := duplicateKeyAdapter{} 230 wb := db.NewBatch() 231 for _, p := range pairs { 232 key := keyAdapter.Encode(nil, p.Key, p.RowID, p.Offset) 233 c.Assert(wb.Set(key, p.Val, nil), IsNil) 234 } 235 c.Assert(wb.Commit(pebble.Sync), IsNil) 236 237 duplicateDB, err := pebble.Open(filepath.Join(storeDir, "duplicates"), &pebble.Options{}) 238 c.Assert(err, IsNil) 239 engineFile := &File{ 240 ctx: context.Background(), 241 db: db, 242 keyAdapter: keyAdapter, 243 duplicateDB: duplicateDB, 244 } 245 iter := newDuplicateIter(context.Background(), engineFile, &pebble.IterOptions{}) 246 247 c.Assert(iter.Seek([]byte{1, 2, 3, 1}), IsTrue) 248 c.Assert(iter.Value(), BytesEquals, pairs[1].Val) 249 c.Assert(iter.Next(), IsTrue) 250 c.Assert(iter.Value(), BytesEquals, pairs[3].Val) 251 c.Assert(iter.Close(), IsNil) 252 c.Assert(engineFile.Close(), IsNil) 253 c.Assert(duplicateDB.Close(), IsNil) 254 }