github.com/ledgerwatch/erigon-lib@v1.0.0/etl/etl_test.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 package etl 17 18 import ( 19 "bytes" 20 "encoding/hex" 21 "encoding/json" 22 "fmt" 23 "io" 24 "os" 25 "strings" 26 "testing" 27 28 "github.com/ledgerwatch/erigon-lib/kv" 29 "github.com/ledgerwatch/erigon-lib/kv/memdb" 30 "github.com/ledgerwatch/log/v3" 31 "github.com/stretchr/testify/assert" 32 "github.com/stretchr/testify/require" 33 ) 34 35 func decodeHex(in string) []byte { 36 payload, err := hex.DecodeString(in) 37 if err != nil { 38 panic(err) 39 } 40 return payload 41 } 42 43 func TestEmptyValueIsNotANil(t *testing.T) { 44 logger := log.New() 45 t.Run("sortable", func(t *testing.T) { 46 collector := NewCollector(t.Name(), "", NewSortableBuffer(1), logger) 47 defer collector.Close() 48 require := require.New(t) 49 require.NoError(collector.Collect([]byte{1}, []byte{})) 50 require.NoError(collector.Collect([]byte{2}, nil)) 51 require.NoError(collector.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 52 if k[0] == 1 { 53 require.Equal([]byte{}, v) 54 } else { 55 require.Nil(v) 56 } 57 return nil 58 }, TransformArgs{})) 59 }) 60 t.Run("append", func(t *testing.T) { 61 // append buffer doesn't support nil values 62 collector := NewCollector(t.Name(), "", NewAppendBuffer(1), logger) 63 defer collector.Close() 64 require := require.New(t) 65 require.NoError(collector.Collect([]byte{1}, []byte{})) 66 require.NoError(collector.Collect([]byte{2}, nil)) 67 require.NoError(collector.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 68 require.Nil(v) 69 return nil 70 }, TransformArgs{})) 71 }) 72 t.Run("oldest", func(t *testing.T) { 73 collector := NewCollector(t.Name(), "", NewOldestEntryBuffer(1), logger) 74 defer collector.Close() 75 require := require.New(t) 76 require.NoError(collector.Collect([]byte{1}, []byte{})) 77 require.NoError(collector.Collect([]byte{2}, nil)) 78 require.NoError(collector.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 79 if k[0] == 1 { 80 require.Equal([]byte{}, v) 81 } else { 82 require.Nil(v) 83 } 84 return nil 85 }, TransformArgs{})) 86 }) 87 } 88 89 func TestEmptyKeyValue(t *testing.T) { 90 logger := log.New() 91 _, tx := memdb.NewTestTx(t) 92 require := require.New(t) 93 table := kv.ChaindataTables[0] 94 collector := NewCollector(t.Name(), "", NewSortableBuffer(1), logger) 95 defer collector.Close() 96 require.NoError(collector.Collect([]byte{2}, []byte{})) 97 require.NoError(collector.Collect([]byte{1}, []byte{1})) 98 require.NoError(collector.Load(tx, table, IdentityLoadFunc, TransformArgs{})) 99 v, err := tx.GetOne(table, []byte{2}) 100 require.NoError(err) 101 require.Equal([]byte{}, v) 102 v, err = tx.GetOne(table, []byte{1}) 103 require.NoError(err) 104 require.Equal([]byte{1}, v) 105 106 collector = NewCollector(t.Name(), "", NewSortableBuffer(1), logger) 107 defer collector.Close() 108 require.NoError(collector.Collect([]byte{}, nil)) 109 require.NoError(collector.Load(tx, table, IdentityLoadFunc, TransformArgs{})) 110 v, err = tx.GetOne(table, []byte{}) 111 require.NoError(err) 112 require.Nil(v) 113 } 114 115 func TestWriteAndReadBufferEntry(t *testing.T) { 116 b := NewSortableBuffer(128) 117 buffer := bytes.NewBuffer(make([]byte, 0)) 118 119 entries := make([]sortableBufferEntry, 100) 120 for i := range entries { 121 entries[i].key = []byte(fmt.Sprintf("key-%d", i)) 122 entries[i].value = []byte(fmt.Sprintf("value-%d", i)) 123 b.Put(entries[i].key, entries[i].value) 124 } 125 126 if err := b.Write(buffer); err != nil { 127 t.Error(err) 128 } 129 130 bb := buffer.Bytes() 131 132 readBuffer := bytes.NewReader(bb) 133 134 for i := range entries { 135 k, v, err := readElementFromDisk(readBuffer, readBuffer, nil, nil) 136 if err != nil { 137 t.Error(err) 138 } 139 assert.Equal(t, string(entries[i].key), string(k)) 140 assert.Equal(t, string(entries[i].value), string(v)) 141 } 142 143 _, _, err := readElementFromDisk(readBuffer, readBuffer, nil, nil) 144 assert.Equal(t, io.EOF, err) 145 } 146 147 func TestNextKey(t *testing.T) { 148 for _, tc := range []string{ 149 "00000001->00000002", 150 "000000FF->00000100", 151 "FEFFFFFF->FF000000", 152 } { 153 parts := strings.Split(tc, "->") 154 input := decodeHex(parts[0]) 155 expectedOutput := decodeHex(parts[1]) 156 actualOutput, err := NextKey(input) 157 assert.NoError(t, err) 158 assert.Equal(t, expectedOutput, actualOutput) 159 } 160 } 161 162 func TestNextKeyErr(t *testing.T) { 163 for _, tc := range []string{ 164 "", 165 "FFFFFF", 166 } { 167 input := decodeHex(tc) 168 _, err := NextKey(input) 169 assert.Error(t, err) 170 } 171 } 172 173 func TestFileDataProviders(t *testing.T) { 174 logger := log.New() 175 // test invariant when we go through files (> 1 buffer) 176 _, tx := memdb.NewTestTx(t) 177 sourceBucket := kv.ChaindataTables[0] 178 179 generateTestData(t, tx, sourceBucket, 10) 180 181 collector := NewCollector(t.Name(), "", NewSortableBuffer(1), logger) 182 183 err := extractBucketIntoFiles("logPrefix", tx, sourceBucket, nil, nil, collector, testExtractToMapFunc, nil, nil, logger) 184 assert.NoError(t, err) 185 186 assert.Equal(t, 10, len(collector.dataProviders)) 187 188 for _, p := range collector.dataProviders { 189 fp, ok := p.(*fileDataProvider) 190 assert.True(t, ok) 191 err := fp.Wait() 192 require.NoError(t, err) 193 _, err = os.Stat(fp.file.Name()) 194 assert.NoError(t, err) 195 } 196 197 collector.Close() 198 199 for _, p := range collector.dataProviders { 200 fp, ok := p.(*fileDataProvider) 201 assert.True(t, ok) 202 _, err = os.Stat(fp.file.Name()) 203 assert.True(t, os.IsNotExist(err)) 204 } 205 } 206 207 func TestRAMDataProviders(t *testing.T) { 208 logger := log.New() 209 // test invariant when we go through memory (1 buffer) 210 _, tx := memdb.NewTestTx(t) 211 sourceBucket := kv.ChaindataTables[0] 212 generateTestData(t, tx, sourceBucket, 10) 213 214 collector := NewCollector(t.Name(), "", NewSortableBuffer(BufferOptimalSize), logger) 215 err := extractBucketIntoFiles("logPrefix", tx, sourceBucket, nil, nil, collector, testExtractToMapFunc, nil, nil, logger) 216 assert.NoError(t, err) 217 218 assert.Equal(t, 1, len(collector.dataProviders)) 219 220 for _, p := range collector.dataProviders { 221 mp, ok := p.(*memoryDataProvider) 222 assert.True(t, ok) 223 assert.Equal(t, 10, mp.buffer.Len()) 224 } 225 } 226 227 func TestTransformRAMOnly(t *testing.T) { 228 logger := log.New() 229 // test invariant when we only have one buffer and it fits into RAM (exactly 1 buffer) 230 _, tx := memdb.NewTestTx(t) 231 232 sourceBucket := kv.ChaindataTables[0] 233 destBucket := kv.ChaindataTables[1] 234 generateTestData(t, tx, sourceBucket, 20) 235 err := Transform( 236 "logPrefix", 237 tx, 238 sourceBucket, 239 destBucket, 240 "", // temp dir 241 testExtractToMapFunc, 242 testLoadFromMapFunc, 243 TransformArgs{}, 244 logger, 245 ) 246 assert.Nil(t, err) 247 compareBuckets(t, tx, sourceBucket, destBucket, nil) 248 } 249 250 func TestEmptySourceBucket(t *testing.T) { 251 logger := log.New() 252 _, tx := memdb.NewTestTx(t) 253 sourceBucket := kv.ChaindataTables[0] 254 destBucket := kv.ChaindataTables[1] 255 err := Transform( 256 "logPrefix", 257 tx, 258 sourceBucket, 259 destBucket, 260 "", // temp dir 261 testExtractToMapFunc, 262 testLoadFromMapFunc, 263 TransformArgs{}, 264 logger, 265 ) 266 assert.Nil(t, err) 267 compareBuckets(t, tx, sourceBucket, destBucket, nil) 268 } 269 270 func TestTransformExtractStartKey(t *testing.T) { 271 logger := log.New() 272 // test invariant when we only have one buffer and it fits into RAM (exactly 1 buffer) 273 _, tx := memdb.NewTestTx(t) 274 sourceBucket := kv.ChaindataTables[0] 275 destBucket := kv.ChaindataTables[1] 276 generateTestData(t, tx, sourceBucket, 10) 277 err := Transform( 278 "logPrefix", 279 tx, 280 sourceBucket, 281 destBucket, 282 "", // temp dir 283 testExtractToMapFunc, 284 testLoadFromMapFunc, 285 TransformArgs{ExtractStartKey: []byte(fmt.Sprintf("%10d-key-%010d", 5, 5))}, 286 logger, 287 ) 288 assert.Nil(t, err) 289 compareBuckets(t, tx, sourceBucket, destBucket, []byte(fmt.Sprintf("%10d-key-%010d", 5, 5))) 290 } 291 292 func TestTransformThroughFiles(t *testing.T) { 293 logger := log.New() 294 // test invariant when we go through files (> 1 buffer) 295 _, tx := memdb.NewTestTx(t) 296 sourceBucket := kv.ChaindataTables[0] 297 destBucket := kv.ChaindataTables[1] 298 generateTestData(t, tx, sourceBucket, 10) 299 err := Transform( 300 "logPrefix", 301 tx, 302 sourceBucket, 303 destBucket, 304 "", // temp dir 305 testExtractToMapFunc, 306 testLoadFromMapFunc, 307 TransformArgs{ 308 BufferSize: 1, 309 }, 310 logger, 311 ) 312 assert.Nil(t, err) 313 compareBuckets(t, tx, sourceBucket, destBucket, nil) 314 } 315 316 func TestTransformDoubleOnExtract(t *testing.T) { 317 logger := log.New() 318 // test invariant when extractFunc multiplies the data 2x 319 _, tx := memdb.NewTestTx(t) 320 sourceBucket := kv.ChaindataTables[0] 321 destBucket := kv.ChaindataTables[1] 322 generateTestData(t, tx, sourceBucket, 10) 323 err := Transform( 324 "logPrefix", 325 tx, 326 sourceBucket, 327 destBucket, 328 "", // temp dir 329 testExtractDoubleToMapFunc, 330 testLoadFromMapFunc, 331 TransformArgs{}, 332 logger, 333 ) 334 assert.Nil(t, err) 335 compareBucketsDouble(t, tx, sourceBucket, destBucket) 336 } 337 338 func TestTransformDoubleOnLoad(t *testing.T) { 339 logger := log.New() 340 // test invariant when loadFunc multiplies the data 2x 341 _, tx := memdb.NewTestTx(t) 342 sourceBucket := kv.ChaindataTables[0] 343 destBucket := kv.ChaindataTables[1] 344 generateTestData(t, tx, sourceBucket, 10) 345 err := Transform( 346 "logPrefix", 347 tx, 348 sourceBucket, 349 destBucket, 350 "", // temp dir 351 testExtractToMapFunc, 352 testLoadFromMapDoubleFunc, 353 TransformArgs{}, 354 logger, 355 ) 356 assert.Nil(t, err) 357 compareBucketsDouble(t, tx, sourceBucket, destBucket) 358 } 359 360 func generateTestData(t *testing.T, db kv.Putter, bucket string, count int) { 361 t.Helper() 362 for i := 0; i < count; i++ { 363 k := []byte(fmt.Sprintf("%10d-key-%010d", i, i)) 364 v := []byte(fmt.Sprintf("val-%099d", i)) 365 err := db.Put(bucket, k, v) 366 assert.NoError(t, err) 367 } 368 } 369 370 func testExtractToMapFunc(k, v []byte, next ExtractNextFunc) error { 371 valueMap := make(map[string][]byte) 372 valueMap["value"] = v 373 out, err := json.Marshal(valueMap) 374 if err != nil { 375 return err 376 } 377 return next(k, k, out) 378 } 379 380 func testExtractDoubleToMapFunc(k, v []byte, next ExtractNextFunc) error { 381 var err error 382 valueMap := make(map[string][]byte) 383 valueMap["value"] = append(v, 0xAA) 384 k1 := append(k, 0xAA) 385 out, err := json.Marshal(valueMap) 386 if err != nil { 387 panic(err) 388 } 389 390 err = next(k, k1, out) 391 if err != nil { 392 return err 393 } 394 395 valueMap = make(map[string][]byte) 396 valueMap["value"] = append(v, 0xBB) 397 k2 := append(k, 0xBB) 398 out, err = json.Marshal(valueMap) 399 if err != nil { 400 panic(err) 401 } 402 return next(k, k2, out) 403 } 404 405 func testLoadFromMapFunc(k []byte, v []byte, _ CurrentTableReader, next LoadNextFunc) error { 406 valueMap := make(map[string][]byte) 407 err := json.Unmarshal(v, &valueMap) 408 if err != nil { 409 return err 410 } 411 realValue := valueMap["value"] 412 return next(k, k, realValue) 413 } 414 415 func testLoadFromMapDoubleFunc(k []byte, v []byte, _ CurrentTableReader, next LoadNextFunc) error { 416 valueMap := make(map[string][]byte) 417 err := json.Unmarshal(v, &valueMap) 418 if err != nil { 419 return err 420 } 421 realValue := valueMap["value"] 422 423 err = next(k, append(k, 0xAA), append(realValue, 0xAA)) 424 if err != nil { 425 return err 426 } 427 return next(k, append(k, 0xBB), append(realValue, 0xBB)) 428 } 429 430 func compareBuckets(t *testing.T, db kv.Tx, b1, b2 string, startKey []byte) { 431 t.Helper() 432 b1Map := make(map[string]string) 433 err := db.ForEach(b1, startKey, func(k, v []byte) error { 434 b1Map[fmt.Sprintf("%x", k)] = fmt.Sprintf("%x", v) 435 return nil 436 }) 437 assert.NoError(t, err) 438 b2Map := make(map[string]string) 439 err = db.ForEach(b2, nil, func(k, v []byte) error { 440 b2Map[fmt.Sprintf("%x", k)] = fmt.Sprintf("%x", v) 441 return nil 442 }) 443 assert.NoError(t, err) 444 assert.Equal(t, b1Map, b2Map) 445 } 446 447 func compareBucketsDouble(t *testing.T, db kv.Tx, b1, b2 string) { 448 t.Helper() 449 b1Map := make(map[string]string) 450 err := db.ForEach(b1, nil, func(k, v []byte) error { 451 b1Map[fmt.Sprintf("%x", append(k, 0xAA))] = fmt.Sprintf("%x", append(v, 0xAA)) 452 b1Map[fmt.Sprintf("%x", append(k, 0xBB))] = fmt.Sprintf("%x", append(v, 0xBB)) 453 return nil 454 }) 455 assert.NoError(t, err) 456 b2Map := make(map[string]string) 457 err = db.ForEach(b2, nil, func(k, v []byte) error { 458 b2Map[fmt.Sprintf("%x", k)] = fmt.Sprintf("%x", v) 459 return nil 460 }) 461 assert.NoError(t, err) 462 assert.Equal(t, b1Map, b2Map) 463 } 464 465 func TestReuseCollectorAfterLoad(t *testing.T) { 466 logger := log.New() 467 buf := NewSortableBuffer(128) 468 c := NewCollector("", t.TempDir(), buf, logger) 469 470 err := c.Collect([]byte{1}, []byte{2}) 471 require.NoError(t, err) 472 see := 0 473 err = c.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 474 see++ 475 return nil 476 }, TransformArgs{}) 477 require.NoError(t, err) 478 require.Equal(t, 1, see) 479 480 // buffers are not lost 481 require.Zero(t, len(buf.data)) 482 require.Zero(t, len(buf.lens)) 483 require.Zero(t, len(buf.offsets)) 484 require.NotZero(t, cap(buf.data)) 485 require.NotZero(t, cap(buf.lens)) 486 require.NotZero(t, cap(buf.offsets)) 487 488 // teset that no data visible 489 see = 0 490 err = c.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 491 see++ 492 return nil 493 }, TransformArgs{}) 494 require.NoError(t, err) 495 require.Equal(t, 0, see) 496 497 // reuse 498 see = 0 499 err = c.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 500 see++ 501 return nil 502 }, TransformArgs{}) 503 require.NoError(t, err) 504 require.Equal(t, 0, see) 505 506 err = c.Collect([]byte{3}, []byte{4}) 507 require.NoError(t, err) 508 see = 0 509 err = c.Load(nil, "", func(k, v []byte, table CurrentTableReader, next LoadNextFunc) error { 510 see++ 511 return nil 512 }, TransformArgs{}) 513 require.NoError(t, err) 514 require.Equal(t, 1, see) 515 }