github.com/daragao/go-ethereum@v1.8.14-0.20180809141559-45eaef243198/swarm/bmt/bmt_test.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package bmt 18 19 import ( 20 "bytes" 21 crand "crypto/rand" 22 "encoding/binary" 23 "fmt" 24 "io" 25 "math/rand" 26 "sync" 27 "sync/atomic" 28 "testing" 29 "time" 30 31 "github.com/ethereum/go-ethereum/crypto/sha3" 32 ) 33 34 // the actual data length generated (could be longer than max datalength of the BMT) 35 const BufferSize = 4128 36 37 var counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, 111, 127, 128} 38 39 // calculates the Keccak256 SHA3 hash of the data 40 func sha3hash(data ...[]byte) []byte { 41 h := sha3.NewKeccak256() 42 return doSum(h, nil, data...) 43 } 44 45 // TestRefHasher tests that the RefHasher computes the expected BMT hash for 46 // some small data lengths 47 func TestRefHasher(t *testing.T) { 48 // the test struct is used to specify the expected BMT hash for 49 // segment counts between from and to and lengths from 1 to datalength 50 type test struct { 51 from int 52 to int 53 expected func([]byte) []byte 54 } 55 56 var tests []*test 57 // all lengths in [0,64] should be: 58 // 59 // sha3hash(data) 60 // 61 tests = append(tests, &test{ 62 from: 1, 63 to: 2, 64 expected: func(d []byte) []byte { 65 data := make([]byte, 64) 66 copy(data, d) 67 return sha3hash(data) 68 }, 69 }) 70 71 // all lengths in [3,4] should be: 72 // 73 // sha3hash( 74 // sha3hash(data[:64]) 75 // sha3hash(data[64:]) 76 // ) 77 // 78 tests = append(tests, &test{ 79 from: 3, 80 to: 4, 81 expected: func(d []byte) []byte { 82 data := make([]byte, 128) 83 copy(data, d) 84 return sha3hash(sha3hash(data[:64]), sha3hash(data[64:])) 85 }, 86 }) 87 88 // all segmentCounts in [5,8] should be: 89 // 90 // sha3hash( 91 // sha3hash( 92 // sha3hash(data[:64]) 93 // sha3hash(data[64:128]) 94 // ) 95 // sha3hash( 96 // sha3hash(data[128:192]) 97 // sha3hash(data[192:]) 98 // ) 99 // ) 100 // 101 tests = append(tests, &test{ 102 from: 5, 103 to: 8, 104 expected: func(d []byte) []byte { 105 data := make([]byte, 256) 106 copy(data, d) 107 return sha3hash(sha3hash(sha3hash(data[:64]), sha3hash(data[64:128])), sha3hash(sha3hash(data[128:192]), sha3hash(data[192:]))) 108 }, 109 }) 110 111 // run the tests 112 for _, x := range tests { 113 for segmentCount := x.from; segmentCount <= x.to; segmentCount++ { 114 for length := 1; length <= segmentCount*32; length++ { 115 t.Run(fmt.Sprintf("%d_segments_%d_bytes", segmentCount, length), func(t *testing.T) { 116 data := make([]byte, length) 117 if _, err := io.ReadFull(crand.Reader, data); err != nil && err != io.EOF { 118 t.Fatal(err) 119 } 120 expected := x.expected(data) 121 actual := NewRefHasher(sha3.NewKeccak256, segmentCount).Hash(data) 122 if !bytes.Equal(actual, expected) { 123 t.Fatalf("expected %x, got %x", expected, actual) 124 } 125 }) 126 } 127 } 128 } 129 } 130 131 // tests if hasher responds with correct hash comparing the reference implementation return value 132 func TestHasherEmptyData(t *testing.T) { 133 hasher := sha3.NewKeccak256 134 var data []byte 135 for _, count := range counts { 136 t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) { 137 pool := NewTreePool(hasher, count, PoolSize) 138 defer pool.Drain(0) 139 bmt := New(pool) 140 rbmt := NewRefHasher(hasher, count) 141 refHash := rbmt.Hash(data) 142 expHash := syncHash(bmt, nil, data) 143 if !bytes.Equal(expHash, refHash) { 144 t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) 145 } 146 }) 147 } 148 } 149 150 // tests sequential write with entire max size written in one go 151 func TestSyncHasherCorrectness(t *testing.T) { 152 data := newData(BufferSize) 153 hasher := sha3.NewKeccak256 154 size := hasher().Size() 155 156 var err error 157 for _, count := range counts { 158 t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) { 159 max := count * size 160 var incr int 161 capacity := 1 162 pool := NewTreePool(hasher, count, capacity) 163 defer pool.Drain(0) 164 for n := 0; n <= max; n += incr { 165 incr = 1 + rand.Intn(5) 166 bmt := New(pool) 167 err = testHasherCorrectness(bmt, hasher, data, n, count) 168 if err != nil { 169 t.Fatal(err) 170 } 171 } 172 }) 173 } 174 } 175 176 // tests order-neutral concurrent writes with entire max size written in one go 177 func TestAsyncCorrectness(t *testing.T) { 178 data := newData(BufferSize) 179 hasher := sha3.NewKeccak256 180 size := hasher().Size() 181 whs := []whenHash{first, last, random} 182 183 for _, double := range []bool{false, true} { 184 for _, wh := range whs { 185 for _, count := range counts { 186 t.Run(fmt.Sprintf("double_%v_hash_when_%v_segments_%v", double, wh, count), func(t *testing.T) { 187 max := count * size 188 var incr int 189 capacity := 1 190 pool := NewTreePool(hasher, count, capacity) 191 defer pool.Drain(0) 192 for n := 1; n <= max; n += incr { 193 incr = 1 + rand.Intn(5) 194 bmt := New(pool) 195 d := data[:n] 196 rbmt := NewRefHasher(hasher, count) 197 exp := rbmt.Hash(d) 198 got := syncHash(bmt, nil, d) 199 if !bytes.Equal(got, exp) { 200 t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got) 201 } 202 sw := bmt.NewAsyncWriter(double) 203 got = asyncHashRandom(sw, nil, d, wh) 204 if !bytes.Equal(got, exp) { 205 t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got) 206 } 207 } 208 }) 209 } 210 } 211 } 212 } 213 214 // Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize 215 func TestHasherReuse(t *testing.T) { 216 t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) { 217 testHasherReuse(1, t) 218 }) 219 t.Run(fmt.Sprintf("poolsize_%d", PoolSize), func(t *testing.T) { 220 testHasherReuse(PoolSize, t) 221 }) 222 } 223 224 // tests if bmt reuse is not corrupting result 225 func testHasherReuse(poolsize int, t *testing.T) { 226 hasher := sha3.NewKeccak256 227 pool := NewTreePool(hasher, SegmentCount, poolsize) 228 defer pool.Drain(0) 229 bmt := New(pool) 230 231 for i := 0; i < 100; i++ { 232 data := newData(BufferSize) 233 n := rand.Intn(bmt.Size()) 234 err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount) 235 if err != nil { 236 t.Fatal(err) 237 } 238 } 239 } 240 241 // Tests if pool can be cleanly reused even in concurrent use by several hasher 242 func TestBMTConcurrentUse(t *testing.T) { 243 hasher := sha3.NewKeccak256 244 pool := NewTreePool(hasher, SegmentCount, PoolSize) 245 defer pool.Drain(0) 246 cycles := 100 247 errc := make(chan error) 248 249 for i := 0; i < cycles; i++ { 250 go func() { 251 bmt := New(pool) 252 data := newData(BufferSize) 253 n := rand.Intn(bmt.Size()) 254 errc <- testHasherCorrectness(bmt, hasher, data, n, 128) 255 }() 256 } 257 LOOP: 258 for { 259 select { 260 case <-time.NewTimer(5 * time.Second).C: 261 t.Fatal("timed out") 262 case err := <-errc: 263 if err != nil { 264 t.Fatal(err) 265 } 266 cycles-- 267 if cycles == 0 { 268 break LOOP 269 } 270 } 271 } 272 } 273 274 // Tests BMT Hasher io.Writer interface is working correctly 275 // even multiple short random write buffers 276 func TestBMTWriterBuffers(t *testing.T) { 277 hasher := sha3.NewKeccak256 278 279 for _, count := range counts { 280 t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) { 281 errc := make(chan error) 282 pool := NewTreePool(hasher, count, PoolSize) 283 defer pool.Drain(0) 284 n := count * 32 285 bmt := New(pool) 286 data := newData(n) 287 rbmt := NewRefHasher(hasher, count) 288 refHash := rbmt.Hash(data) 289 expHash := syncHash(bmt, nil, data) 290 if !bytes.Equal(expHash, refHash) { 291 t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash) 292 } 293 attempts := 10 294 f := func() error { 295 bmt := New(pool) 296 bmt.Reset() 297 var buflen int 298 for offset := 0; offset < n; offset += buflen { 299 buflen = rand.Intn(n-offset) + 1 300 read, err := bmt.Write(data[offset : offset+buflen]) 301 if err != nil { 302 return err 303 } 304 if read != buflen { 305 return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read) 306 } 307 } 308 hash := bmt.Sum(nil) 309 if !bytes.Equal(hash, expHash) { 310 return fmt.Errorf("hash mismatch. expected %x, got %x", hash, expHash) 311 } 312 return nil 313 } 314 315 for j := 0; j < attempts; j++ { 316 go func() { 317 errc <- f() 318 }() 319 } 320 timeout := time.NewTimer(2 * time.Second) 321 for { 322 select { 323 case err := <-errc: 324 if err != nil { 325 t.Fatal(err) 326 } 327 attempts-- 328 if attempts == 0 { 329 return 330 } 331 case <-timeout.C: 332 t.Fatalf("timeout") 333 } 334 } 335 }) 336 } 337 } 338 339 // helper function that compares reference and optimised implementations on 340 // correctness 341 func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) { 342 span := make([]byte, 8) 343 if len(d) < n { 344 n = len(d) 345 } 346 binary.BigEndian.PutUint64(span, uint64(n)) 347 data := d[:n] 348 rbmt := NewRefHasher(hasher, count) 349 exp := sha3hash(span, rbmt.Hash(data)) 350 got := syncHash(bmt, span, data) 351 if !bytes.Equal(got, exp) { 352 return fmt.Errorf("wrong hash: expected %x, got %x", exp, got) 353 } 354 return err 355 } 356 357 // 358 func BenchmarkBMT(t *testing.B) { 359 for size := 4096; size >= 128; size /= 2 { 360 t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) { 361 benchmarkSHA3(t, size) 362 }) 363 t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) { 364 benchmarkBMTBaseline(t, size) 365 }) 366 t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) { 367 benchmarkRefHasher(t, size) 368 }) 369 t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) { 370 benchmarkBMT(t, size) 371 }) 372 } 373 } 374 375 type whenHash = int 376 377 const ( 378 first whenHash = iota 379 last 380 random 381 ) 382 383 func BenchmarkBMTAsync(t *testing.B) { 384 whs := []whenHash{first, last, random} 385 for size := 4096; size >= 128; size /= 2 { 386 for _, wh := range whs { 387 for _, double := range []bool{false, true} { 388 t.Run(fmt.Sprintf("double_%v_hash_when_%v_size_%v", double, wh, size), func(t *testing.B) { 389 benchmarkBMTAsync(t, size, wh, double) 390 }) 391 } 392 } 393 } 394 } 395 396 func BenchmarkPool(t *testing.B) { 397 caps := []int{1, PoolSize} 398 for size := 4096; size >= 128; size /= 2 { 399 for _, c := range caps { 400 t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) { 401 benchmarkPool(t, c, size) 402 }) 403 } 404 } 405 } 406 407 // benchmarks simple sha3 hash on chunks 408 func benchmarkSHA3(t *testing.B, n int) { 409 data := newData(n) 410 hasher := sha3.NewKeccak256 411 h := hasher() 412 413 t.ReportAllocs() 414 t.ResetTimer() 415 for i := 0; i < t.N; i++ { 416 doSum(h, nil, data) 417 } 418 } 419 420 // benchmarks the minimum hashing time for a balanced (for simplicity) BMT 421 // by doing count/segmentsize parallel hashings of 2*segmentsize bytes 422 // doing it on n PoolSize each reusing the base hasher 423 // the premise is that this is the minimum computation needed for a BMT 424 // therefore this serves as a theoretical optimum for concurrent implementations 425 func benchmarkBMTBaseline(t *testing.B, n int) { 426 hasher := sha3.NewKeccak256 427 hashSize := hasher().Size() 428 data := newData(hashSize) 429 430 t.ReportAllocs() 431 t.ResetTimer() 432 for i := 0; i < t.N; i++ { 433 count := int32((n-1)/hashSize + 1) 434 wg := sync.WaitGroup{} 435 wg.Add(PoolSize) 436 var i int32 437 for j := 0; j < PoolSize; j++ { 438 go func() { 439 defer wg.Done() 440 h := hasher() 441 for atomic.AddInt32(&i, 1) < count { 442 doSum(h, nil, data) 443 } 444 }() 445 } 446 wg.Wait() 447 } 448 } 449 450 // benchmarks BMT Hasher 451 func benchmarkBMT(t *testing.B, n int) { 452 data := newData(n) 453 hasher := sha3.NewKeccak256 454 pool := NewTreePool(hasher, SegmentCount, PoolSize) 455 bmt := New(pool) 456 457 t.ReportAllocs() 458 t.ResetTimer() 459 for i := 0; i < t.N; i++ { 460 syncHash(bmt, nil, data) 461 } 462 } 463 464 // benchmarks BMT hasher with asynchronous concurrent segment/section writes 465 func benchmarkBMTAsync(t *testing.B, n int, wh whenHash, double bool) { 466 data := newData(n) 467 hasher := sha3.NewKeccak256 468 pool := NewTreePool(hasher, SegmentCount, PoolSize) 469 bmt := New(pool).NewAsyncWriter(double) 470 idxs, segments := splitAndShuffle(bmt.SectionSize(), data) 471 shuffle(len(idxs), func(i int, j int) { 472 idxs[i], idxs[j] = idxs[j], idxs[i] 473 }) 474 475 t.ReportAllocs() 476 t.ResetTimer() 477 for i := 0; i < t.N; i++ { 478 asyncHash(bmt, nil, n, wh, idxs, segments) 479 } 480 } 481 482 // benchmarks 100 concurrent bmt hashes with pool capacity 483 func benchmarkPool(t *testing.B, poolsize, n int) { 484 data := newData(n) 485 hasher := sha3.NewKeccak256 486 pool := NewTreePool(hasher, SegmentCount, poolsize) 487 cycles := 100 488 489 t.ReportAllocs() 490 t.ResetTimer() 491 wg := sync.WaitGroup{} 492 for i := 0; i < t.N; i++ { 493 wg.Add(cycles) 494 for j := 0; j < cycles; j++ { 495 go func() { 496 defer wg.Done() 497 bmt := New(pool) 498 syncHash(bmt, nil, data) 499 }() 500 } 501 wg.Wait() 502 } 503 } 504 505 // benchmarks the reference hasher 506 func benchmarkRefHasher(t *testing.B, n int) { 507 data := newData(n) 508 hasher := sha3.NewKeccak256 509 rbmt := NewRefHasher(hasher, 128) 510 511 t.ReportAllocs() 512 t.ResetTimer() 513 for i := 0; i < t.N; i++ { 514 rbmt.Hash(data) 515 } 516 } 517 518 func newData(bufferSize int) []byte { 519 data := make([]byte, bufferSize) 520 _, err := io.ReadFull(crand.Reader, data) 521 if err != nil { 522 panic(err.Error()) 523 } 524 return data 525 } 526 527 // Hash hashes the data and the span using the bmt hasher 528 func syncHash(h *Hasher, span, data []byte) []byte { 529 h.ResetWithLength(span) 530 h.Write(data) 531 return h.Sum(nil) 532 } 533 534 func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) { 535 l := len(data) 536 n := l / secsize 537 if l%secsize > 0 { 538 n++ 539 } 540 for i := 0; i < n; i++ { 541 idxs = append(idxs, i) 542 end := (i + 1) * secsize 543 if end > l { 544 end = l 545 } 546 section := data[i*secsize : end] 547 segments = append(segments, section) 548 } 549 shuffle(n, func(i int, j int) { 550 idxs[i], idxs[j] = idxs[j], idxs[i] 551 }) 552 return idxs, segments 553 } 554 555 // splits the input data performs a random shuffle to mock async section writes 556 func asyncHashRandom(bmt SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) { 557 idxs, segments := splitAndShuffle(bmt.SectionSize(), data) 558 return asyncHash(bmt, span, len(data), wh, idxs, segments) 559 } 560 561 // mock for async section writes for BMT SectionWriter 562 // requires a permutation (a random shuffle) of list of all indexes of segments 563 // and writes them in order to the appropriate section 564 // the Sum function is called according to the wh parameter (first, last, random [relative to segment writes]) 565 func asyncHash(bmt SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) { 566 bmt.Reset() 567 if l == 0 { 568 return bmt.Sum(nil, l, span) 569 } 570 c := make(chan []byte, 1) 571 hashf := func() { 572 c <- bmt.Sum(nil, l, span) 573 } 574 maxsize := len(idxs) 575 var r int 576 if wh == random { 577 r = rand.Intn(maxsize) 578 } 579 for i, idx := range idxs { 580 bmt.Write(idx, segments[idx]) 581 if (wh == first || wh == random) && i == r { 582 go hashf() 583 } 584 } 585 if wh == last { 586 return bmt.Sum(nil, l, span) 587 } 588 return <-c 589 } 590 591 // this is also in swarm/network_test.go 592 // shuffle pseudo-randomizes the order of elements. 593 // n is the number of elements. Shuffle panics if n < 0. 594 // swap swaps the elements with indexes i and j. 595 func shuffle(n int, swap func(i, j int)) { 596 if n < 0 { 597 panic("invalid argument to Shuffle") 598 } 599 600 // Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle 601 // Shuffle really ought not be called with n that doesn't fit in 32 bits. 602 // Not only will it take a very long time, but with 2³¹! possible permutations, 603 // there's no way that any PRNG can have a big enough internal state to 604 // generate even a minuscule percentage of the possible permutations. 605 // Nevertheless, the right API signature accepts an int n, so handle it as best we can. 606 i := n - 1 607 for ; i > 1<<31-1-1; i-- { 608 j := int(rand.Int63n(int64(i + 1))) 609 swap(i, j) 610 } 611 for ; i > 0; i-- { 612 j := int(rand.Int31n(int32(i + 1))) 613 swap(i, j) 614 } 615 }