github.com/daragao/go-ethereum@v1.8.14-0.20180809141559-45eaef243198/swarm/bmt/bmt_test.go (about)

     1  // Copyright 2017 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bmt
    18  
    19  import (
    20  	"bytes"
    21  	crand "crypto/rand"
    22  	"encoding/binary"
    23  	"fmt"
    24  	"io"
    25  	"math/rand"
    26  	"sync"
    27  	"sync/atomic"
    28  	"testing"
    29  	"time"
    30  
    31  	"github.com/ethereum/go-ethereum/crypto/sha3"
    32  )
    33  
// BufferSize is the actual data length generated for the tests
// (could be longer than max datalength of the BMT)
const BufferSize = 4128

// counts lists the segment counts the tests iterate over; each value is passed
// as the segment count to NewTreePool and NewRefHasher
var counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, 111, 127, 128}
    38  
    39  // calculates the Keccak256 SHA3 hash of the data
    40  func sha3hash(data ...[]byte) []byte {
    41  	h := sha3.NewKeccak256()
    42  	return doSum(h, nil, data...)
    43  }
    44  
    45  // TestRefHasher tests that the RefHasher computes the expected BMT hash for
    46  // some small data lengths
    47  func TestRefHasher(t *testing.T) {
    48  	// the test struct is used to specify the expected BMT hash for
    49  	// segment counts between from and to and lengths from 1 to datalength
    50  	type test struct {
    51  		from     int
    52  		to       int
    53  		expected func([]byte) []byte
    54  	}
    55  
    56  	var tests []*test
    57  	// all lengths in [0,64] should be:
    58  	//
    59  	//   sha3hash(data)
    60  	//
    61  	tests = append(tests, &test{
    62  		from: 1,
    63  		to:   2,
    64  		expected: func(d []byte) []byte {
    65  			data := make([]byte, 64)
    66  			copy(data, d)
    67  			return sha3hash(data)
    68  		},
    69  	})
    70  
    71  	// all lengths in [3,4] should be:
    72  	//
    73  	//   sha3hash(
    74  	//     sha3hash(data[:64])
    75  	//     sha3hash(data[64:])
    76  	//   )
    77  	//
    78  	tests = append(tests, &test{
    79  		from: 3,
    80  		to:   4,
    81  		expected: func(d []byte) []byte {
    82  			data := make([]byte, 128)
    83  			copy(data, d)
    84  			return sha3hash(sha3hash(data[:64]), sha3hash(data[64:]))
    85  		},
    86  	})
    87  
    88  	// all segmentCounts in [5,8] should be:
    89  	//
    90  	//   sha3hash(
    91  	//     sha3hash(
    92  	//       sha3hash(data[:64])
    93  	//       sha3hash(data[64:128])
    94  	//     )
    95  	//     sha3hash(
    96  	//       sha3hash(data[128:192])
    97  	//       sha3hash(data[192:])
    98  	//     )
    99  	//   )
   100  	//
   101  	tests = append(tests, &test{
   102  		from: 5,
   103  		to:   8,
   104  		expected: func(d []byte) []byte {
   105  			data := make([]byte, 256)
   106  			copy(data, d)
   107  			return sha3hash(sha3hash(sha3hash(data[:64]), sha3hash(data[64:128])), sha3hash(sha3hash(data[128:192]), sha3hash(data[192:])))
   108  		},
   109  	})
   110  
   111  	// run the tests
   112  	for _, x := range tests {
   113  		for segmentCount := x.from; segmentCount <= x.to; segmentCount++ {
   114  			for length := 1; length <= segmentCount*32; length++ {
   115  				t.Run(fmt.Sprintf("%d_segments_%d_bytes", segmentCount, length), func(t *testing.T) {
   116  					data := make([]byte, length)
   117  					if _, err := io.ReadFull(crand.Reader, data); err != nil && err != io.EOF {
   118  						t.Fatal(err)
   119  					}
   120  					expected := x.expected(data)
   121  					actual := NewRefHasher(sha3.NewKeccak256, segmentCount).Hash(data)
   122  					if !bytes.Equal(actual, expected) {
   123  						t.Fatalf("expected %x, got %x", expected, actual)
   124  					}
   125  				})
   126  			}
   127  		}
   128  	}
   129  }
   130  
   131  // tests if hasher responds with correct hash comparing the reference implementation return value
   132  func TestHasherEmptyData(t *testing.T) {
   133  	hasher := sha3.NewKeccak256
   134  	var data []byte
   135  	for _, count := range counts {
   136  		t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
   137  			pool := NewTreePool(hasher, count, PoolSize)
   138  			defer pool.Drain(0)
   139  			bmt := New(pool)
   140  			rbmt := NewRefHasher(hasher, count)
   141  			refHash := rbmt.Hash(data)
   142  			expHash := syncHash(bmt, nil, data)
   143  			if !bytes.Equal(expHash, refHash) {
   144  				t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
   145  			}
   146  		})
   147  	}
   148  }
   149  
   150  // tests sequential write with entire max size written in one go
   151  func TestSyncHasherCorrectness(t *testing.T) {
   152  	data := newData(BufferSize)
   153  	hasher := sha3.NewKeccak256
   154  	size := hasher().Size()
   155  
   156  	var err error
   157  	for _, count := range counts {
   158  		t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
   159  			max := count * size
   160  			var incr int
   161  			capacity := 1
   162  			pool := NewTreePool(hasher, count, capacity)
   163  			defer pool.Drain(0)
   164  			for n := 0; n <= max; n += incr {
   165  				incr = 1 + rand.Intn(5)
   166  				bmt := New(pool)
   167  				err = testHasherCorrectness(bmt, hasher, data, n, count)
   168  				if err != nil {
   169  					t.Fatal(err)
   170  				}
   171  			}
   172  		})
   173  	}
   174  }
   175  
   176  // tests order-neutral concurrent writes with entire max size written in one go
   177  func TestAsyncCorrectness(t *testing.T) {
   178  	data := newData(BufferSize)
   179  	hasher := sha3.NewKeccak256
   180  	size := hasher().Size()
   181  	whs := []whenHash{first, last, random}
   182  
   183  	for _, double := range []bool{false, true} {
   184  		for _, wh := range whs {
   185  			for _, count := range counts {
   186  				t.Run(fmt.Sprintf("double_%v_hash_when_%v_segments_%v", double, wh, count), func(t *testing.T) {
   187  					max := count * size
   188  					var incr int
   189  					capacity := 1
   190  					pool := NewTreePool(hasher, count, capacity)
   191  					defer pool.Drain(0)
   192  					for n := 1; n <= max; n += incr {
   193  						incr = 1 + rand.Intn(5)
   194  						bmt := New(pool)
   195  						d := data[:n]
   196  						rbmt := NewRefHasher(hasher, count)
   197  						exp := rbmt.Hash(d)
   198  						got := syncHash(bmt, nil, d)
   199  						if !bytes.Equal(got, exp) {
   200  							t.Fatalf("wrong sync hash for datalength %v: expected %x (ref), got %x", n, exp, got)
   201  						}
   202  						sw := bmt.NewAsyncWriter(double)
   203  						got = asyncHashRandom(sw, nil, d, wh)
   204  						if !bytes.Equal(got, exp) {
   205  							t.Fatalf("wrong async hash for datalength %v: expected %x, got %x", n, exp, got)
   206  						}
   207  					}
   208  				})
   209  			}
   210  		}
   211  	}
   212  }
   213  
   214  // Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize
   215  func TestHasherReuse(t *testing.T) {
   216  	t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
   217  		testHasherReuse(1, t)
   218  	})
   219  	t.Run(fmt.Sprintf("poolsize_%d", PoolSize), func(t *testing.T) {
   220  		testHasherReuse(PoolSize, t)
   221  	})
   222  }
   223  
   224  // tests if bmt reuse is not corrupting result
   225  func testHasherReuse(poolsize int, t *testing.T) {
   226  	hasher := sha3.NewKeccak256
   227  	pool := NewTreePool(hasher, SegmentCount, poolsize)
   228  	defer pool.Drain(0)
   229  	bmt := New(pool)
   230  
   231  	for i := 0; i < 100; i++ {
   232  		data := newData(BufferSize)
   233  		n := rand.Intn(bmt.Size())
   234  		err := testHasherCorrectness(bmt, hasher, data, n, SegmentCount)
   235  		if err != nil {
   236  			t.Fatal(err)
   237  		}
   238  	}
   239  }
   240  
   241  // Tests if pool can be cleanly reused even in concurrent use by several hasher
   242  func TestBMTConcurrentUse(t *testing.T) {
   243  	hasher := sha3.NewKeccak256
   244  	pool := NewTreePool(hasher, SegmentCount, PoolSize)
   245  	defer pool.Drain(0)
   246  	cycles := 100
   247  	errc := make(chan error)
   248  
   249  	for i := 0; i < cycles; i++ {
   250  		go func() {
   251  			bmt := New(pool)
   252  			data := newData(BufferSize)
   253  			n := rand.Intn(bmt.Size())
   254  			errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
   255  		}()
   256  	}
   257  LOOP:
   258  	for {
   259  		select {
   260  		case <-time.NewTimer(5 * time.Second).C:
   261  			t.Fatal("timed out")
   262  		case err := <-errc:
   263  			if err != nil {
   264  				t.Fatal(err)
   265  			}
   266  			cycles--
   267  			if cycles == 0 {
   268  				break LOOP
   269  			}
   270  		}
   271  	}
   272  }
   273  
   274  // Tests BMT Hasher io.Writer interface is working correctly
   275  // even multiple short random write buffers
   276  func TestBMTWriterBuffers(t *testing.T) {
   277  	hasher := sha3.NewKeccak256
   278  
   279  	for _, count := range counts {
   280  		t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
   281  			errc := make(chan error)
   282  			pool := NewTreePool(hasher, count, PoolSize)
   283  			defer pool.Drain(0)
   284  			n := count * 32
   285  			bmt := New(pool)
   286  			data := newData(n)
   287  			rbmt := NewRefHasher(hasher, count)
   288  			refHash := rbmt.Hash(data)
   289  			expHash := syncHash(bmt, nil, data)
   290  			if !bytes.Equal(expHash, refHash) {
   291  				t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
   292  			}
   293  			attempts := 10
   294  			f := func() error {
   295  				bmt := New(pool)
   296  				bmt.Reset()
   297  				var buflen int
   298  				for offset := 0; offset < n; offset += buflen {
   299  					buflen = rand.Intn(n-offset) + 1
   300  					read, err := bmt.Write(data[offset : offset+buflen])
   301  					if err != nil {
   302  						return err
   303  					}
   304  					if read != buflen {
   305  						return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read)
   306  					}
   307  				}
   308  				hash := bmt.Sum(nil)
   309  				if !bytes.Equal(hash, expHash) {
   310  					return fmt.Errorf("hash mismatch. expected %x, got %x", hash, expHash)
   311  				}
   312  				return nil
   313  			}
   314  
   315  			for j := 0; j < attempts; j++ {
   316  				go func() {
   317  					errc <- f()
   318  				}()
   319  			}
   320  			timeout := time.NewTimer(2 * time.Second)
   321  			for {
   322  				select {
   323  				case err := <-errc:
   324  					if err != nil {
   325  						t.Fatal(err)
   326  					}
   327  					attempts--
   328  					if attempts == 0 {
   329  						return
   330  					}
   331  				case <-timeout.C:
   332  					t.Fatalf("timeout")
   333  				}
   334  			}
   335  		})
   336  	}
   337  }
   338  
   339  // helper function that compares reference and optimised implementations on
   340  // correctness
   341  func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) {
   342  	span := make([]byte, 8)
   343  	if len(d) < n {
   344  		n = len(d)
   345  	}
   346  	binary.BigEndian.PutUint64(span, uint64(n))
   347  	data := d[:n]
   348  	rbmt := NewRefHasher(hasher, count)
   349  	exp := sha3hash(span, rbmt.Hash(data))
   350  	got := syncHash(bmt, span, data)
   351  	if !bytes.Equal(got, exp) {
   352  		return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
   353  	}
   354  	return err
   355  }
   356  
   357  //
   358  func BenchmarkBMT(t *testing.B) {
   359  	for size := 4096; size >= 128; size /= 2 {
   360  		t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) {
   361  			benchmarkSHA3(t, size)
   362  		})
   363  		t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) {
   364  			benchmarkBMTBaseline(t, size)
   365  		})
   366  		t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) {
   367  			benchmarkRefHasher(t, size)
   368  		})
   369  		t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) {
   370  			benchmarkBMT(t, size)
   371  		})
   372  	}
   373  }
   374  
// whenHash selects at which point, relative to the segment writes, asyncHash
// calls Sum on the section writer
type whenHash = int

const (
	// first: Sum is launched in a goroutine right after the first segment write
	first whenHash = iota
	// last: Sum is called after all segments have been written
	last
	// random: Sum is launched in a goroutine after a randomly chosen segment write
	random
)
   382  
   383  func BenchmarkBMTAsync(t *testing.B) {
   384  	whs := []whenHash{first, last, random}
   385  	for size := 4096; size >= 128; size /= 2 {
   386  		for _, wh := range whs {
   387  			for _, double := range []bool{false, true} {
   388  				t.Run(fmt.Sprintf("double_%v_hash_when_%v_size_%v", double, wh, size), func(t *testing.B) {
   389  					benchmarkBMTAsync(t, size, wh, double)
   390  				})
   391  			}
   392  		}
   393  	}
   394  }
   395  
   396  func BenchmarkPool(t *testing.B) {
   397  	caps := []int{1, PoolSize}
   398  	for size := 4096; size >= 128; size /= 2 {
   399  		for _, c := range caps {
   400  			t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) {
   401  				benchmarkPool(t, c, size)
   402  			})
   403  		}
   404  	}
   405  }
   406  
   407  // benchmarks simple sha3 hash on chunks
   408  func benchmarkSHA3(t *testing.B, n int) {
   409  	data := newData(n)
   410  	hasher := sha3.NewKeccak256
   411  	h := hasher()
   412  
   413  	t.ReportAllocs()
   414  	t.ResetTimer()
   415  	for i := 0; i < t.N; i++ {
   416  		doSum(h, nil, data)
   417  	}
   418  }
   419  
   420  // benchmarks the minimum hashing time for a balanced (for simplicity) BMT
   421  // by doing count/segmentsize parallel hashings of 2*segmentsize bytes
   422  // doing it on n PoolSize each reusing the base hasher
   423  // the premise is that this is the minimum computation needed for a BMT
   424  // therefore this serves as a theoretical optimum for concurrent implementations
   425  func benchmarkBMTBaseline(t *testing.B, n int) {
   426  	hasher := sha3.NewKeccak256
   427  	hashSize := hasher().Size()
   428  	data := newData(hashSize)
   429  
   430  	t.ReportAllocs()
   431  	t.ResetTimer()
   432  	for i := 0; i < t.N; i++ {
   433  		count := int32((n-1)/hashSize + 1)
   434  		wg := sync.WaitGroup{}
   435  		wg.Add(PoolSize)
   436  		var i int32
   437  		for j := 0; j < PoolSize; j++ {
   438  			go func() {
   439  				defer wg.Done()
   440  				h := hasher()
   441  				for atomic.AddInt32(&i, 1) < count {
   442  					doSum(h, nil, data)
   443  				}
   444  			}()
   445  		}
   446  		wg.Wait()
   447  	}
   448  }
   449  
   450  // benchmarks BMT Hasher
   451  func benchmarkBMT(t *testing.B, n int) {
   452  	data := newData(n)
   453  	hasher := sha3.NewKeccak256
   454  	pool := NewTreePool(hasher, SegmentCount, PoolSize)
   455  	bmt := New(pool)
   456  
   457  	t.ReportAllocs()
   458  	t.ResetTimer()
   459  	for i := 0; i < t.N; i++ {
   460  		syncHash(bmt, nil, data)
   461  	}
   462  }
   463  
   464  // benchmarks BMT hasher with asynchronous concurrent segment/section writes
   465  func benchmarkBMTAsync(t *testing.B, n int, wh whenHash, double bool) {
   466  	data := newData(n)
   467  	hasher := sha3.NewKeccak256
   468  	pool := NewTreePool(hasher, SegmentCount, PoolSize)
   469  	bmt := New(pool).NewAsyncWriter(double)
   470  	idxs, segments := splitAndShuffle(bmt.SectionSize(), data)
   471  	shuffle(len(idxs), func(i int, j int) {
   472  		idxs[i], idxs[j] = idxs[j], idxs[i]
   473  	})
   474  
   475  	t.ReportAllocs()
   476  	t.ResetTimer()
   477  	for i := 0; i < t.N; i++ {
   478  		asyncHash(bmt, nil, n, wh, idxs, segments)
   479  	}
   480  }
   481  
   482  // benchmarks 100 concurrent bmt hashes with pool capacity
   483  func benchmarkPool(t *testing.B, poolsize, n int) {
   484  	data := newData(n)
   485  	hasher := sha3.NewKeccak256
   486  	pool := NewTreePool(hasher, SegmentCount, poolsize)
   487  	cycles := 100
   488  
   489  	t.ReportAllocs()
   490  	t.ResetTimer()
   491  	wg := sync.WaitGroup{}
   492  	for i := 0; i < t.N; i++ {
   493  		wg.Add(cycles)
   494  		for j := 0; j < cycles; j++ {
   495  			go func() {
   496  				defer wg.Done()
   497  				bmt := New(pool)
   498  				syncHash(bmt, nil, data)
   499  			}()
   500  		}
   501  		wg.Wait()
   502  	}
   503  }
   504  
   505  // benchmarks the reference hasher
   506  func benchmarkRefHasher(t *testing.B, n int) {
   507  	data := newData(n)
   508  	hasher := sha3.NewKeccak256
   509  	rbmt := NewRefHasher(hasher, 128)
   510  
   511  	t.ReportAllocs()
   512  	t.ResetTimer()
   513  	for i := 0; i < t.N; i++ {
   514  		rbmt.Hash(data)
   515  	}
   516  }
   517  
   518  func newData(bufferSize int) []byte {
   519  	data := make([]byte, bufferSize)
   520  	_, err := io.ReadFull(crand.Reader, data)
   521  	if err != nil {
   522  		panic(err.Error())
   523  	}
   524  	return data
   525  }
   526  
   527  // Hash hashes the data and the span using the bmt hasher
   528  func syncHash(h *Hasher, span, data []byte) []byte {
   529  	h.ResetWithLength(span)
   530  	h.Write(data)
   531  	return h.Sum(nil)
   532  }
   533  
   534  func splitAndShuffle(secsize int, data []byte) (idxs []int, segments [][]byte) {
   535  	l := len(data)
   536  	n := l / secsize
   537  	if l%secsize > 0 {
   538  		n++
   539  	}
   540  	for i := 0; i < n; i++ {
   541  		idxs = append(idxs, i)
   542  		end := (i + 1) * secsize
   543  		if end > l {
   544  			end = l
   545  		}
   546  		section := data[i*secsize : end]
   547  		segments = append(segments, section)
   548  	}
   549  	shuffle(n, func(i int, j int) {
   550  		idxs[i], idxs[j] = idxs[j], idxs[i]
   551  	})
   552  	return idxs, segments
   553  }
   554  
   555  // splits the input data performs a random shuffle to mock async section writes
   556  func asyncHashRandom(bmt SectionWriter, span []byte, data []byte, wh whenHash) (s []byte) {
   557  	idxs, segments := splitAndShuffle(bmt.SectionSize(), data)
   558  	return asyncHash(bmt, span, len(data), wh, idxs, segments)
   559  }
   560  
// mock for async section writes for BMT SectionWriter
// requires a permutation (a random shuffle) of list of all indexes of segments
// and writes them in order to the appropriate section
// the Sum function is called according to the wh parameter (first, last, random [relative to segment writes])
//
// bmt is reset before use; l is the data length passed on to Sum together with span.
// idxs gives the order in which the entries of segments are written.
// For l == 0 nothing is written and Sum is called directly.
func asyncHash(bmt SectionWriter, span []byte, l int, wh whenHash, idxs []int, segments [][]byte) (s []byte) {
	bmt.Reset()
	if l == 0 {
		return bmt.Sum(nil, l, span)
	}
	// buffered with capacity 1 so the hashing goroutine can deliver its result
	// without waiting for the receive at the end of this function
	c := make(chan []byte, 1)
	hashf := func() {
		c <- bmt.Sum(nil, l, span)
	}
	maxsize := len(idxs)
	// r is the position in the write sequence after which Sum is launched:
	// it stays 0 for first and is drawn from [0, maxsize) for random
	var r int
	if wh == random {
		r = rand.Intn(maxsize)
	}
	for i, idx := range idxs {
		bmt.Write(idx, segments[idx])
		if (wh == first || wh == random) && i == r {
			// Sum runs concurrently with the remaining segment writes
			go hashf()
		}
	}
	if wh == last {
		// all segments are written; no goroutine was started in this mode
		return bmt.Sum(nil, l, span)
	}
	return <-c
}
   590  
   591  // this is also in swarm/network_test.go
   592  // shuffle pseudo-randomizes the order of elements.
   593  // n is the number of elements. Shuffle panics if n < 0.
   594  // swap swaps the elements with indexes i and j.
   595  func shuffle(n int, swap func(i, j int)) {
   596  	if n < 0 {
   597  		panic("invalid argument to Shuffle")
   598  	}
   599  
   600  	// Fisher-Yates shuffle: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
   601  	// Shuffle really ought not be called with n that doesn't fit in 32 bits.
   602  	// Not only will it take a very long time, but with 2³¹! possible permutations,
   603  	// there's no way that any PRNG can have a big enough internal state to
   604  	// generate even a minuscule percentage of the possible permutations.
   605  	// Nevertheless, the right API signature accepts an int n, so handle it as best we can.
   606  	i := n - 1
   607  	for ; i > 1<<31-1-1; i-- {
   608  		j := int(rand.Int63n(int64(i + 1)))
   609  		swap(i, j)
   610  	}
   611  	for ; i > 0; i-- {
   612  		j := int(rand.Int31n(int32(i + 1)))
   613  		swap(i, j)
   614  	}
   615  }