github.com/onflow/atree@v0.6.0/blake3_regression_test.go (about)

     1  /*
     2   * Atree - Scalable Arrays and Ordered Maps
     3   *
     4   * Copyright 2022 Dapper Labs, Inc.
     5   * Copyright 2021 Faye Amacker
     6   *
     7   * Licensed under the Apache License, Version 2.0 (the "License");
     8   * you may not use this file except in compliance with the License.
     9   * You may obtain a copy of the License at
    10   *
    11   *   http://www.apache.org/licenses/LICENSE-2.0
    12   *
    13   * Unless required by applicable law or agreed to in writing, software
    14   * distributed under the License is distributed on an "AS IS" BASIS,
    15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16   * See the License for the specific language governing permissions and
    17   * limitations under the License.
    18   *
    19   * ------------------------------------------------------------------------
    20   *
    21   * This file is a modified subset of circlehash64_test.go copied from
    22   *
    23   *     https://github.com/fxamacker/circlehash
    24   *
    25   * Expected digest values and some names of functions and variables were
    26   * modified to check BLAKE3 instead of CircleHash64. Seeds were removed.
    27   * Test data size was increased from 16KiB to 64KiB to be able to
    28   * verify code paths in BLAKE3 that might be optimized for larger
    29   * input sizes.
    30   */
    31  
    32  package atree
    33  
    34  import (
    35  	"bytes"
    36  	"crypto/sha512"
    37  	"hash"
    38  	"testing"
    39  
    40  	"github.com/stretchr/testify/require"
    41  	blake3zeebo "github.com/zeebo/blake3"
    42  	blake3luke "lukechampine.com/blake3"
    43  )
    44  
    45  // Compatibility tests check 131072 BLAKE3 digests produced by hashing
    46  // input sizes of various lengths (0-64KiB bytes) and then hashing the
    47  // equivalent "concatenated" BLAKE3 digests with SHA-512.
    48  //
    49  // Additionally, each of the 131072 BLAKE3 digests is compared between
    50  // the github.com/zeebo/blake3 and lukechampine.com/blake3 libraries.
    51  //
    52  // Tests use SHA-512 digest to represent many BLAKE3 digests because SHA-512 is
    53  // included in Go and is available in many languages. This approach eliminates
    54  // the need to store and separately compare 131072 BLAKE3 digests.
    55  //
    56  // Tests for input sizes greater than 128 bytes can help BLAKE3 implementations
    57  // that rely on input size to determine which optimized code path to execute.
    58  
// countBLAKE3 counts calls to countedAndComparedBLAKE3. Each call hashes its
// input with both the zeebo and luke libraries but increments the counter
// only once, so the two libraries are not double counted.
var countBLAKE3 uint64
    60  
    61  func TestBLAKE3Vectors(t *testing.T) {
    62  
    63  	// Official BLAKE3 test vector checks 35 digests using
    64  	// 35 sizes of input from the same repeating pattern
    65  	inputs := makeBLAKE3InputData(102400)
    66  
    67  	sizes := []int{
    68  		0, 1, 2, 3, 4, 5, 6, 7,
    69  		8, 63, 64, 65, 127, 128, 129, 1023,
    70  		1024, 1025, 2048, 2049, 3072, 3073, 4096, 4097,
    71  		5120, 5121, 6144, 6145, 7168, 7169, 8192, 8193,
    72  		16384, 31744, 102400,
    73  	}
    74  
    75  	// Use SHA-512 to hash 35 BLAKE3 digest results
    76  	// so we only have to compare one hardcoded value.
    77  	h := sha512.New()
    78  	want := decodeHexOrPanic("b785cc13e1ed42b2c31096c91aacf155d2898bcf2fbcfd3a02b481612423a4372a6367bd5da5ce9e1edadef81d44d77363060a4c4b6af436e4b4c189f6f72b3e")
    79  
    80  	for _, n := range sizes {
    81  		digest := countedAndComparedBLAKE3(t, inputs[:n])
    82  		_, err := h.Write(digest[:])
    83  		require.NoError(t, err)
    84  	}
    85  
    86  	got := h.Sum(nil)
    87  	if !bytes.Equal(got, want) {
    88  		t.Errorf("got 0x%064x; want 0x%064x", got, want)
    89  	}
    90  }
    91  
    92  func makeBLAKE3InputData(length int) []byte {
    93  	b := make([]byte, length)
    94  	for i := 0; i < len(b); i++ {
    95  		b[i] = byte(i % 251)
    96  	}
    97  	return b
    98  }
    99  
   100  // TestBLAKE3Regression checks 131072 BLAKE3 digests
   101  // for expected values using input sizes up to
   102  // 64KiB.  This test is designed to detect
   103  // malicious hash implementations when this test
   104  // is used with other tests for size 0 and some
   105  // sizes > 64KiB.
   106  func TestBLAKE3Regression(t *testing.T) {
   107  
   108  	// Create 64 KiB of test data from SHA-512 using the simplest
   109  	// form of SHA-512 feedback loop (nothing-up-my-sleeve).
   110  	data := nonUniformBytes64KiB()
   111  
   112  	// Verify BLAKE3 digests produced from hashing portions of
   113  	// data. Input sizes vary from 1 to 64KiB bytes by varying
   114  	// starting pos and ending pos.
   115  	// We use 64KiB because BLAKE3 implementations can have
   116  	// special optimizations for large data sizes and we
   117  	// want to verify digests produced by all their code paths.
   118  
   119  	testCases := []struct {
   120  		name                     string
   121  		wantSHA512VaringStartPos []byte
   122  		wantSHA512VaringEndPos   []byte
   123  	}{
   124  		{
   125  			"nokey",
   126  			decodeHexOrPanic("8030991ad495219d9fdd346fab027d1f453887a3d157fa4bfcd67a4b213a6817817817f43779ddd2b274a243d8a942728141b72d8bcde9d49fdfc5d9a823983f"),
   127  			decodeHexOrPanic("a8a7c00fce5a6adc774e8bf5ff45b40f382954c932288d0d79d589755b094f8db6fa16780e2ca9a1434b56a0716a25fb7eecb545f1c6f7599b08214fd1b59a8a"),
   128  		},
   129  		// If Atree begins to use keyed BLAKE3, add testCases here for keyed hashes
   130  	}
   131  
   132  	for _, tc := range testCases {
   133  		t.Run(tc.name, func(t *testing.T) {
   134  
   135  			h := sha512.New()
   136  
   137  			// test 65536 BLAKE3 digests by varying start pos of data
   138  			checksumVaryingStartPosNoSeed(t, h, data)
   139  			got := h.Sum(nil)
   140  			if !bytes.Equal(got, tc.wantSHA512VaringStartPos) {
   141  				t.Errorf("checksumVaryingStartPos(nonuniform16KiB) = 0x%0128x; want 0x%0128x",
   142  					got,
   143  					tc.wantSHA512VaringStartPos)
   144  			}
   145  
   146  			h.Reset()
   147  
   148  			// test another 65536 BLAKE3 digests by varying end pos of data
   149  			checksumVaryingEndPosNoSeed(t, h, data)
   150  			got = h.Sum(nil)
   151  			if !bytes.Equal(got, tc.wantSHA512VaringEndPos) {
   152  				t.Errorf("checksumVaryingEndPos(nonuniform16KiB) = 0x%0128x; want 0x%0128x",
   153  					got,
   154  					tc.wantSHA512VaringEndPos)
   155  			}
   156  		})
   157  	}
   158  }
   159  
   160  // checksumVaryingStartPosNoSeed updates cryptoHash512 with
   161  // concatenated BLAKE3 digests.
   162  func checksumVaryingStartPosNoSeed(t *testing.T, cryptoHash512 hash.Hash, data []byte) {
   163  
   164  	// vary the starting position and keep the ending position
   165  	for i := uint64(0); i < uint64(len(data)); i++ {
   166  
   167  		digest := countedAndComparedBLAKE3(t, data[i:])
   168  
   169  		// Feed digest into SHA-512, SHA3-512, etc.
   170  		cryptoHash512.Write(digest[:])
   171  	}
   172  }
   173  
   174  // checksumVaryingEndPosNoSeed updates cryptoHash512 with
   175  // concatenated BLAKE3 digests.
   176  func checksumVaryingEndPosNoSeed(t *testing.T, cryptoHash512 hash.Hash, data []byte) {
   177  
   178  	// keep the starting position at zero and increment the length
   179  	for i := uint64(1); i <= uint64(len(data)); i++ {
   180  		digest := countedAndComparedBLAKE3(t, data[:i])
   181  
   182  		// Feed digest into SHA-512, SHA3-512, etc.
   183  		cryptoHash512.Write(digest[:])
   184  	}
   185  }
   186  
   187  // nonUniformBytes64KiB returns 64KiB bytes of non-uniform bytes
   188  // produced from SHA-512 in a feedback loop. SHA-512 is used instead
   189  // of SHAKE-256 XOF or a stream cipher because SHA-512 is bundled with
   190  // Go and is available in most languages. One reason a simple PRNG
   191  // isn't used here is because different implementions in different
   192  // programming languages are sometimes incompatible due to errors
   193  // (like SplitMix64). SHA-512 will be compatible everywhere.
   194  // For BLAKE3, we should use at least 64KiB because implementations
   195  // might use optimized paths for various large input sizes.
   196  func nonUniformBytes64KiB() []byte {
   197  	b := make([]byte, 0, 1024*64)
   198  
   199  	// Each input to SHA-512 is 64 bytes. First 64-byte input is zeros.
   200  	// The next input to SHA-512 is the 64-byte output of SHA-512.
   201  	// Each output of SHA-512 is appended to the returned byte slice.
   202  	d := make([]byte, 64)
   203  	for i := 0; i < 1024; i++ {
   204  		a := sha512.Sum512(d)
   205  		d = a[:]
   206  		b = append(b, d...)
   207  	}
   208  
   209  	return b
   210  }
   211  
   212  func countedAndComparedBLAKE3(t *testing.T, data []byte) [32]byte {
   213  	digest := blake3zeebo.Sum256(data)
   214  	digest2 := blake3luke.Sum256(data)
   215  	if digest != digest2 {
   216  		t.Errorf("BLAKE3zeebo 0x%x != BLAKE3luke 0x%x", digest, digest2)
   217  	}
   218  
   219  	countBLAKE3++
   220  	return digest
   221  }