github.com/onflow/atree@v0.6.0/circlehash64_regression_test.go (about)

     1  /*
     2   * Atree - Scalable Arrays and Ordered Maps
     3   *
     4   * Copyright 2022 Dapper Labs, Inc.
     5   * Copyright 2021 Faye Amacker
     6   *
     7   * Licensed under the Apache License, Version 2.0 (the "License");
     8   * you may not use this file except in compliance with the License.
     9   * You may obtain a copy of the License at
    10   *
    11   *   http://www.apache.org/licenses/LICENSE-2.0
    12   *
    13   * Unless required by applicable law or agreed to in writing, software
    14   * distributed under the License is distributed on an "AS IS" BASIS,
    15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16   * See the License for the specific language governing permissions and
    17   * limitations under the License.
    18   *
    19   * ------------------------------------------------------------------------
    20   *
    21   * This file is a modified subset of circlehash64_test.go copied from
    22   *
    23   *     https://github.com/fxamacker/circlehash
    24   *
    25   * This subset verifies nearly 200,000 CircleHash64 digests.  The full
    26   * CircleHash64 compatibility tests verify nearly 600,000 digests.
    27   */
    28  
    29  package atree
    30  
    31  import (
    32  	"bytes"
    33  	"crypto/sha512"
    34  	"encoding/binary"
    35  	"encoding/hex"
    36  	"fmt"
    37  	"hash"
    38  	"testing"
    39  
    40  	"github.com/fxamacker/circlehash"
    41  	"github.com/stretchr/testify/require"
    42  )
    43  
    44  // CircleHash64 uses CircleHash64f as default hash. Expected SHA-512 checksums are
    45  // from the C++ and Go CircleHash reference implementations by Faye Amacker.
    46  // SHA-512 is used because it's included in Go and available in many languages.
    47  //
    48  // Compatibility tests check CircleHash64 digests produced by hashing
    49  // input sizes of various lengths (0-16384 bytes).  Tests for input sizes greater
    50  // than 128 bytes can help future implementations that rely on input size to
    51  // determine which optimized code path to execute.
    52  
    53  const (
    54  	// nums are nothing-up-my-sleeve numbers
    55  	numsAllZeros = uint64(0x0000000000000000)
    56  	numsAll55s   = uint64(0x5555555555555555) // alternating 1 and 0 bit
    57  	numsAllAAs   = uint64(0xAAAAAAAAAAAAAAAA) // alternating 0 and 1 bit
    58  	numsAllFFs   = uint64(0xFFFFFFFFFFFFFFFF)
    59  
    60  	numsGoldenRatio    = uint64(0x9E3779B97F4A7C15) // https://en.wikipedia.org/wiki/Golden_ratio
    61  	numsGoldenRatioInv = numsGoldenRatio ^ numsAllFFs
    62  )
    63  
    64  var countCircleHash64f uint64 // count calls to Hash64 (doesn't include calls to HashString64)
    65  
    66  // TestCircleHash64Regression is renamed from TestCircleHash64NonUniformBitPatternInputs.
    67  func TestCircleHash64Regression(t *testing.T) {
    68  
    69  	// Create 16 KiB of test data from SHA-512 using the simplest
    70  	// form of SHA-512 feedback loop (nothing-up-my-sleeve).
    71  	data := nonUniformBytes16KiB()
    72  
    73  	// Verify CircleHash64 digests produced from hashing portions of
    74  	// data using different seed values. Input sizes vary from
    75  	// 1 to 16384 bytes by varying starting pos and ending pos.
    76  
    77  	testCases := []struct {
    78  		name                     string
    79  		seed                     uint64
    80  		wantSHA512VaringStartPos []byte
    81  		wantSHA512VaringEndPos   []byte
    82  	}{
    83  		{
    84  			"seed 00s",
    85  			numsAllZeros,
    86  			decodeHexOrPanic("8fc041d09087f9f3108ed86422ee6562f4eaf1ad0b1d83ede3f69b14f3798b8a5c80518ea7041f0803882ced33bce34351c5415469957e40ddd806d618742a71"),
    87  			decodeHexOrPanic("80348e245dec5e09c424411c4dfa9fbbad1cbf68495707e3579bec8c7e7e010f6ff441b6b3987e4da28be39ccd355ae545ca0329284fa39a0d630b941e83355a"),
    88  		},
    89  
    90  		{
    91  			"seed 55s",
    92  			numsAll55s,
    93  			decodeHexOrPanic("714733e2f758328f07556e849cc96b371dba28ed8c6c934f6591a7e4ea90a02dc93bb858639ed62b3aacc26932efe3a47aa4e5b713a8f1c2a5375988fb3fcf05"),
    94  			decodeHexOrPanic("90ac86e7e8ce973ad402d1db741c7ee320b330ffbbe391c9b20eb9ce07385c66df3f40efd0865ee18894b559cde70f38ec7b01319b2ef2f3f61c64cc8abeca12"),
    95  		},
    96  
    97  		{
    98  			"seed AAs",
    99  			numsAllAAs,
   100  			decodeHexOrPanic("710f68717bf5144e703e10236d9d2cda2b7e8e503aacf4168a088a1be51d3ffe83cf19908e238be15883f6cd25a2c7c71e715173e19fc73f5707ad7626c3b944"),
   101  			decodeHexOrPanic("042037e4dfaf0072c5048c8043fa6ac1f197f8b3c2140d97ccfe9abd69f7ef6fb6739e0728c40bff272dbd6a0c82f7f04f95a0ca64cdfe73c080b691bd58214e"),
   102  		},
   103  
   104  		{
   105  			"seed FFs",
   106  			numsAllFFs,
   107  			decodeHexOrPanic("e275ac0f2df55036ac844f7cbf6375fbad8b4c7fbac98296e5d0fbfbdb294534c5a45058883220572bff8145c3e2f191950f0cad2841c9bd50babe3b907469c4"),
   108  			decodeHexOrPanic("12864d73da4f64ef97b988b400566f9b89ebaeee87629208ac7029a6cc6a57759025f83efd0480b1675fb4b06d128439c03ac300ce0c1fbd35dfaa9a91e233ac"),
   109  		},
   110  
   111  		{
   112  			"seed GR",
   113  			numsGoldenRatio,
   114  			decodeHexOrPanic("a19151170f5a8e92a98416fff407f35317d458cd8f47a3d28b9a2ddcb277d6d0ff895a1b06f6aa5f25c67b71c74d9f6705ffbfe27edd1237ee990395f61842f2"),
   115  			decodeHexOrPanic("b853718a24f4b46e0e3d1b4cf497637af09b5aa061496707b1839f824b9b4f4294113976765a72b9dfd916d8a56cc434a7f12116cff8406c8b3ffd8a8acd80d3"),
   116  		},
   117  
   118  		{
   119  			"seed GRI",
   120  			numsGoldenRatioInv,
   121  			decodeHexOrPanic("e18b76bb467bbbab91deeb42307964fd92db5ac6bf5718da12ba391c3a89c6f0f7c6379dcf6c7676eb1bbc8c8d240919b154086bfba65fc4d0e468b67e474195"),
   122  			decodeHexOrPanic("626cbb08e12d6988bc7d8f75e9571961d4e46240e5ef682562f7010d8916a7b104b988f6749b67f59f5e7cb4147017842d78ce17b7c9443813b92c0e198b62e2"),
   123  		},
   124  	}
   125  
   126  	for _, tc := range testCases {
   127  		t.Run(tc.name, func(t *testing.T) {
   128  
   129  			h := sha512.New()
   130  
   131  			checksumVaryingStartPos(t, h, tc.seed, data)
   132  			got := h.Sum(nil)
   133  			if !bytes.Equal(got, tc.wantSHA512VaringStartPos) {
   134  				t.Errorf("checksumVaryingStartPos(nonuniform16KiB) = 0x%0128x; want 0x%0128x",
   135  					got,
   136  					tc.wantSHA512VaringStartPos)
   137  			}
   138  
   139  			h.Reset()
   140  
   141  			checksumVaryingEndPos(t, h, tc.seed, data)
   142  			got = h.Sum(nil)
   143  			if !bytes.Equal(got, tc.wantSHA512VaringEndPos) {
   144  				t.Errorf("checksumVaryingEndPos(nonuniform16KiB) = 0x%0128x; want 0x%0128x",
   145  					got,
   146  					tc.wantSHA512VaringEndPos)
   147  			}
   148  		})
   149  	}
   150  
   151  	require.Equal(t, uint64(196608), countCircleHash64f) // Update comments if this line changes
   152  }
   153  
   154  // checksumVaryingStartPos updates cryptoHash512 with
   155  // concatenated CircleHash64 digests. E.g. passing in data containing
   156  // 128 bytes will use 128 CircleHash64 digests ending at
   157  // the last byte and incrementing the start position of data.
   158  func checksumVaryingStartPos(t *testing.T, cryptoHash512 hash.Hash, seed uint64, data []byte) {
   159  
   160  	// vary the starting position and keep the ending position
   161  	for i := uint64(0); i < uint64(len(data)); i++ {
   162  
   163  		digest := countedCircleHash64(t, data[i:], seed)
   164  
   165  		b := make([]byte, 8)
   166  		binary.LittleEndian.PutUint64(b, digest)
   167  
   168  		// Feed CircleHash64 result into SHA-512, SHA3-512, etc.
   169  		cryptoHash512.Write(b)
   170  	}
   171  }
   172  
   173  // checksumVaryingEndPos updates cryptoHash512 with
   174  // concatenated CircleHash64 digests. E.g. passing in data containing
   175  // 128 bytes will use 128 CircleHash64 digests always starting at
   176  // the first byte and incrementing the length of input size.
   177  func checksumVaryingEndPos(t *testing.T, cryptoHash512 hash.Hash, seed uint64, data []byte) {
   178  
   179  	// keep the starting position at zero and increment the length
   180  	for i := uint64(1); i <= uint64(len(data)); i++ {
   181  		digest := countedCircleHash64(t, data[0:i], seed)
   182  
   183  		b := make([]byte, 8)
   184  		binary.LittleEndian.PutUint64(b, digest)
   185  
   186  		// Feed CircleHash64 result into SHA-512, SHA3-512, etc.
   187  		cryptoHash512.Write(b)
   188  	}
   189  }
   190  
   191  // nonUniformBytes16Kib returns 16384 bytes of non-uniform bytes
   192  // produced from SHA-512 in a feedback loop. SHA-512 is used instead
   193  // of SHAKE-256 XOF or a stream cipher because SHA-512 is bundled with
   194  // Go and is available in most languages. One reason a simple PRNG
   195  // isn't used here is because different implementions in different
   196  // programming languages are sometimes incompatible due to errors
   197  // (like SplitMix64). SHA-512 will be compatible everywhere.
   198  // SHA-512 of the returned 16384-byte slice is:
   199  // 412895cdfdf6fd60181cd709b6aed89cce63ede8402531185c969de50eb04ae3
   200  // 5d042d7b2758d02f97c6b13b1a397e2fbeca7ceb07c606f3602bed97984f99c6
   201  func nonUniformBytes16KiB() []byte {
   202  	b := make([]byte, 0, 256*64) // length=0, capacity=16384
   203  
   204  	// Each input to SHA-512 is 64 bytes. First 64-byte input is zeros.
   205  	// The next input to SHA-512 is the 64-byte output of SHA-512.
   206  	// Each output of SHA-512 is appended to the returned byte slice.
   207  	d := make([]byte, 64)
   208  	for i := 0; i < 256; i++ {
   209  		a := sha512.Sum512(d)
   210  		d = a[:]
   211  		b = append(b, d...)
   212  	}
   213  
   214  	return b
   215  }
   216  
   217  // countedCircleHash64 calls Hash64 and increments countCircleHash64.
   218  func countedCircleHash64(t *testing.T, data []byte, seed uint64) uint64 {
   219  	digest := circlehash.Hash64(data, seed)
   220  	digest2 := circlehash.Hash64String(string(data), seed)
   221  	if digest != digest2 {
   222  		t.Errorf("Hash64() 0x%x != Hash64String() 0x%x", digest, digest2)
   223  	}
   224  
   225  	if len(data) == 16 {
   226  		a := binary.LittleEndian.Uint64(data)
   227  		b := binary.LittleEndian.Uint64(data[8:])
   228  		digest3 := circlehash.Hash64Uint64x2(a, b, seed)
   229  		if digest != digest3 {
   230  			t.Errorf("Hash64() 0x%x != Hash64Uint64x2() 0x%x", digest, digest3)
   231  		}
   232  	}
   233  
   234  	countCircleHash64f++
   235  	return digest
   236  }
   237  
   238  func decodeHexOrPanic(s string) []byte {
   239  	b, err := hex.DecodeString(s)
   240  	if err != nil {
   241  		panic(fmt.Sprintf("bad hex string: %s", err))
   242  	}
   243  	return b
   244  }