github.com/jstaf/onedriver@v0.14.2-0.20240420231225-f07678f9e6ef/fs/graph/quickxorhash/quickxorhash.go (about)

     1  // Package quickxorhash provides the quickXorHash algorithm which is a
     2  // quick, simple non-cryptographic hash algorithm that works by XORing
     3  // the bytes in a circular-shifting fashion.
     4  //
     5  // It is used by Microsoft Onedrive for Business to hash data.
     6  //
     7  // See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
     8  package quickxorhash
     9  
// This code was ported from the code snippet linked from
// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
// which carries the following copyright notice:
    13  
    14  // ------------------------------------------------------------------------------
    15  //  Copyright (c) 2016 Microsoft Corporation
    16  //
    17  //  Permission is hereby granted, free of charge, to any person obtaining a copy
    18  //  of this software and associated documentation files (the "Software"), to deal
    19  //  in the Software without restriction, including without limitation the rights
    20  //  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    21  //  copies of the Software, and to permit persons to whom the Software is
    22  //  furnished to do so, subject to the following conditions:
    23  //
    24  //  The above copyright notice and this permission notice shall be included in
    25  //  all copies or substantial portions of the Software.
    26  //
    27  //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    28  //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    29  //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    30  //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    31  //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    32  //  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    33  //  THE SOFTWARE.
    34  // ------------------------------------------------------------------------------
    35  
    36  import (
    37  	"hash"
    38  )
    39  
    40  const (
    41  	// BlockSize is the preferred size for hashing
    42  	BlockSize = 64
    43  	// Size of the output checksum
    44  	Size           = 20
    45  	bitsInLastCell = 32
    46  	shift          = 11
    47  	widthInBits    = 8 * Size
    48  	dataSize       = (widthInBits-1)/64 + 1
    49  )
    50  
// quickXorHash is the running state of a quickXorHash computation. New hands
// it out behind the hash.Hash interface; the zero value is an empty hash that
// is ready for use.
type quickXorHash struct {
	// data is the circular bit vector of widthInBits bits, stored as
	// dataSize 64-bit cells. Only the low bitsInLastCell bits of the final
	// cell end up in the checksum.
	data        [dataSize]uint64
	// lengthSoFar is the total number of bytes passed to Write.
	lengthSoFar uint64
	// shiftSoFar is the bit position in the vector at which the next Write
	// begins XORing; it is always kept in the range [0, widthInBits).
	shiftSoFar  int
}
    56  
    57  // New returns a new hash.Hash computing the quickXorHash checksum.
    58  func New() hash.Hash {
    59  	return &quickXorHash{}
    60  }
    61  
    62  // Write (via the embedded io.Writer interface) adds more data to the running hash.
    63  // It never returns an error.
    64  //
    65  // Write writes len(p) bytes from p to the underlying data stream. It returns
    66  // the number of bytes written from p (0 <= n <= len(p)) and any error
    67  // encountered that caused the write to stop early. Write must return a non-nil
    68  // error if it returns n < len(p). Write must not modify the slice data, even
    69  // temporarily.
    70  //
    71  // Implementations must not retain p.
    72  func (q *quickXorHash) Write(p []byte) (n int, err error) {
    73  	currentshift := q.shiftSoFar
    74  
    75  	// The bitvector where we'll start xoring
    76  	vectorArrayIndex := currentshift / 64
    77  
    78  	// The position within the bit vector at which we begin xoring
    79  	vectorOffset := currentshift % 64
    80  	iterations := len(p)
    81  	if iterations > widthInBits {
    82  		iterations = widthInBits
    83  	}
    84  
    85  	for i := 0; i < iterations; i++ {
    86  		isLastCell := vectorArrayIndex == len(q.data)-1
    87  		var bitsInVectorCell int
    88  		if isLastCell {
    89  			bitsInVectorCell = bitsInLastCell
    90  		} else {
    91  			bitsInVectorCell = 64
    92  		}
    93  
    94  		// There's at least 2 bitvectors before we reach the end of the array
    95  		if vectorOffset <= bitsInVectorCell-8 {
    96  			for j := i; j < len(p); j += widthInBits {
    97  				q.data[vectorArrayIndex] ^= uint64(p[j]) << uint(vectorOffset)
    98  			}
    99  		} else {
   100  			index1 := vectorArrayIndex
   101  			var index2 int
   102  			if isLastCell {
   103  				index2 = 0
   104  			} else {
   105  				index2 = vectorArrayIndex + 1
   106  			}
   107  			low := byte(bitsInVectorCell - vectorOffset)
   108  
   109  			xoredByte := byte(0)
   110  			for j := i; j < len(p); j += widthInBits {
   111  				xoredByte ^= p[j]
   112  			}
   113  			q.data[index1] ^= uint64(xoredByte) << uint(vectorOffset)
   114  			q.data[index2] ^= uint64(xoredByte) >> low
   115  		}
   116  		vectorOffset += shift
   117  		for vectorOffset >= bitsInVectorCell {
   118  			if isLastCell {
   119  				vectorArrayIndex = 0
   120  			} else {
   121  				vectorArrayIndex = vectorArrayIndex + 1
   122  			}
   123  			vectorOffset -= bitsInVectorCell
   124  		}
   125  	}
   126  
   127  	// Update the starting position in a circular shift pattern
   128  	q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits
   129  
   130  	q.lengthSoFar += uint64(len(p))
   131  
   132  	return len(p), nil
   133  }
   134  
   135  // Calculate the current checksum
   136  func (q *quickXorHash) checkSum() (h [Size]byte) {
   137  	// Output the data as little endian bytes
   138  	ph := 0
   139  	for i := 0; i < len(q.data)-1; i++ {
   140  		d := q.data[i]
   141  		_ = h[ph+7] // bounds check
   142  		h[ph+0] = byte(d >> (8 * 0))
   143  		h[ph+1] = byte(d >> (8 * 1))
   144  		h[ph+2] = byte(d >> (8 * 2))
   145  		h[ph+3] = byte(d >> (8 * 3))
   146  		h[ph+4] = byte(d >> (8 * 4))
   147  		h[ph+5] = byte(d >> (8 * 5))
   148  		h[ph+6] = byte(d >> (8 * 6))
   149  		h[ph+7] = byte(d >> (8 * 7))
   150  		ph += 8
   151  	}
   152  	// remaining 32 bits
   153  	d := q.data[len(q.data)-1]
   154  	h[Size-4] = byte(d >> (8 * 0))
   155  	h[Size-3] = byte(d >> (8 * 1))
   156  	h[Size-2] = byte(d >> (8 * 2))
   157  	h[Size-1] = byte(d >> (8 * 3))
   158  
   159  	// XOR the file length with the least significant bits in little endian format
   160  	d = q.lengthSoFar
   161  	h[Size-8] ^= byte(d >> (8 * 0))
   162  	h[Size-7] ^= byte(d >> (8 * 1))
   163  	h[Size-6] ^= byte(d >> (8 * 2))
   164  	h[Size-5] ^= byte(d >> (8 * 3))
   165  	h[Size-4] ^= byte(d >> (8 * 4))
   166  	h[Size-3] ^= byte(d >> (8 * 5))
   167  	h[Size-2] ^= byte(d >> (8 * 6))
   168  	h[Size-1] ^= byte(d >> (8 * 7))
   169  
   170  	return h
   171  }
   172  
   173  // Sum appends the current hash to b and returns the resulting slice.
   174  // It does not change the underlying hash state.
   175  func (q *quickXorHash) Sum(b []byte) []byte {
   176  	hash := q.checkSum()
   177  	return append(b, hash[:]...)
   178  }
   179  
   180  // Reset resets the Hash to its initial state.
   181  func (q *quickXorHash) Reset() {
   182  	*q = quickXorHash{}
   183  }
   184  
   185  // Size returns the number of bytes Sum will return.
   186  func (q *quickXorHash) Size() int {
   187  	return Size
   188  }
   189  
   190  // BlockSize returns the hash's underlying block size.
   191  // The Write method must be able to accept any amount
   192  // of data, but it may operate more efficiently if all writes
   193  // are a multiple of the block size.
   194  func (q *quickXorHash) BlockSize() int {
   195  	return BlockSize
   196  }
   197  
   198  // Sum returns the quickXorHash checksum of the data.
   199  func Sum(data []byte) [Size]byte {
   200  	var d quickXorHash
   201  	_, _ = d.Write(data)
   202  	return d.checkSum()
   203  }