github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/backend/onedrive/quickxorhash/quickxorhash.go (about)

     1  // Package quickxorhash provides the quickXorHash algorithm which is a
     2  // quick, simple non-cryptographic hash algorithm that works by XORing
     3  // the bytes in a circular-shifting fashion.
     4  //
     5  // It is used by Microsoft Onedrive for Business to hash data.
     6  //
     7  // See: https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
     8  package quickxorhash
     9  
// This code was ported from the code snippet linked from
// https://docs.microsoft.com/en-us/onedrive/developer/code-snippets/quickxorhash
// which carries the following copyright notice:
    13  
    14  // ------------------------------------------------------------------------------
    15  //  Copyright (c) 2016 Microsoft Corporation
    16  //
    17  //  Permission is hereby granted, free of charge, to any person obtaining a copy
    18  //  of this software and associated documentation files (the "Software"), to deal
    19  //  in the Software without restriction, including without limitation the rights
    20  //  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    21  //  copies of the Software, and to permit persons to whom the Software is
    22  //  furnished to do so, subject to the following conditions:
    23  //
    24  //  The above copyright notice and this permission notice shall be included in
    25  //  all copies or substantial portions of the Software.
    26  //
    27  //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    28  //  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    29  //  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    30  //  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    31  //  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    32  //  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    33  //  THE SOFTWARE.
    34  // ------------------------------------------------------------------------------
    35  
    36  import (
    37  	"hash"
    38  )
    39  
    40  const (
    41  	// BlockSize is the preferred size for hashing
    42  	BlockSize = 64
    43  	// Size of the output checksum
    44  	Size           = 20
    45  	bitsInLastCell = 32
    46  	shift          = 11
    47  	widthInBits    = 8 * Size
    48  	dataSize       = (widthInBits-1)/64 + 1
    49  )
    50  
// quickXorHash holds the running state of a quickXorHash computation.
// The zero value is the initial state, ready for use.
type quickXorHash struct {
	// data is the bit vector the input bytes are XORed into, stored as
	// 64-bit cells; only the low bitsInLastCell bits of the final cell
	// end up in the checksum.
	data        [dataSize]uint64
	// lengthSoFar is the total number of bytes passed to Write so far.
	lengthSoFar uint64
	// shiftSoFar is the bit position (in the range [0, widthInBits)) at
	// which the first byte of the next Write is XORed in.
	shiftSoFar  int
}
    56  
    57  // New returns a new hash.Hash computing the quickXorHash checksum.
    58  func New() hash.Hash {
    59  	return &quickXorHash{}
    60  }
    61  
    62  // Write (via the embedded io.Writer interface) adds more data to the running hash.
    63  // It never returns an error.
    64  //
    65  // Write writes len(p) bytes from p to the underlying data stream. It returns
    66  // the number of bytes written from p (0 <= n <= len(p)) and any error
    67  // encountered that caused the write to stop early. Write must return a non-nil
    68  // error if it returns n < len(p). Write must not modify the slice data, even
    69  // temporarily.
    70  //
    71  // Implementations must not retain p.
    72  func (q *quickXorHash) Write(p []byte) (n int, err error) {
    73  	currentshift := q.shiftSoFar
    74  
    75  	// The bitvector where we'll start xoring
    76  	vectorArrayIndex := currentshift / 64
    77  
    78  	// The position within the bit vector at which we begin xoring
    79  	vectorOffset := currentshift % 64
    80  	iterations := len(p)
    81  	if iterations > widthInBits {
    82  		iterations = widthInBits
    83  	}
    84  
    85  	for i := 0; i < iterations; i++ {
    86  		isLastCell := vectorArrayIndex == len(q.data)-1
    87  		var bitsInVectorCell int
    88  		if isLastCell {
    89  			bitsInVectorCell = bitsInLastCell
    90  		} else {
    91  			bitsInVectorCell = 64
    92  		}
    93  
    94  		// There's at least 2 bitvectors before we reach the end of the array
    95  		if vectorOffset <= bitsInVectorCell-8 {
    96  			for j := i; j < len(p); j += widthInBits {
    97  				q.data[vectorArrayIndex] ^= uint64(p[j]) << uint(vectorOffset)
    98  			}
    99  		} else {
   100  			index1 := vectorArrayIndex
   101  			var index2 int
   102  			if isLastCell {
   103  				index2 = 0
   104  			} else {
   105  				index2 = vectorArrayIndex + 1
   106  			}
   107  			low := byte(bitsInVectorCell - vectorOffset)
   108  
   109  			xoredByte := byte(0)
   110  			for j := i; j < len(p); j += widthInBits {
   111  				xoredByte ^= p[j]
   112  			}
   113  			q.data[index1] ^= uint64(xoredByte) << uint(vectorOffset)
   114  			q.data[index2] ^= uint64(xoredByte) >> low
   115  		}
   116  		vectorOffset += shift
   117  		for vectorOffset >= bitsInVectorCell {
   118  			if isLastCell {
   119  				vectorArrayIndex = 0
   120  			} else {
   121  				vectorArrayIndex = vectorArrayIndex + 1
   122  			}
   123  			vectorOffset -= bitsInVectorCell
   124  		}
   125  	}
   126  
   127  	// Update the starting position in a circular shift pattern
   128  	q.shiftSoFar = (q.shiftSoFar + shift*(len(p)%widthInBits)) % widthInBits
   129  
   130  	q.lengthSoFar += uint64(len(p))
   131  
   132  	return len(p), nil
   133  }
   134  
   135  // Calculate the current checksum
   136  func (q *quickXorHash) checkSum() (h [Size]byte) {
   137  	// Output the data as little endian bytes
   138  	ph := 0
   139  	for _, d := range q.data[:len(q.data)-1] {
   140  		_ = h[ph+7] // bounds check
   141  		h[ph+0] = byte(d >> (8 * 0))
   142  		h[ph+1] = byte(d >> (8 * 1))
   143  		h[ph+2] = byte(d >> (8 * 2))
   144  		h[ph+3] = byte(d >> (8 * 3))
   145  		h[ph+4] = byte(d >> (8 * 4))
   146  		h[ph+5] = byte(d >> (8 * 5))
   147  		h[ph+6] = byte(d >> (8 * 6))
   148  		h[ph+7] = byte(d >> (8 * 7))
   149  		ph += 8
   150  	}
   151  	// remaining 32 bits
   152  	d := q.data[len(q.data)-1]
   153  	h[Size-4] = byte(d >> (8 * 0))
   154  	h[Size-3] = byte(d >> (8 * 1))
   155  	h[Size-2] = byte(d >> (8 * 2))
   156  	h[Size-1] = byte(d >> (8 * 3))
   157  
   158  	// XOR the file length with the least significant bits in little endian format
   159  	d = q.lengthSoFar
   160  	h[Size-8] ^= byte(d >> (8 * 0))
   161  	h[Size-7] ^= byte(d >> (8 * 1))
   162  	h[Size-6] ^= byte(d >> (8 * 2))
   163  	h[Size-5] ^= byte(d >> (8 * 3))
   164  	h[Size-4] ^= byte(d >> (8 * 4))
   165  	h[Size-3] ^= byte(d >> (8 * 5))
   166  	h[Size-2] ^= byte(d >> (8 * 6))
   167  	h[Size-1] ^= byte(d >> (8 * 7))
   168  
   169  	return h
   170  }
   171  
   172  // Sum appends the current hash to b and returns the resulting slice.
   173  // It does not change the underlying hash state.
   174  func (q *quickXorHash) Sum(b []byte) []byte {
   175  	hash := q.checkSum()
   176  	return append(b, hash[:]...)
   177  }
   178  
   179  // Reset resets the Hash to its initial state.
   180  func (q *quickXorHash) Reset() {
   181  	*q = quickXorHash{}
   182  }
   183  
   184  // Size returns the number of bytes Sum will return.
   185  func (q *quickXorHash) Size() int {
   186  	return Size
   187  }
   188  
   189  // BlockSize returns the hash's underlying block size.
   190  // The Write method must be able to accept any amount
   191  // of data, but it may operate more efficiently if all writes
   192  // are a multiple of the block size.
   193  func (q *quickXorHash) BlockSize() int {
   194  	return BlockSize
   195  }
   196  
   197  // Sum returns the quickXorHash checksum of the data.
   198  func Sum(data []byte) [Size]byte {
   199  	var d quickXorHash
   200  	_, _ = d.Write(data)
   201  	return d.checkSum()
   202  }