github.com/artpar/rclone@v1.67.3/backend/hidrive/hidrivehash/hidrivehash.go (about)

     1  // Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash.
     2  //
     3  // Note: This implementation does not grant access to any partial hashes generated.
     4  //
     5  // See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf
     6  // (link to newest version: https://static.hidrive.com/dev/0001)
     7  package hidrivehash
     8  
     9  import (
    10  	"bytes"
    11  	"crypto/sha1"
    12  	"encoding"
    13  	"encoding/binary"
    14  	"errors"
    15  	"fmt"
    16  	"hash"
    17  	"io"
    18  
    19  	"github.com/artpar/rclone/backend/hidrive/hidrivehash/internal"
    20  )
    21  
const (
	// BlockSize of the checksum in bytes.
	// The HiDrive hash splits its input into blocks of this many bytes.
	BlockSize = 4096
	// Size of the checksum in bytes.
	Size = sha1.Size
	// sumsPerLevel is the number of checksums a level accepts before it is full.
	sumsPerLevel = 256
)
    30  
var (
	// zeroSum is a special hash consisting of 20 null-bytes.
	// This will be the hash of any empty file (or ones containing only null-bytes).
	zeroSum = [Size]byte{}
	// ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid.
	ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash")
	// ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input.
	ErrorHashFull = errors.New("hash reached its capacity")
)
    40  
    41  // writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize.
    42  // It returns the number of bytes written from p (0 <= n <= len(p))
    43  // and any error encountered that caused the write to stop early.
    44  //
    45  // A pointer bytesInBlock to a counter needs to be supplied,
    46  // that is used to keep track how many bytes have been written to the writer already.
    47  // A pointer onlyNullBytesInBlock to a boolean needs to be supplied,
    48  // that is used to keep track whether the block so far only consists of null-bytes.
    49  // The callback onBlockWritten is called whenever a full block has been written to the writer
    50  // and is given as input the number of bytes that still need to be written.
    51  func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) {
    52  	total := len(p)
    53  	nullBytes := make([]byte, blockSize)
    54  	for len(p) > 0 {
    55  		toWrite := int(blockSize - *bytesInBlock)
    56  		if toWrite > len(p) {
    57  			toWrite = len(p)
    58  		}
    59  		c, err := writer.Write(p[:toWrite])
    60  		*bytesInBlock += uint32(c)
    61  		*onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:toWrite], p[:toWrite])
    62  		// Discard data written through a reslice
    63  		p = p[c:]
    64  		if err != nil {
    65  			return total - len(p), err
    66  		}
    67  		if *bytesInBlock == blockSize {
    68  			err = onBlockWritten(len(p))
    69  			if err != nil {
    70  				return total - len(p), err
    71  			}
    72  			*bytesInBlock = 0
    73  			*onlyNullBytesInBlock = true
    74  		}
    75  	}
    76  	return total, nil
    77  }
    78  
// level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it.
// It is used to represent any level-n hash, except for level-0.
type level struct {
	checksum              [Size]byte // aggregated checksum of this level
	sumCount              uint32     // number of sums contained in this level so far
	bytesInHasher         uint32     // number of bytes written into hasher so far
	onlyNullBytesInHasher bool       // whether the hasher only contains null-bytes so far
	hasher                hash.Hash  // SHA-1 hasher for the checksum currently being written
}
    88  
    89  // NewLevel returns a new hash.Hash computing any level-n hash, except level-0.
    90  func NewLevel() hash.Hash {
    91  	l := &level{}
    92  	l.Reset()
    93  	return l
    94  }
    95  
    96  // Add takes a position-embedded SHA-1 checksum and adds it to the level.
    97  func (l *level) Add(sha1sum []byte) {
    98  	var tmp uint
    99  	var carry bool
   100  	for i := Size - 1; i >= 0; i-- {
   101  		tmp = uint(sha1sum[i]) + uint(l.checksum[i])
   102  		if carry {
   103  			tmp++
   104  		}
   105  		carry = tmp > 255
   106  		l.checksum[i] = byte(tmp)
   107  	}
   108  }
   109  
   110  // IsFull returns whether the number of checksums added to this level reached its capacity.
   111  func (l *level) IsFull() bool {
   112  	return l.sumCount >= sumsPerLevel
   113  }
   114  
   115  // Write (via the embedded io.Writer interface) adds more data to the running hash.
   116  // Contrary to the specification from hash.Hash, this DOES return an error,
   117  // specifically ErrorHashFull if and only if IsFull() returns true.
   118  func (l *level) Write(p []byte) (n int, err error) {
   119  	if l.IsFull() {
   120  		return 0, ErrorHashFull
   121  	}
   122  	onBlockWritten := func(remaining int) error {
   123  		if !l.onlyNullBytesInHasher {
   124  			c, err := l.hasher.Write([]byte{byte(l.sumCount)})
   125  			l.bytesInHasher += uint32(c)
   126  			if err != nil {
   127  				return err
   128  			}
   129  			l.Add(l.hasher.Sum(nil))
   130  		}
   131  		l.sumCount++
   132  		l.hasher.Reset()
   133  		if remaining > 0 && l.IsFull() {
   134  			return ErrorHashFull
   135  		}
   136  		return nil
   137  	}
   138  	return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten)
   139  }
   140  
   141  // Sum appends the current hash to b and returns the resulting slice.
   142  // It does not change the underlying hash state.
   143  func (l *level) Sum(b []byte) []byte {
   144  	return append(b, l.checksum[:]...)
   145  }
   146  
   147  // Reset resets the Hash to its initial state.
   148  func (l *level) Reset() {
   149  	l.checksum = zeroSum // clear the current checksum
   150  	l.sumCount = 0
   151  	l.bytesInHasher = 0
   152  	l.onlyNullBytesInHasher = true
   153  	l.hasher = sha1.New()
   154  }
   155  
// Size returns the number of bytes Sum will return.
// This is always the package constant Size.
func (l *level) Size() int {
	return Size
}
   160  
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
//
// For a level the block size equals Size,
// because Write consumes its input one checksum at a time.
func (l *level) BlockSize() int {
	return Size
}
   168  
   169  // MarshalBinary encodes the hash into a binary form and returns the result.
   170  func (l *level) MarshalBinary() ([]byte, error) {
   171  	b := make([]byte, Size+4+4+1)
   172  	copy(b, l.checksum[:])
   173  	binary.BigEndian.PutUint32(b[Size:], l.sumCount)
   174  	binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher)
   175  	if l.onlyNullBytesInHasher {
   176  		b[Size+4+4] = 1
   177  	}
   178  	encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary()
   179  	if err != nil {
   180  		return nil, err
   181  	}
   182  	b = append(b, encodedHasher...)
   183  	return b, nil
   184  }
   185  
   186  // UnmarshalBinary decodes the binary form generated by MarshalBinary.
   187  // The hash will replace its internal state accordingly.
   188  func (l *level) UnmarshalBinary(b []byte) error {
   189  	if len(b) < Size+4+4+1 {
   190  		return ErrorInvalidEncoding
   191  	}
   192  	copy(l.checksum[:], b)
   193  	l.sumCount = binary.BigEndian.Uint32(b[Size:])
   194  	l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:])
   195  	switch b[Size+4+4] {
   196  	case 0:
   197  		l.onlyNullBytesInHasher = false
   198  	case 1:
   199  		l.onlyNullBytesInHasher = true
   200  	default:
   201  		return ErrorInvalidEncoding
   202  	}
   203  	err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:])
   204  	return err
   205  }
   206  
// hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes.
type hidriveHash struct {
	levels               []*level   // collection of level-hashes, one for each level starting at level-1
	lastSumWritten       [Size]byte // the last checksum written to any of the levels
	bytesInBlock         uint32     // bytes written into blockHash so far
	onlyNullBytesInBlock bool       // whether the current block only contains null-bytes so far
	blockHash            hash.Hash  // SHA-1 hasher for the block currently being written
}
   215  
   216  // New returns a new hash.Hash computing the HiDrive checksum.
   217  func New() hash.Hash {
   218  	h := &hidriveHash{}
   219  	h.Reset()
   220  	return h
   221  }
   222  
   223  // aggregateToLevel writes the checksum to the level at the given index
   224  // and if necessary propagates any changes to levels above.
   225  func (h *hidriveHash) aggregateToLevel(index int, sum []byte) {
   226  	for i := index; ; i++ {
   227  		if i >= len(h.levels) {
   228  			h.levels = append(h.levels, NewLevel().(*level))
   229  		}
   230  		_, err := h.levels[i].Write(sum)
   231  		copy(h.lastSumWritten[:], sum)
   232  		if err != nil {
   233  			panic(fmt.Errorf("level-hash should not have produced an error: %w", err))
   234  		}
   235  		if !h.levels[i].IsFull() {
   236  			break
   237  		}
   238  		sum = h.levels[i].Sum(nil)
   239  		h.levels[i].Reset()
   240  	}
   241  }
   242  
   243  // Write (via the embedded io.Writer interface) adds more data to the running hash.
   244  // It never returns an error.
   245  func (h *hidriveHash) Write(p []byte) (n int, err error) {
   246  	onBlockWritten := func(remaining int) error {
   247  		var sum []byte
   248  		if h.onlyNullBytesInBlock {
   249  			sum = zeroSum[:]
   250  		} else {
   251  			sum = h.blockHash.Sum(nil)
   252  		}
   253  		h.blockHash.Reset()
   254  		h.aggregateToLevel(0, sum)
   255  		return nil
   256  	}
   257  	return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten)
   258  }
   259  
   260  // Sum appends the current hash to b and returns the resulting slice.
   261  // It does not change the underlying hash state.
   262  func (h *hidriveHash) Sum(b []byte) []byte {
   263  	// Save internal state.
   264  	state, err := h.MarshalBinary()
   265  	if err != nil {
   266  		panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err))
   267  	}
   268  
   269  	if h.bytesInBlock > 0 {
   270  		// Fill remainder of block with null-bytes.
   271  		filler := make([]byte, h.BlockSize()-int(h.bytesInBlock))
   272  		_, err = h.Write(filler)
   273  		if err != nil {
   274  			panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err))
   275  		}
   276  	}
   277  
   278  	checksum := zeroSum
   279  	for i := 0; i < len(h.levels); i++ {
   280  		level := h.levels[i]
   281  		if i < len(h.levels)-1 {
   282  			// Aggregate non-empty non-final levels.
   283  			if level.sumCount >= 1 {
   284  				h.aggregateToLevel(i+1, level.Sum(nil))
   285  				level.Reset()
   286  			}
   287  		} else {
   288  			// Determine sum of final level.
   289  			if level.sumCount > 1 {
   290  				copy(checksum[:], level.Sum(nil))
   291  			} else {
   292  				// This is needed, otherwise there is no way to return
   293  				// the non-position-embedded checksum.
   294  				checksum = h.lastSumWritten
   295  			}
   296  		}
   297  	}
   298  
   299  	// Restore internal state.
   300  	err = h.UnmarshalBinary(state)
   301  	if err != nil {
   302  		panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err))
   303  	}
   304  
   305  	return append(b, checksum[:]...)
   306  }
   307  
   308  // Reset resets the Hash to its initial state.
   309  func (h *hidriveHash) Reset() {
   310  	h.levels = nil
   311  	h.lastSumWritten = zeroSum // clear the last written checksum
   312  	h.bytesInBlock = 0
   313  	h.onlyNullBytesInBlock = true
   314  	h.blockHash = sha1.New()
   315  }
   316  
// Size returns the number of bytes Sum will return.
// This is always the package constant Size.
func (h *hidriveHash) Size() int {
	return Size
}
   321  
// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all writes
// are a multiple of the block size.
//
// This is always the package constant BlockSize.
func (h *hidriveHash) BlockSize() int {
	return BlockSize
}
   329  
   330  // MarshalBinary encodes the hash into a binary form and returns the result.
   331  func (h *hidriveHash) MarshalBinary() ([]byte, error) {
   332  	b := make([]byte, Size+4+1+8)
   333  	copy(b, h.lastSumWritten[:])
   334  	binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock)
   335  	if h.onlyNullBytesInBlock {
   336  		b[Size+4] = 1
   337  	}
   338  
   339  	binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels)))
   340  	for _, level := range h.levels {
   341  		encodedLevel, err := level.MarshalBinary()
   342  		if err != nil {
   343  			return nil, err
   344  		}
   345  		encodedLength := make([]byte, 8)
   346  		binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel)))
   347  		b = append(b, encodedLength...)
   348  		b = append(b, encodedLevel...)
   349  	}
   350  	encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary()
   351  	if err != nil {
   352  		return nil, err
   353  	}
   354  	b = append(b, encodedBlockHash...)
   355  	return b, nil
   356  }
   357  
   358  // UnmarshalBinary decodes the binary form generated by MarshalBinary.
   359  // The hash will replace its internal state accordingly.
   360  func (h *hidriveHash) UnmarshalBinary(b []byte) error {
   361  	if len(b) < Size+4+1+8 {
   362  		return ErrorInvalidEncoding
   363  	}
   364  	copy(h.lastSumWritten[:], b)
   365  	h.bytesInBlock = binary.BigEndian.Uint32(b[Size:])
   366  	switch b[Size+4] {
   367  	case 0:
   368  		h.onlyNullBytesInBlock = false
   369  	case 1:
   370  		h.onlyNullBytesInBlock = true
   371  	default:
   372  		return ErrorInvalidEncoding
   373  	}
   374  
   375  	amount := binary.BigEndian.Uint64(b[Size+4+1:])
   376  	h.levels = make([]*level, int(amount))
   377  	offset := Size + 4 + 1 + 8
   378  	for i := range h.levels {
   379  		length := int(binary.BigEndian.Uint64(b[offset:]))
   380  		offset += 8
   381  		h.levels[i] = NewLevel().(*level)
   382  		err := h.levels[i].UnmarshalBinary(b[offset : offset+length])
   383  		if err != nil {
   384  			return err
   385  		}
   386  		offset += length
   387  	}
   388  	err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:])
   389  	return err
   390  }
   391  
   392  // Sum returns the HiDrive checksum of the data.
   393  func Sum(data []byte) [Size]byte {
   394  	h := New().(*hidriveHash)
   395  	_, _ = h.Write(data)
   396  	var result [Size]byte
   397  	copy(result[:], h.Sum(nil))
   398  	return result
   399  }
   400  
// Check the interfaces are satisfied.
// These assignments fail to compile if one of the types stops implementing an interface.
var (
	_ hash.Hash                  = (*level)(nil)
	_ encoding.BinaryMarshaler   = (*level)(nil)
	_ encoding.BinaryUnmarshaler = (*level)(nil)
	_ internal.LevelHash         = (*level)(nil)
	_ hash.Hash                  = (*hidriveHash)(nil)
	_ encoding.BinaryMarshaler   = (*hidriveHash)(nil)
	_ encoding.BinaryUnmarshaler = (*hidriveHash)(nil)
)