// Source: github.com/artpar/rclone@v1.67.3/backend/hidrive/hidrivehash/hidrivehash.go

// Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash.
//
// Note: This implementation does not grant access to any partial hashes generated.
//
// See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf
// (link to newest version: https://static.hidrive.com/dev/0001)
package hidrivehash

import (
	"bytes"
	"crypto/sha1"
	"encoding"
	"encoding/binary"
	"errors"
	"fmt"
	"hash"
	"io"

	"github.com/artpar/rclone/backend/hidrive/hidrivehash/internal"
)

const (
	// BlockSize is the size in bytes of one data block that is hashed on its own
	// before its checksum is handed to the level-hashes.
	BlockSize = 4096
	// Size of the checksum in bytes.
	Size = sha1.Size
	// sumsPerLevel is the number of checksums a level aggregates before it is full.
	sumsPerLevel = 256
)

var (
	// zeroSum is a special hash consisting of 20 null-bytes.
	// This will be the hash of any empty file (or ones containing only null-bytes).
	zeroSum = [Size]byte{}
	// ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid.
	ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash")
	// ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input.
	ErrorHashFull = errors.New("hash reached its capacity")
)

// writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize.
// It returns the number of bytes written from p (0 <= n <= len(p))
// and any error encountered that caused the write to stop early.
//
// A pointer bytesInBlock to a counter needs to be supplied,
// that is used to keep track how many bytes have been written to the writer already.
// A pointer onlyNullBytesInBlock to a boolean needs to be supplied,
// that is used to keep track whether the block so far only consists of null-bytes.
49 // The callback onBlockWritten is called whenever a full block has been written to the writer 50 // and is given as input the number of bytes that still need to be written. 51 func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) { 52 total := len(p) 53 nullBytes := make([]byte, blockSize) 54 for len(p) > 0 { 55 toWrite := int(blockSize - *bytesInBlock) 56 if toWrite > len(p) { 57 toWrite = len(p) 58 } 59 c, err := writer.Write(p[:toWrite]) 60 *bytesInBlock += uint32(c) 61 *onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:toWrite], p[:toWrite]) 62 // Discard data written through a reslice 63 p = p[c:] 64 if err != nil { 65 return total - len(p), err 66 } 67 if *bytesInBlock == blockSize { 68 err = onBlockWritten(len(p)) 69 if err != nil { 70 return total - len(p), err 71 } 72 *bytesInBlock = 0 73 *onlyNullBytesInBlock = true 74 } 75 } 76 return total, nil 77 } 78 79 // level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it. 80 // It is used to represent any level-n hash, except for level-0. 81 type level struct { 82 checksum [Size]byte // aggregated checksum of this level 83 sumCount uint32 // number of sums contained in this level so far 84 bytesInHasher uint32 // number of bytes written into hasher so far 85 onlyNullBytesInHasher bool // whether the hasher only contains null-bytes so far 86 hasher hash.Hash 87 } 88 89 // NewLevel returns a new hash.Hash computing any level-n hash, except level-0. 90 func NewLevel() hash.Hash { 91 l := &level{} 92 l.Reset() 93 return l 94 } 95 96 // Add takes a position-embedded SHA-1 checksum and adds it to the level. 
97 func (l *level) Add(sha1sum []byte) { 98 var tmp uint 99 var carry bool 100 for i := Size - 1; i >= 0; i-- { 101 tmp = uint(sha1sum[i]) + uint(l.checksum[i]) 102 if carry { 103 tmp++ 104 } 105 carry = tmp > 255 106 l.checksum[i] = byte(tmp) 107 } 108 } 109 110 // IsFull returns whether the number of checksums added to this level reached its capacity. 111 func (l *level) IsFull() bool { 112 return l.sumCount >= sumsPerLevel 113 } 114 115 // Write (via the embedded io.Writer interface) adds more data to the running hash. 116 // Contrary to the specification from hash.Hash, this DOES return an error, 117 // specifically ErrorHashFull if and only if IsFull() returns true. 118 func (l *level) Write(p []byte) (n int, err error) { 119 if l.IsFull() { 120 return 0, ErrorHashFull 121 } 122 onBlockWritten := func(remaining int) error { 123 if !l.onlyNullBytesInHasher { 124 c, err := l.hasher.Write([]byte{byte(l.sumCount)}) 125 l.bytesInHasher += uint32(c) 126 if err != nil { 127 return err 128 } 129 l.Add(l.hasher.Sum(nil)) 130 } 131 l.sumCount++ 132 l.hasher.Reset() 133 if remaining > 0 && l.IsFull() { 134 return ErrorHashFull 135 } 136 return nil 137 } 138 return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten) 139 } 140 141 // Sum appends the current hash to b and returns the resulting slice. 142 // It does not change the underlying hash state. 143 func (l *level) Sum(b []byte) []byte { 144 return append(b, l.checksum[:]...) 145 } 146 147 // Reset resets the Hash to its initial state. 148 func (l *level) Reset() { 149 l.checksum = zeroSum // clear the current checksum 150 l.sumCount = 0 151 l.bytesInHasher = 0 152 l.onlyNullBytesInHasher = true 153 l.hasher = sha1.New() 154 } 155 156 // Size returns the number of bytes Sum will return. 157 func (l *level) Size() int { 158 return Size 159 } 160 161 // BlockSize returns the hash's underlying block size. 
162 // The Write method must be able to accept any amount 163 // of data, but it may operate more efficiently if all writes 164 // are a multiple of the block size. 165 func (l *level) BlockSize() int { 166 return Size 167 } 168 169 // MarshalBinary encodes the hash into a binary form and returns the result. 170 func (l *level) MarshalBinary() ([]byte, error) { 171 b := make([]byte, Size+4+4+1) 172 copy(b, l.checksum[:]) 173 binary.BigEndian.PutUint32(b[Size:], l.sumCount) 174 binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher) 175 if l.onlyNullBytesInHasher { 176 b[Size+4+4] = 1 177 } 178 encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary() 179 if err != nil { 180 return nil, err 181 } 182 b = append(b, encodedHasher...) 183 return b, nil 184 } 185 186 // UnmarshalBinary decodes the binary form generated by MarshalBinary. 187 // The hash will replace its internal state accordingly. 188 func (l *level) UnmarshalBinary(b []byte) error { 189 if len(b) < Size+4+4+1 { 190 return ErrorInvalidEncoding 191 } 192 copy(l.checksum[:], b) 193 l.sumCount = binary.BigEndian.Uint32(b[Size:]) 194 l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:]) 195 switch b[Size+4+4] { 196 case 0: 197 l.onlyNullBytesInHasher = false 198 case 1: 199 l.onlyNullBytesInHasher = true 200 default: 201 return ErrorInvalidEncoding 202 } 203 err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:]) 204 return err 205 } 206 207 // hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes. 
208 type hidriveHash struct { 209 levels []*level // collection of level-hashes, one for each level starting at level-1 210 lastSumWritten [Size]byte // the last checksum written to any of the levels 211 bytesInBlock uint32 // bytes written into blockHash so far 212 onlyNullBytesInBlock bool // whether the hasher only contains null-bytes so far 213 blockHash hash.Hash 214 } 215 216 // New returns a new hash.Hash computing the HiDrive checksum. 217 func New() hash.Hash { 218 h := &hidriveHash{} 219 h.Reset() 220 return h 221 } 222 223 // aggregateToLevel writes the checksum to the level at the given index 224 // and if necessary propagates any changes to levels above. 225 func (h *hidriveHash) aggregateToLevel(index int, sum []byte) { 226 for i := index; ; i++ { 227 if i >= len(h.levels) { 228 h.levels = append(h.levels, NewLevel().(*level)) 229 } 230 _, err := h.levels[i].Write(sum) 231 copy(h.lastSumWritten[:], sum) 232 if err != nil { 233 panic(fmt.Errorf("level-hash should not have produced an error: %w", err)) 234 } 235 if !h.levels[i].IsFull() { 236 break 237 } 238 sum = h.levels[i].Sum(nil) 239 h.levels[i].Reset() 240 } 241 } 242 243 // Write (via the embedded io.Writer interface) adds more data to the running hash. 244 // It never returns an error. 245 func (h *hidriveHash) Write(p []byte) (n int, err error) { 246 onBlockWritten := func(remaining int) error { 247 var sum []byte 248 if h.onlyNullBytesInBlock { 249 sum = zeroSum[:] 250 } else { 251 sum = h.blockHash.Sum(nil) 252 } 253 h.blockHash.Reset() 254 h.aggregateToLevel(0, sum) 255 return nil 256 } 257 return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten) 258 } 259 260 // Sum appends the current hash to b and returns the resulting slice. 261 // It does not change the underlying hash state. 262 func (h *hidriveHash) Sum(b []byte) []byte { 263 // Save internal state. 
264 state, err := h.MarshalBinary() 265 if err != nil { 266 panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err)) 267 } 268 269 if h.bytesInBlock > 0 { 270 // Fill remainder of block with null-bytes. 271 filler := make([]byte, h.BlockSize()-int(h.bytesInBlock)) 272 _, err = h.Write(filler) 273 if err != nil { 274 panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err)) 275 } 276 } 277 278 checksum := zeroSum 279 for i := 0; i < len(h.levels); i++ { 280 level := h.levels[i] 281 if i < len(h.levels)-1 { 282 // Aggregate non-empty non-final levels. 283 if level.sumCount >= 1 { 284 h.aggregateToLevel(i+1, level.Sum(nil)) 285 level.Reset() 286 } 287 } else { 288 // Determine sum of final level. 289 if level.sumCount > 1 { 290 copy(checksum[:], level.Sum(nil)) 291 } else { 292 // This is needed, otherwise there is no way to return 293 // the non-position-embedded checksum. 294 checksum = h.lastSumWritten 295 } 296 } 297 } 298 299 // Restore internal state. 300 err = h.UnmarshalBinary(state) 301 if err != nil { 302 panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err)) 303 } 304 305 return append(b, checksum[:]...) 306 } 307 308 // Reset resets the Hash to its initial state. 309 func (h *hidriveHash) Reset() { 310 h.levels = nil 311 h.lastSumWritten = zeroSum // clear the last written checksum 312 h.bytesInBlock = 0 313 h.onlyNullBytesInBlock = true 314 h.blockHash = sha1.New() 315 } 316 317 // Size returns the number of bytes Sum will return. 318 func (h *hidriveHash) Size() int { 319 return Size 320 } 321 322 // BlockSize returns the hash's underlying block size. 323 // The Write method must be able to accept any amount 324 // of data, but it may operate more efficiently if all writes 325 // are a multiple of the block size. 
326 func (h *hidriveHash) BlockSize() int { 327 return BlockSize 328 } 329 330 // MarshalBinary encodes the hash into a binary form and returns the result. 331 func (h *hidriveHash) MarshalBinary() ([]byte, error) { 332 b := make([]byte, Size+4+1+8) 333 copy(b, h.lastSumWritten[:]) 334 binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock) 335 if h.onlyNullBytesInBlock { 336 b[Size+4] = 1 337 } 338 339 binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels))) 340 for _, level := range h.levels { 341 encodedLevel, err := level.MarshalBinary() 342 if err != nil { 343 return nil, err 344 } 345 encodedLength := make([]byte, 8) 346 binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel))) 347 b = append(b, encodedLength...) 348 b = append(b, encodedLevel...) 349 } 350 encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary() 351 if err != nil { 352 return nil, err 353 } 354 b = append(b, encodedBlockHash...) 355 return b, nil 356 } 357 358 // UnmarshalBinary decodes the binary form generated by MarshalBinary. 359 // The hash will replace its internal state accordingly. 
360 func (h *hidriveHash) UnmarshalBinary(b []byte) error { 361 if len(b) < Size+4+1+8 { 362 return ErrorInvalidEncoding 363 } 364 copy(h.lastSumWritten[:], b) 365 h.bytesInBlock = binary.BigEndian.Uint32(b[Size:]) 366 switch b[Size+4] { 367 case 0: 368 h.onlyNullBytesInBlock = false 369 case 1: 370 h.onlyNullBytesInBlock = true 371 default: 372 return ErrorInvalidEncoding 373 } 374 375 amount := binary.BigEndian.Uint64(b[Size+4+1:]) 376 h.levels = make([]*level, int(amount)) 377 offset := Size + 4 + 1 + 8 378 for i := range h.levels { 379 length := int(binary.BigEndian.Uint64(b[offset:])) 380 offset += 8 381 h.levels[i] = NewLevel().(*level) 382 err := h.levels[i].UnmarshalBinary(b[offset : offset+length]) 383 if err != nil { 384 return err 385 } 386 offset += length 387 } 388 err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:]) 389 return err 390 } 391 392 // Sum returns the HiDrive checksum of the data. 393 func Sum(data []byte) [Size]byte { 394 h := New().(*hidriveHash) 395 _, _ = h.Write(data) 396 var result [Size]byte 397 copy(result[:], h.Sum(nil)) 398 return result 399 } 400 401 // Check the interfaces are satisfied. 402 var ( 403 _ hash.Hash = (*level)(nil) 404 _ encoding.BinaryMarshaler = (*level)(nil) 405 _ encoding.BinaryUnmarshaler = (*level)(nil) 406 _ internal.LevelHash = (*level)(nil) 407 _ hash.Hash = (*hidriveHash)(nil) 408 _ encoding.BinaryMarshaler = (*hidriveHash)(nil) 409 _ encoding.BinaryUnmarshaler = (*hidriveHash)(nil) 410 )