// github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/go/hash/hash.go

// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package hash implements the hash function used throughout Noms.
//
// Noms serialization from version 4-onward uses the first 20 bytes of sha-512 for hashes.
//
// sha-512 was chosen because:
//
// - sha-1 is no longer recommended.
// - sha-3 is brand new, not a lot of platform support.
// - blake is not commonly used, not a lot of platform support.
// - within sha-2, sha-512 is faster than sha-256 on 64 bit.
//
// Our specific truncation scheme (first 20 bytes) was chosen because:
//
// - The "standard" truncation schemes are not widely supported. For example, at time of writing, there is no fast native implementation of sha512/256 on Node.
// - The smallest standard truncation of sha512 is 28 bytes, but we don't need this many. And because we are a database, the size of the hashes matters. Bigger hashes mean less data in each chunk, which means less tree fan-out, which means slower iteration and searching. 20 bytes is a good balance between collision resistance and wide trees.
// - 20 bytes leads to a nice round number of base32 digits: 32.
//
// The textual serialization of hashes uses big-endian base32 with the alphabet {0-9,a-v}. This scheme was chosen because:
//
// - It's easy to convert to and from base32 without bignum arithmetic.
// - No special chars: you can double-click to select in GUIs.
// - Sorted hashes will be sorted textually, making it easy to scan for humans.
//
// In Noms, the hash function is a component of the serialization version, which is constant over the entire lifetime of a single database. So clients do not need to worry about encountering multiple hash functions in the same database.
29 package hash 30 31 import ( 32 "bytes" 33 "crypto/sha512" 34 "fmt" 35 "regexp" 36 "strconv" 37 38 "github.com/attic-labs/noms/go/d" 39 ) 40 41 const ( 42 // ByteLen is the number of bytes used to represent the Hash. 43 ByteLen = 20 44 45 // StringLen is the number of characters need to represent the Hash using Base32. 46 StringLen = 32 // 20 * 8 / log2(32) 47 ) 48 49 var ( 50 pattern = regexp.MustCompile("^([0-9a-v]{" + strconv.Itoa(StringLen) + "})$") 51 emptyHash = Hash{} 52 ) 53 54 // Hash is used to represent the hash of a Noms Value. 55 type Hash [ByteLen]byte 56 57 // IsEmpty determines if this Hash is equal to the empty hash (all zeroes). 58 func (h Hash) IsEmpty() bool { 59 return h == emptyHash 60 } 61 62 // String returns a string representation of the hash using Base32 encoding. 63 func (h Hash) String() string { 64 return encode(h[:]) 65 } 66 67 // Of computes a new Hash from data. 68 func Of(data []byte) Hash { 69 r := sha512.Sum512(data) 70 h := Hash{} 71 copy(h[:], r[:ByteLen]) 72 return h 73 } 74 75 // New creates a new Hash backed by data, ensuring that data is an acceptable length. 76 func New(data []byte) Hash { 77 d.PanicIfFalse(len(data) == ByteLen) 78 h := Hash{} 79 copy(h[:], data) 80 return h 81 } 82 83 // MaybeParse parses a string representing a hash as a Base32 encoded byte array. 84 // If the string is not well formed then this returns (emptyHash, false). 85 func MaybeParse(s string) (Hash, bool) { 86 match := pattern.FindStringSubmatch(s) 87 if match == nil { 88 return emptyHash, false 89 } 90 return New(decode(s)), true 91 } 92 93 // Parse parses a string representing a hash as a Base32 encoded byte array. 94 // If the string is not well formed then this panics. 95 func Parse(s string) Hash { 96 r, ok := MaybeParse(s) 97 if !ok { 98 d.PanicIfError(fmt.Errorf("Cound not parse Hash: %s", s)) 99 } 100 return r 101 } 102 103 // Less compares two hashes returning whether this Hash is less than other. 
104 func (h Hash) Less(other Hash) bool { 105 return bytes.Compare(h[:], other[:]) < 0 106 } 107 108 // Greater compares two hashes returning whether this Hash is greater than other. 109 func (h Hash) Greater(other Hash) bool { 110 // TODO: Remove this 111 return bytes.Compare(h[:], other[:]) > 0 112 } 113 114 // HashSet is a set of Hashes. 115 type HashSet map[Hash]struct{} 116 117 func NewHashSet(hashes ...Hash) HashSet { 118 out := make(HashSet, len(hashes)) 119 for _, h := range hashes { 120 out.Insert(h) 121 } 122 return out 123 } 124 125 // Insert adds a Hash to the set. 126 func (hs HashSet) Insert(hash Hash) { 127 hs[hash] = struct{}{} 128 } 129 130 // Has returns true if the HashSet contains hash. 131 func (hs HashSet) Has(hash Hash) (has bool) { 132 _, has = hs[hash] 133 return 134 } 135 136 // Remove removes hash from the HashSet. 137 func (hs HashSet) Remove(hash Hash) { 138 delete(hs, hash) 139 }