github.com/sercand/please@v13.4.0+incompatible/src/fs/hash.go (about) 1 package fs 2 3 import ( 4 "crypto/sha1" 5 "fmt" 6 "hash" 7 "io" 8 "os" 9 "path/filepath" 10 "strings" 11 "sync" 12 ) 13 14 // boolTrueHashValue is used when we need to write something indicating a bool in the input. 15 var boolTrueHashValue = []byte{2} 16 17 // A PathHasher is responsible for hashing & remembering paths. 18 type PathHasher struct { 19 memo map[string][]byte 20 mutex sync.RWMutex 21 root string 22 } 23 24 // NewPathHasher returns a new PathHasher based on the given root directory. 25 func NewPathHasher(root string) *PathHasher { 26 return &PathHasher{ 27 memo: map[string][]byte{}, 28 root: root, 29 } 30 } 31 32 // Hash hashes a single path. 33 // It is memoised and so will only hash each path once, unless recalc is true which will 34 // then force a recalculation of it. 35 func (hasher *PathHasher) Hash(path string, recalc bool) ([]byte, error) { 36 path = hasher.ensureRelative(path) 37 if !recalc { 38 hasher.mutex.RLock() 39 cached, present := hasher.memo[path] 40 hasher.mutex.RUnlock() 41 if present { 42 return cached, nil 43 } 44 } 45 result, err := hasher.hash(path) 46 if err == nil { 47 hasher.mutex.Lock() 48 hasher.memo[path] = result 49 hasher.mutex.Unlock() 50 } 51 return result, err 52 } 53 54 // MustHash is as Hash but panics on error. 55 func (hasher *PathHasher) MustHash(path string) []byte { 56 hash, err := hasher.Hash(path, false) 57 if err != nil { 58 panic(err) 59 } 60 return hash 61 } 62 63 // MoveHash is used when we move files from tmp to out and there was one there before; that's 64 // the only case in which the hash of a filepath could change. 65 func (hasher *PathHasher) MoveHash(oldPath, newPath string, copy bool) { 66 oldPath = hasher.ensureRelative(oldPath) 67 newPath = hasher.ensureRelative(newPath) 68 hasher.mutex.Lock() 69 defer hasher.mutex.Unlock() 70 if oldHash, present := hasher.memo[oldPath]; present { 71 hasher.memo[newPath] = oldHash 72 // If the path is in plz-out/tmp we aren't ever going to use it again, so free some space. 73 if !copy && strings.HasPrefix(oldPath, "plz-out/tmp") { 74 delete(hasher.memo, oldPath) 75 } 76 } 77 } 78 79 // SetHash is used to directly set a hash for a path. 80 // This is used for remote files where we download them & therefore know the hash as they come in. 81 // TODO(peterebden): We should probably use this more for things like caches and so forth... 82 func (hasher *PathHasher) SetHash(path string, hash []byte) { 83 hasher.mutex.Lock() 84 hasher.memo[path] = hash 85 hasher.mutex.Unlock() 86 } 87 88 func (hasher *PathHasher) hash(path string) ([]byte, error) { 89 h := sha1.New() 90 info, err := os.Lstat(path) 91 if err == nil && info.Mode()&os.ModeSymlink != 0 { 92 // Handle symlinks specially (don't attempt to read their contents). 93 dest, err := os.Readlink(path) 94 if err != nil { 95 return nil, err 96 } 97 // Write something arbitrary indicating this is a symlink. 98 // This isn't quite perfect - it could potentially get mixed up with a file with the 99 // appropriate contents, but that is not really likely. 100 h.Write(boolTrueHashValue) 101 if rel := hasher.ensureRelative(dest); (rel != dest || !filepath.IsAbs(dest)) && !filepath.IsAbs(path) { 102 // Inside the root of our repo so it's something we manage - just hash its (relative) destination 103 h.Write([]byte(rel)) 104 } else { 105 // Outside the repo; it's a system tool, so we hash its contents. 106 err := hasher.fileHash(h, path) 107 return h.Sum(nil), err 108 } 109 return h.Sum(nil), nil 110 } else if err == nil && info.IsDir() { 111 err = WalkMode(path, func(p string, isDir bool, mode os.FileMode) error { 112 if mode&os.ModeSymlink != 0 { 113 // Is a symlink, must verify that it's not a link outside the tmp dir. 114 deref, err := filepath.EvalSymlinks(p) 115 if err != nil { 116 return err 117 } 118 if !strings.HasPrefix(deref, path) { 119 return fmt.Errorf("Output %s links outside the build dir (to %s)", p, deref) 120 } 121 // Deliberately do not attempt to read it. We will read the contents later since 122 // it is a link within the temp dir anyway, and if it's a link to a directory 123 // it can introduce a cycle. 124 // Just write something to the hash indicating that we found something here, 125 // otherwise rules might be marked as unchanged if they added additional symlinks. 126 h.Write(boolTrueHashValue) 127 } else if !isDir { 128 return hasher.fileHash(h, p) 129 } 130 return nil 131 }) 132 } else { 133 err = hasher.fileHash(h, path) // let this handle any other errors 134 } 135 return h.Sum(nil), err 136 } 137 138 // Calculate the hash of a single file 139 func (hasher *PathHasher) fileHash(h hash.Hash, filename string) error { 140 file, err := os.Open(filename) 141 if err != nil { 142 return err 143 } 144 _, err = io.Copy(h, file) 145 file.Close() 146 return err 147 } 148 149 // ensureRelative ensures a path is relative to the repo root. 150 // This is important for getting best performance from memoizing the path hashes. 151 func (hasher *PathHasher) ensureRelative(path string) string { 152 if strings.HasPrefix(path, hasher.root) { 153 return strings.TrimLeft(strings.TrimPrefix(path, hasher.root), "/") 154 } 155 return path 156 }