github.com/sercand/please@v13.4.0+incompatible/src/fs/hash.go (about)

     1  package fs
     2  
     3  import (
     4  	"crypto/sha1"
     5  	"fmt"
     6  	"hash"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  )
    13  
// boolTrueHashValue is used when we need to write something indicating a bool in the input.
// In this file it is written into the running hash as a marker byte whenever a symlink is
// encountered, so that the presence of the link affects the resulting digest.
var boolTrueHashValue = []byte{2}
    16  
// A PathHasher is responsible for hashing & remembering paths.
// Hashes are memoised per path so that each path normally only has to be hashed once.
type PathHasher struct {
	// memo maps a path to the hash that was last computed or set for it.
	memo  map[string][]byte
	// mutex guards every read and write of memo.
	mutex sync.RWMutex
	// root is the directory prefix that ensureRelative strips from paths.
	root  string
}
    23  
    24  // NewPathHasher returns a new PathHasher based on the given root directory.
    25  func NewPathHasher(root string) *PathHasher {
    26  	return &PathHasher{
    27  		memo: map[string][]byte{},
    28  		root: root,
    29  	}
    30  }
    31  
    32  // Hash hashes a single path.
    33  // It is memoised and so will only hash each path once, unless recalc is true which will
    34  // then force a recalculation of it.
    35  func (hasher *PathHasher) Hash(path string, recalc bool) ([]byte, error) {
    36  	path = hasher.ensureRelative(path)
    37  	if !recalc {
    38  		hasher.mutex.RLock()
    39  		cached, present := hasher.memo[path]
    40  		hasher.mutex.RUnlock()
    41  		if present {
    42  			return cached, nil
    43  		}
    44  	}
    45  	result, err := hasher.hash(path)
    46  	if err == nil {
    47  		hasher.mutex.Lock()
    48  		hasher.memo[path] = result
    49  		hasher.mutex.Unlock()
    50  	}
    51  	return result, err
    52  }
    53  
    54  // MustHash is as Hash but panics on error.
    55  func (hasher *PathHasher) MustHash(path string) []byte {
    56  	hash, err := hasher.Hash(path, false)
    57  	if err != nil {
    58  		panic(err)
    59  	}
    60  	return hash
    61  }
    62  
    63  // MoveHash is used when we move files from tmp to out and there was one there before; that's
    64  // the only case in which the hash of a filepath could change.
    65  func (hasher *PathHasher) MoveHash(oldPath, newPath string, copy bool) {
    66  	oldPath = hasher.ensureRelative(oldPath)
    67  	newPath = hasher.ensureRelative(newPath)
    68  	hasher.mutex.Lock()
    69  	defer hasher.mutex.Unlock()
    70  	if oldHash, present := hasher.memo[oldPath]; present {
    71  		hasher.memo[newPath] = oldHash
    72  		// If the path is in plz-out/tmp we aren't ever going to use it again, so free some space.
    73  		if !copy && strings.HasPrefix(oldPath, "plz-out/tmp") {
    74  			delete(hasher.memo, oldPath)
    75  		}
    76  	}
    77  }
    78  
    79  // SetHash is used to directly set a hash for a path.
    80  // This is used for remote files where we download them & therefore know the hash as they come in.
    81  // TODO(peterebden): We should probably use this more for things like caches and so forth...
    82  func (hasher *PathHasher) SetHash(path string, hash []byte) {
    83  	hasher.mutex.Lock()
    84  	hasher.memo[path] = hash
    85  	hasher.mutex.Unlock()
    86  }
    87  
    88  func (hasher *PathHasher) hash(path string) ([]byte, error) {
    89  	h := sha1.New()
    90  	info, err := os.Lstat(path)
    91  	if err == nil && info.Mode()&os.ModeSymlink != 0 {
    92  		// Handle symlinks specially (don't attempt to read their contents).
    93  		dest, err := os.Readlink(path)
    94  		if err != nil {
    95  			return nil, err
    96  		}
    97  		// Write something arbitrary indicating this is a symlink.
    98  		// This isn't quite perfect - it could potentially get mixed up with a file with the
    99  		// appropriate contents, but that is not really likely.
   100  		h.Write(boolTrueHashValue)
   101  		if rel := hasher.ensureRelative(dest); (rel != dest || !filepath.IsAbs(dest)) && !filepath.IsAbs(path) {
   102  			// Inside the root of our repo so it's something we manage - just hash its (relative) destination
   103  			h.Write([]byte(rel))
   104  		} else {
   105  			// Outside the repo; it's a system tool, so we hash its contents.
   106  			err := hasher.fileHash(h, path)
   107  			return h.Sum(nil), err
   108  		}
   109  		return h.Sum(nil), nil
   110  	} else if err == nil && info.IsDir() {
   111  		err = WalkMode(path, func(p string, isDir bool, mode os.FileMode) error {
   112  			if mode&os.ModeSymlink != 0 {
   113  				// Is a symlink, must verify that it's not a link outside the tmp dir.
   114  				deref, err := filepath.EvalSymlinks(p)
   115  				if err != nil {
   116  					return err
   117  				}
   118  				if !strings.HasPrefix(deref, path) {
   119  					return fmt.Errorf("Output %s links outside the build dir (to %s)", p, deref)
   120  				}
   121  				// Deliberately do not attempt to read it. We will read the contents later since
   122  				// it is a link within the temp dir anyway, and if it's a link to a directory
   123  				// it can introduce a cycle.
   124  				// Just write something to the hash indicating that we found something here,
   125  				// otherwise rules might be marked as unchanged if they added additional symlinks.
   126  				h.Write(boolTrueHashValue)
   127  			} else if !isDir {
   128  				return hasher.fileHash(h, p)
   129  			}
   130  			return nil
   131  		})
   132  	} else {
   133  		err = hasher.fileHash(h, path) // let this handle any other errors
   134  	}
   135  	return h.Sum(nil), err
   136  }
   137  
   138  // Calculate the hash of a single file
   139  func (hasher *PathHasher) fileHash(h hash.Hash, filename string) error {
   140  	file, err := os.Open(filename)
   141  	if err != nil {
   142  		return err
   143  	}
   144  	_, err = io.Copy(h, file)
   145  	file.Close()
   146  	return err
   147  }
   148  
   149  // ensureRelative ensures a path is relative to the repo root.
   150  // This is important for getting best performance from memoizing the path hashes.
   151  func (hasher *PathHasher) ensureRelative(path string) string {
   152  	if strings.HasPrefix(path, hasher.root) {
   153  		return strings.TrimLeft(strings.TrimPrefix(path, hasher.root), "/")
   154  	}
   155  	return path
   156  }