github.com/aretext/aretext@v1.3.0/file/watcher.go (about)

     1  package file
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"io/fs"
     8  	"log"
     9  	"os"
    10  	"sync"
    11  	"time"
    12  )
    13  
// DefaultPollInterval is a poll interval of one second.
// NOTE(review): presumably intended to be passed as the pollInterval argument
// of the watcher constructors; confirm against callers.
const DefaultPollInterval = time.Second
    15  
// Watcher checks if a file's contents have changed.
//
// Watchers created by NewWatcherForNewFile or NewWatcherForExistingFile run a
// goroutine that polls the file and reports a detected change on the channel
// returned by ChangedChan. Stop signals that goroutine to exit.
type Watcher struct {
	// These fields are immutable, so they can be read safely from any goroutine.
	// path is the file being watched (empty for a watcher from NewEmptyWatcher).
	// isNewFile is true when the watcher was created for a file that did not yet exist on disk.
	// size and checksum describe the file's contents at the time it was loaded;
	// both keep their zero values for new-file and empty watchers.
	path      string
	isNewFile bool
	size      int64
	checksum  string

	// After the watcher is constructed, this field is read and written
	// only by the watcher goroutine.
	// It holds the most recent modification time the watcher has observed for the file.
	lastModified time.Time

	// changedChan receives a message from the polling goroutine when a change is detected.
	// quitChan is closed by Stop to tell the polling goroutine to exit; it is nil
	// for a watcher from NewEmptyWatcher, which has no polling goroutine.
	// stopOnce ensures quitChan is closed at most once, however often Stop is called.
	changedChan chan struct{}
	quitChan    chan struct{}
	stopOnce    sync.Once
}
    32  
    33  // NewWatcherForNewFile returns a watcher for a file that does not yet exist on disk.
    34  func NewWatcherForNewFile(pollInterval time.Duration, path string) *Watcher {
    35  	w := &Watcher{
    36  		path:        path,
    37  		isNewFile:   true,
    38  		changedChan: make(chan struct{}),
    39  		quitChan:    make(chan struct{}),
    40  	}
    41  	go w.checkFileLoop(pollInterval)
    42  	return w
    43  }
    44  
    45  // NewWatcherForExistingFile returns a watcher for a file that exists on disk.
    46  // lastModified is the time the file was last modified, as reported when the file was loaded.
    47  // size is the size in bytes of the file when it was loaded.
    48  // checksum is an MD5 hash of the file's contents when it was loaded.
    49  func NewWatcherForExistingFile(pollInterval time.Duration, path string, lastModified time.Time, size int64, checksum string) *Watcher {
    50  	w := &Watcher{
    51  		path:         path,
    52  		size:         size,
    53  		lastModified: lastModified,
    54  		checksum:     checksum,
    55  		changedChan:  make(chan struct{}),
    56  		quitChan:     make(chan struct{}),
    57  	}
    58  	go w.checkFileLoop(pollInterval)
    59  	return w
    60  }
    61  
    62  // NewEmptyWatcher returns a watcher that has an empty path and never triggers.
    63  func NewEmptyWatcher() *Watcher {
    64  	return &Watcher{changedChan: make(chan struct{})}
    65  }
    66  
// Path returns the path to the file being watched.
// For a watcher created by NewEmptyWatcher, this is the empty string.
func (w *Watcher) Path() string {
	return w.path
}
    71  
    72  // Stop stops the watcher from checking for changes.
    73  func (w *Watcher) Stop() {
    74  	w.stopOnce.Do(func() {
    75  		if w.quitChan != nil {
    76  			log.Printf("Stopping file watcher for %s...\n", w.path)
    77  			close(w.quitChan)
    78  		}
    79  	})
    80  }
    81  
    82  // CheckFileMovedOrDeleted checks whether the file used to exist
    83  // at the path but has since been moved or deleted.
    84  func (w *Watcher) CheckFileMovedOrDeleted() (bool, error) {
    85  	if w.isNewFile {
    86  		// File has not been created yet, so it can't have been moved or deleted.
    87  		return false, nil
    88  	}
    89  
    90  	_, err := os.Stat(w.path)
    91  	if err != nil {
    92  		if errors.Is(err, fs.ErrNotExist) {
    93  			// File used to exist, but no longer exists at the path,
    94  			// so it must have been moved or deleted.
    95  			return true, nil
    96  		}
    97  
    98  		return false, fmt.Errorf("os.Stat: %w", err)
    99  	}
   100  
   101  	// File still exists at the path.
   102  	return false, nil
   103  }
   104  
   105  // CheckFileContentsChanged checks whether the file's checksum has changed.
   106  // If the file no longer exists, this will return an error.
   107  func (w *Watcher) CheckFileContentsChanged() (bool, error) {
   108  	checksum, err := w.calculateChecksum()
   109  	if err != nil {
   110  		return false, err
   111  	}
   112  	changed := checksum != w.checksum
   113  	return changed, nil
   114  }
   115  
// ChangedChan returns a channel that receives a message when the file's contents change.
// This can produce false negatives if an error occurs accessing the file (for example, if file permissions changed).
// The channel will receive at most one message.
// The constructors create the channel unbuffered, so the message is handed over only once a receiver is ready.
// For a watcher created by NewEmptyWatcher, no polling goroutine is started, so no message is ever sent.
// This method is thread-safe.
func (w *Watcher) ChangedChan() chan struct{} {
	return w.changedChan
}
   123  
   124  func (w *Watcher) checkFileLoop(pollInterval time.Duration) {
   125  	log.Printf("Started file watcher for %s\n", w.path)
   126  	ticker := time.NewTicker(pollInterval)
   127  	defer ticker.Stop()
   128  	for {
   129  		select {
   130  		case <-ticker.C:
   131  			if w.checkFileChanged() {
   132  				log.Printf("File change detected in %s\n", w.path)
   133  				w.changedChan <- struct{}{}
   134  				return
   135  			}
   136  		case <-w.quitChan:
   137  			log.Printf("Quit channel closed, exiting check file loop for %s\n", w.path)
   138  			return
   139  		}
   140  	}
   141  }
   142  
   143  func (w *Watcher) checkFileChanged() bool {
   144  	fileInfo, err := os.Stat(w.path)
   145  	if err != nil {
   146  		if !errors.Is(err, fs.ErrNotExist) {
   147  			log.Printf("Error retrieving file info: %v\n", err)
   148  		}
   149  		return false
   150  	}
   151  
   152  	// If neither mtime or size changed since the last check or file load, the contents probably haven't changed.
   153  	// This check could produce a false negative if someone modifies the file immediately after loading it (within mtime granularity)
   154  	// and replaces bytes without changing the size, but it's so much cheaper than calculating the md5 checksum that we do it anyway.
   155  	// It is safe to read lastModified and size because no other goroutine mutates these.
   156  	if w.lastModified.Equal(fileInfo.ModTime()) && w.size == fileInfo.Size() {
   157  		return false
   158  	}
   159  
   160  	// It is possible for someone to update the file's last modified time without changing the contents.
   161  	// If that happens, we want to avoid calculating the checksum on every poll, so update the watcher's lastModified time.
   162  	// It is safe to modify lastModified because the check file loop goroutine is the only reader.
   163  	w.lastModified = fileInfo.ModTime()
   164  
   165  	checksum, err := w.calculateChecksum()
   166  	if err != nil {
   167  		log.Printf("Could not checksum file: %v\n", err)
   168  		return false
   169  	}
   170  
   171  	return checksum != w.checksum
   172  }
   173  
   174  func (w *Watcher) calculateChecksum() (string, error) {
   175  	f, err := os.Open(w.path)
   176  	if err != nil {
   177  		return "", fmt.Errorf("os.Open: %w", err)
   178  	}
   179  	defer f.Close()
   180  
   181  	checksummer := NewChecksummer()
   182  	if _, err := io.Copy(checksummer, f); err != nil {
   183  		return "", fmt.Errorf("io.Copy: %w", err)
   184  	}
   185  
   186  	return checksummer.Checksum(), nil
   187  }