github.com/derat/nup@v0.0.0-20230418113745-15592ba7c620/cmd/nup/update/scan.go (about)

     1  // Copyright 2020 Daniel Erat.
     2  // All rights reserved.
     3  
     4  package update
     5  
     6  import (
     7  	"bufio"
     8  	"fmt"
     9  	"log"
    10  	"os"
    11  	"path/filepath"
    12  	"sort"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/derat/nup/cmd/nup/client"
    17  	"github.com/derat/nup/cmd/nup/client/files"
    18  	"github.com/derat/nup/server/db"
    19  )
    20  
    21  const (
    22  	// Maximum number of songs to read at once. This needs to not be too high to avoid
    23  	// running out of FDs when readSong calls block on computing gain adjustments (my system
    24  	// has a default soft limit of 1024 per "ulimit -Sn"), but it should also be high enough
    25  	// that we're processing songs from different albums simultaneously so we can run
    26  	// multiple copies of mp3gain in parallel on multicore systems.
    27  	maxScanWorkers = 64
    28  
    29  	logProgressInterval = 100
    30  )
    31  
    32  // readSongList reads a list of relative (to cfg.MusicDir) paths from listPath
    33  // and asynchronously sends the resulting Song structs to ch.
    34  // The number of songs that will be sent to the channel is returned.
    35  func readSongList(cfg *client.Config, listPath string, ch chan songOrErr,
    36  	opts *scanOptions) (numSongs int, err error) {
    37  	f, err := os.Open(listPath)
    38  	if err != nil {
    39  		return 0, err
    40  	}
    41  	defer f.Close()
    42  
    43  	// Read the list synchronously first to get the number of songs.
    44  	var paths []string // relative paths
    45  	sc := bufio.NewScanner(f)
    46  	for sc.Scan() {
    47  		paths = append(paths, sc.Text())
    48  	}
    49  	if err := sc.Err(); err != nil {
    50  		return 0, err
    51  	}
    52  
    53  	gains, err := files.NewGainsCache(cfg, opts.dumpedGainsPath)
    54  	if err != nil {
    55  		return 0, err
    56  	}
    57  
    58  	// Now read the files asynchronously (but one at a time).
    59  	// TODO: Consider reading multiple songs simultaneously as in scanForUpdatedSongs
    60  	// so that gain calculation is parallelized.
    61  	go func() {
    62  		for _, rel := range paths {
    63  			full := filepath.Join(cfg.MusicDir, rel)
    64  			s, err := files.ReadSong(cfg, full, nil, 0, gains)
    65  			ch <- songOrErr{s, err}
    66  		}
    67  	}()
    68  
    69  	return len(paths), nil
    70  }
    71  
// scanOptions contains options for scanForUpdatedSongs and readSongList.
// Some of the options aren't used by readSongList: it only reads dumpedGainsPath.
type scanOptions struct {
	// forceGlob, if non-empty, is a filepath.Match pattern that scanForUpdatedSongs
	// applies to each song's path relative to cfg.MusicDir. Matching files are always
	// reported and non-matching files are skipped, without looking at timestamps.
	forceGlob       string // glob matching files to update even if unchanged
	// logProgress makes scanForUpdatedSongs log a message every logProgressInterval
	// files and a summary once the scan finishes.
	logProgress     bool   // periodically log progress while scanning
	// dumpedGainsPath is passed to files.NewGainsCache by both functions.
	dumpedGainsPath string // file with JSON-marshaled db.Song objects
}
    79  
    80  // scanForUpdatedSongs looks for songs under cfg.MusicDir updated more recently than lastUpdateTime or
    81  // in directories not listed in lastUpdateDirs and asynchronously sends the resulting Song structs
    82  // to ch. The number of songs that will be sent to the channel and seen directories (relative to
    83  // musicDir) are returned.
    84  func scanForUpdatedSongs(cfg *client.Config, lastUpdateTime time.Time, lastUpdateDirs []string,
    85  	ch chan songOrErr, opts *scanOptions) (numUpdates int, seenDirs []string, err error) {
    86  	var numSongs int // total number of songs under cfg.MusicDir
    87  
    88  	oldDirs := make(map[string]struct{}, len(lastUpdateDirs))
    89  	for _, d := range lastUpdateDirs {
    90  		oldDirs[d] = struct{}{}
    91  	}
    92  	newDirs := make(map[string]struct{})
    93  
    94  	gains, err := files.NewGainsCache(cfg, opts.dumpedGainsPath)
    95  	if err != nil {
    96  		return 0, nil, err
    97  	}
    98  
    99  	workers := make(chan struct{}, maxScanWorkers)
   100  	if err := filepath.Walk(cfg.MusicDir, func(path string, fi os.FileInfo, err error) error {
   101  		if err != nil {
   102  			return err
   103  		}
   104  		if !fi.Mode().IsRegular() || !files.IsMusicPath(path) {
   105  			return nil
   106  		}
   107  		relPath, err := filepath.Rel(cfg.MusicDir, path)
   108  		if err != nil {
   109  			return fmt.Errorf("%q isn't subpath of %q: %v", path, cfg.MusicDir, err)
   110  		}
   111  
   112  		numSongs++
   113  		if opts.logProgress && numSongs%logProgressInterval == 0 {
   114  			log.Printf("Scanned %v files", numSongs)
   115  		}
   116  
   117  		relDir := filepath.Dir(relPath)
   118  		newDirs[relDir] = struct{}{}
   119  
   120  		if opts.forceGlob != "" {
   121  			if matched, err := filepath.Match(opts.forceGlob, relPath); err != nil {
   122  				return fmt.Errorf("invalid glob %q: %v", opts.forceGlob, err)
   123  			} else if !matched {
   124  				return nil
   125  			}
   126  		} else {
   127  			// Bail out if the file isn't new and we saw its directory in the last update.
   128  			// We need to check for new directories to handle the situation described at
   129  			// https://github.com/derat/nup/issues/22 where a directory containing files
   130  			// with old timestamps is moved into the tree.
   131  			oldFile := fi.ModTime().Before(lastUpdateTime) && getCtime(fi).Before(lastUpdateTime)
   132  			_, oldDir := oldDirs[relDir]
   133  
   134  			// Handle old configs that don't include previously-seen directories.
   135  			if len(oldDirs) == 0 {
   136  				oldDir = true
   137  			}
   138  
   139  			// Also check if an updated metadata override file exists.
   140  			// TODO: If override files are also used to add synthetic songs for
   141  			// https://github.com/derat/nup/issues/32, then scanForUpdatedSongs will need to
   142  			// scan all of cfg.MetadataDir while also avoiding duplicate updates in the case
   143  			// where both the song file and the corresponding override file have been updated.
   144  			var newMetadata bool
   145  			if mp, err := files.MetadataOverridePath(cfg, relPath); err == nil {
   146  				if mfi, err := os.Stat(mp); err == nil {
   147  					// Avoid checking ctime since the metastore program doesn't seem to set it.
   148  					// TODO: This check is somewhat incorrect since it doesn't include the oldDirs
   149  					// trickiness used above for song files. More worryingly, a song won't be
   150  					// rescanned if its override file is deleted. I guess override files should be
   151  					// set to "{}" instead of being deleted. The only other alternative seems to be
   152  					// listing all known override files within cfg.LastUpdateInfoFile.
   153  					newMetadata = !mfi.ModTime().Before(lastUpdateTime)
   154  				}
   155  			}
   156  
   157  			if oldFile && oldDir && !newMetadata {
   158  				return nil
   159  			}
   160  		}
   161  
   162  		go func() {
   163  			// Avoid having too many parallel readSong calls, as we can run out of FDs.
   164  			// TODO: Find a better way to rate-limit this that also avoids creating so many
   165  			// simultaneous goroutines. Updating 10,000 songs brought my computer to its knees.
   166  			workers <- struct{}{}
   167  			s, err := files.ReadSong(cfg, path, fi, 0, gains)
   168  			<-workers
   169  			if err != nil && s == nil {
   170  				s = &db.Song{Filename: relPath} // return the filename for error reporting
   171  			}
   172  			ch <- songOrErr{s, err}
   173  		}()
   174  
   175  		numUpdates++
   176  		return nil
   177  	}); err != nil {
   178  		return 0, nil, err
   179  	}
   180  
   181  	if opts.logProgress {
   182  		log.Printf("Found %v update(s) among %v files", numUpdates, numSongs)
   183  	}
   184  	for d := range newDirs {
   185  		seenDirs = append(seenDirs, d)
   186  	}
   187  	sort.Strings(seenDirs)
   188  	return numUpdates, seenDirs, nil
   189  }
   190  
   191  // getCtime returns fi's ctime (i.e. when its metadata was last changed).
   192  func getCtime(fi os.FileInfo) time.Time {
   193  	stat := fi.Sys().(*syscall.Stat_t)
   194  	return time.Unix(int64(stat.Ctim.Sec), int64(stat.Ctim.Nsec))
   195  }