github.com/derat/nup@v0.0.0-20230418113745-15592ba7c620/cmd/nup/update/scan.go (about) 1 // Copyright 2020 Daniel Erat. 2 // All rights reserved. 3 4 package update 5 6 import ( 7 "bufio" 8 "fmt" 9 "log" 10 "os" 11 "path/filepath" 12 "sort" 13 "syscall" 14 "time" 15 16 "github.com/derat/nup/cmd/nup/client" 17 "github.com/derat/nup/cmd/nup/client/files" 18 "github.com/derat/nup/server/db" 19 ) 20 21 const ( 22 // Maximum number of songs to read at once. This needs to not be too high to avoid 23 // running out of FDs when readSong calls block on computing gain adjustments (my system 24 // has a default soft limit of 1024 per "ulimit -Sn"), but it should also be high enough 25 // that we're processing songs from different albums simultaneously so we can run 26 // multiple copies of mp3gain in parallel on multicore systems. 27 maxScanWorkers = 64 28 29 logProgressInterval = 100 30 ) 31 32 // readSongList reads a list of relative (to cfg.MusicDir) paths from listPath 33 // and asynchronously sends the resulting Song structs to ch. 34 // The number of songs that will be sent to the channel is returned. 35 func readSongList(cfg *client.Config, listPath string, ch chan songOrErr, 36 opts *scanOptions) (numSongs int, err error) { 37 f, err := os.Open(listPath) 38 if err != nil { 39 return 0, err 40 } 41 defer f.Close() 42 43 // Read the list synchronously first to get the number of songs. 44 var paths []string // relative paths 45 sc := bufio.NewScanner(f) 46 for sc.Scan() { 47 paths = append(paths, sc.Text()) 48 } 49 if err := sc.Err(); err != nil { 50 return 0, err 51 } 52 53 gains, err := files.NewGainsCache(cfg, opts.dumpedGainsPath) 54 if err != nil { 55 return 0, err 56 } 57 58 // Now read the files asynchronously (but one at a time). 59 // TODO: Consider reading multiple songs simultaneously as in scanForUpdatedSongs 60 // so that gain calculation is parallelized. 61 go func() { 62 for _, rel := range paths { 63 full := filepath.Join(cfg.MusicDir, rel) 64 s, err := files.ReadSong(cfg, full, nil, 0, gains) 65 ch <- songOrErr{s, err} 66 } 67 }() 68 69 return len(paths), nil 70 } 71 72 // scanOptions contains options for scanForUpdatedSongs and readSongList. 73 // Some of the options aren't used by readSongList. 74 type scanOptions struct { 75 forceGlob string // glob matching files to update even if unchanged 76 logProgress bool // periodically log progress while scanning 77 dumpedGainsPath string // file with JSON-marshaled db.Song objects 78 } 79 80 // scanForUpdatedSongs looks for songs under cfg.MusicDir updated more recently than lastUpdateTime or 81 // in directories not listed in lastUpdateDirs and asynchronously sends the resulting Song structs 82 // to ch. The number of songs that will be sent to the channel and seen directories (relative to 83 // musicDir) are returned. 84 func scanForUpdatedSongs(cfg *client.Config, lastUpdateTime time.Time, lastUpdateDirs []string, 85 ch chan songOrErr, opts *scanOptions) (numUpdates int, seenDirs []string, err error) { 86 var numSongs int // total number of songs under cfg.MusicDir 87 88 oldDirs := make(map[string]struct{}, len(lastUpdateDirs)) 89 for _, d := range lastUpdateDirs { 90 oldDirs[d] = struct{}{} 91 } 92 newDirs := make(map[string]struct{}) 93 94 gains, err := files.NewGainsCache(cfg, opts.dumpedGainsPath) 95 if err != nil { 96 return 0, nil, err 97 } 98 99 workers := make(chan struct{}, maxScanWorkers) 100 if err := filepath.Walk(cfg.MusicDir, func(path string, fi os.FileInfo, err error) error { 101 if err != nil { 102 return err 103 } 104 if !fi.Mode().IsRegular() || !files.IsMusicPath(path) { 105 return nil 106 } 107 relPath, err := filepath.Rel(cfg.MusicDir, path) 108 if err != nil { 109 return fmt.Errorf("%q isn't subpath of %q: %v", path, cfg.MusicDir, err) 110 } 111 112 numSongs++ 113 if opts.logProgress && numSongs%logProgressInterval == 0 { 114 log.Printf("Scanned %v files", numSongs) 115 } 116 117 relDir := filepath.Dir(relPath) 118 newDirs[relDir] = struct{}{} 119 120 if opts.forceGlob != "" { 121 if matched, err := filepath.Match(opts.forceGlob, relPath); err != nil { 122 return fmt.Errorf("invalid glob %q: %v", opts.forceGlob, err) 123 } else if !matched { 124 return nil 125 } 126 } else { 127 // Bail out if the file isn't new and we saw its directory in the last update. 128 // We need to check for new directories to handle the situation described at 129 // https://github.com/derat/nup/issues/22 where a directory containing files 130 // with old timestamps is moved into the tree. 131 oldFile := fi.ModTime().Before(lastUpdateTime) && getCtime(fi).Before(lastUpdateTime) 132 _, oldDir := oldDirs[relDir] 133 134 // Handle old configs that don't include previously-seen directories. 135 if len(oldDirs) == 0 { 136 oldDir = true 137 } 138 139 // Also check if an updated metadata override file exists. 140 // TODO: If override files are also used to add synthetic songs for 141 // https://github.com/derat/nup/issues/32, then scanForUpdatedSongs will need to 142 // scan all of cfg.MetadataDir while also avoiding duplicate updates in the case 143 // where both the song file and the corresponding override file have been updated. 144 var newMetadata bool 145 if mp, err := files.MetadataOverridePath(cfg, relPath); err == nil { 146 if mfi, err := os.Stat(mp); err == nil { 147 // Avoid checking ctime since the metastore program doesn't seem to set it. 148 // TODO: This check is somewhat incorrect since it doesn't include the oldDirs 149 // trickiness used above for song files. More worryingly, a song won't be 150 // rescanned if its override file is deleted. I guess override files should be 151 // set to "{}" instead of being deleted. The only other alternative seems to be 152 // listing all known override files within cfg.LastUpdateInfoFile. 153 newMetadata = !mfi.ModTime().Before(lastUpdateTime) 154 } 155 } 156 157 if oldFile && oldDir && !newMetadata { 158 return nil 159 } 160 } 161 162 go func() { 163 // Avoid having too many parallel readSong calls, as we can run out of FDs. 164 // TODO: Find a better way to rate-limit this that also avoids creating so many 165 // simultaneous goroutines. Updating 10,000 songs brought my computer to its knees. 166 workers <- struct{}{} 167 s, err := files.ReadSong(cfg, path, fi, 0, gains) 168 <-workers 169 if err != nil && s == nil { 170 s = &db.Song{Filename: relPath} // return the filename for error reporting 171 } 172 ch <- songOrErr{s, err} 173 }() 174 175 numUpdates++ 176 return nil 177 }); err != nil { 178 return 0, nil, err 179 } 180 181 if opts.logProgress { 182 log.Printf("Found %v update(s) among %v files", numUpdates, numSongs) 183 } 184 for d := range newDirs { 185 seenDirs = append(seenDirs, d) 186 } 187 sort.Strings(seenDirs) 188 return numUpdates, seenDirs, nil 189 } 190 191 // getCtime returns fi's ctime (i.e. when its metadata was last changed). 192 func getCtime(fi os.FileInfo) time.Time { 193 stat := fi.Sys().(*syscall.Stat_t) 194 return time.Unix(int64(stat.Ctim.Sec), int64(stat.Ctim.Nsec)) 195 }