github.com/haraldrudell/parl@v0.4.176/pfs/traverser.go (about) 1 /* 2 © 2023–present Harald Rudell <harald.rudell@gmail.com> (https://haraldrudell.github.io/haraldrudell/) 3 ISC License 4 */ 5 6 // Package pfs provides a symlink-following file-systemtraverser and other file-system functions. 7 package pfs 8 9 import ( 10 "io/fs" 11 "path/filepath" 12 "strings" 13 "sync/atomic" 14 15 "github.com/haraldrudell/parl/perrors" 16 "github.com/haraldrudell/parl/pslices" 17 ) 18 19 const ( 20 // platform path separator as a string 21 sSep = string(filepath.Separator) 22 // [os.File.ReadDir] get all names 23 allNamesAtOnce = -1 24 ) 25 26 // Traverser represents a file system that is scanned following symlinks 27 // - each file system entry is returned exactly once except: 28 // - — error reading a directory returns the directory a second time 29 // - directories and symlinks are returned before they are read so that they can be 30 // more efficiently skipped by invoking [ResultEntry.Skip] 31 // - directory entries are returned in 8-bit character order 32 // - returned entries may not exist, such entries have [ResultEntry.Err] non-nil 33 // - result.ProvidedPath is based on the initial path and may be relative, 34 // unclean and contain symlinks 35 // - if [ResultEntry.Err] is nil, Abs is absolute, symlink-free clean path 36 // - — 37 // - ResultEntry.DirEntry.Info typically invokes [os.Lstat] every time, 38 // so this value should be cached 39 // - because symlinks may point to parents or separate trees, 40 // the file system scan may involve multiple roots which may 41 // affect the order of return entries 42 // - symlinks are followed and not returned. 43 // Therefore, a symlink pointing to a scanned location is effectively ignored 44 // - the returned struct is by value. If its address is not taken, 45 // no allocation will occur 46 type Traverser struct { 47 // path provided to new-function for the initial root 48 initialPath string 49 // skipNo provides a serial number for returned directories 50 skipNo atomic.Uint64 51 // skippables holds pending skippables 52 skippables []ResultEntry 53 // collection of skippables marked to be skipped 54 skipMap map[uint64]struct{} 55 // basenames from read directories to be processed 56 dirEntries []dirEntry 57 // index in rootsRegistry being traversed 58 rootIndex int 59 // registry of the absolute paths for each encountered root 60 // - key: absolute, symlink-free, clean path 61 rootsRegistry Registry[Root2] 62 // obsoleteRoots were obsoleted by a symlink pointing to 63 // a parent directory 64 // - these roots will be encountered during traversal 65 obsoleteRoots Registry[Root2] 66 } 67 68 // dirEntry is a value-container for a read directory entry 69 // - [os.File.ReadDir] returns dirEntry with deferred [fs.FileInfo] 70 type dirEntry struct { 71 abs, providedPath string 72 dirEntry fs.DirEntry 73 } 74 75 // NewTraverser returns a file-system traverser 76 // - typically used via [pfs.Iterator] or [pfs.DirIterator] 77 // - path is the initial path. 78 // Path may be relative or absolute, contain symlinks and be unclean. 79 // Path may be of any modeType: file, directory or special file. 80 // Empty string means process’ current directory 81 // - the Next method is used to obtain file-system entries and errors 82 // - consider using pfs iterators: 83 // - — [Iterator] for all entries and errors 84 // - — [DirIterator] for error-free directories 85 // 86 // Usage: 87 // 88 // var traverser = pfs.NewTraverser(path) 89 // for { 90 // var result = traverser.Next() 91 // if result.IsEnd() || result.Err != nil { 92 // break 93 // } 94 // println(result.Abs) 95 // } 96 func NewTraverser(path string) (traverser *Traverser) { 97 return &Traverser{ 98 initialPath: path, 99 skipMap: make(map[uint64]struct{}), 100 rootsRegistry: *NewRegistry[Root2](), 101 obsoleteRoots: *NewRegistry[Root2](), 102 } 103 } 104 105 // skip marks no for skipping 106 func (t *Traverser) skip(no uint64) { t.skipMap[no] = struct{}{} } 107 108 // skipCheck returns true if no is marked for skipping 109 func (t *Traverser) skipCheck(no uint64) (skip bool) { 110 if _, skip = t.skipMap[no]; !skip { 111 return 112 } 113 delete(t.skipMap, no) 114 115 return 116 } 117 118 // createInitialRoot returns the createInitialRoot entry and creates and registers its root 119 // - entry is non-nil and may be symbolic link 120 // - entry has ProvidedPath and DirEntry 121 // - if entry.Err is nil, Abs and Name/IsDir/Type/Info are available 122 func (t *Traverser) createInitialRoot() (entry ResultEntry) { 123 124 // create a root for path provided to NewTree2 125 var root = NewRoot2(t.initialPath) 126 t.initialPath = "" 127 128 // load absolute, symlink-free, clean path 129 // - errors if [os.Getwd] or [os.Readlink] fails 130 if entry.Err = root.Load(); entry.Err == nil { 131 // the root is usable 132 t.rootsRegistry.Add(root.Abs, root) 133 } 134 135 var err error 136 // modeType is required to examine the entry 137 // - it is not available, so [os.Lstat] and [os.Stat] must be invoked 138 // - start with Lstat to see if it is a symlink 139 entry.ProvidedPath = root.ProvidedPath 140 entry.Abs = root.Abs 141 if entry.Abs != "" { 142 entry.DirEntry, entry.Err = AddDirEntry(entry.Abs) 143 } else { 144 // provide best-effort DirEntry 145 if entry.DirEntry, err = AddDirEntry(entry.ProvidedPath); err != nil { 146 entry.Err = perrors.AppendError(entry.Err, err) 147 } 148 } 149 150 // if Lstat failed, use a deferred-error dirEntry 151 if entry.DirEntry == nil { 152 entry.DirEntry = NewDeferringDirEntry(entry.ProvidedPath) 153 } 154 155 return 156 } 157 158 // processSymlink checks for new or obsoleted roots from a symlink 159 func (t *Traverser) processSymlink(absTarget string) { 160 161 // check for exact match to existing root 162 if t.rootsRegistry.HasAbs(absTarget) { 163 return // symlink matches existing root: ignore it 164 } 165 166 // match absTarget against existing roots 167 var length = t.rootsRegistry.ListLength() 168 for i := 0; i < length; i++ { 169 170 // iterate over roots 171 var root = t.rootsRegistry.GetValue(i) 172 if root == nil { 173 continue // a discarded root 174 } 175 var rootAbs = root.Abs + sSep 176 var targetAbs = absTarget + sSep 177 178 // if absTarget is a subdirectory of an existing root, it can be ignored 179 if strings.HasPrefix(targetAbs, rootAbs) { 180 return // symlink is a sub-entry of an existing root: ignore it 181 } 182 183 // if root is not a subdirectory of absTarget, check the next root 184 if !strings.HasPrefix(rootAbs, targetAbs) { 185 continue 186 } 187 188 // root is a subdirectory of this symlink, obsolete the root 189 if i <= t.rootIndex { 190 // the obsolete root was already being traversed 191 // - save it 192 t.obsoleteRoots.Add(root.Abs, root) 193 } 194 t.rootsRegistry.ObsoleteIndex(i) 195 } 196 // the symlink is disparate from all existing roots 197 198 // scan as new root 199 var root = NewAbsRoot2(absTarget) 200 t.rootsRegistry.Add(absTarget, root) 201 } 202 203 // readDir reads a directory and adds entries to t.dirEntries 204 func (t *Traverser) readDir(abs, providedPath string) (err error) { 205 206 // DirEntry with basename and modeType 207 var entries []fs.DirEntry 208 if entries, err = ReadDir(abs); err != nil { 209 return 210 } 211 212 // create entries for Next function 213 // - defers symlink resolution 214 var index, endIndex = len(t.dirEntries), len(t.dirEntries) + len(entries) 215 pslices.SetLength(&t.dirEntries, endIndex) 216 var dir = dirEntry{abs: abs, providedPath: providedPath} 217 for i, dirEntry := range entries { 218 dir.dirEntry = dirEntry 219 t.dirEntries[index+i] = dir 220 } 221 222 return 223 }