github.com/haraldrudell/parl@v0.4.176/pfs/traverser.go (about)

     1  /*
     2  © 2023–present Harald Rudell <harald.rudell@gmail.com> (https://haraldrudell.github.io/haraldrudell/)
     3  ISC License
     4  */
     5  
     6  // Package pfs provides a symlink-following file-system traverser and other file-system functions.
     7  package pfs
     8  
     9  import (
    10  	"io/fs"
    11  	"path/filepath"
    12  	"strings"
    13  	"sync/atomic"
    14  
    15  	"github.com/haraldrudell/parl/perrors"
    16  	"github.com/haraldrudell/parl/pslices"
    17  )
    18  
    19  const (
    20  	// platform path separator as a string
    21  	sSep = string(filepath.Separator)
    22  	// [os.File.ReadDir] get all names
    23  	allNamesAtOnce = -1
    24  )
    25  
// Traverser represents a file system that is scanned following symlinks
//   - each file system entry is returned exactly once except:
//   - — an error reading a directory returns the directory a second time
//   - directories and symlinks are returned before they are read so that they can be
//     more efficiently skipped by invoking [ResultEntry.Skip]
//   - directory entries are returned in 8-bit character order
//   - returned entries may not exist, such entries have [ResultEntry.Err] non-nil
//   - result.ProvidedPath is based on the initial path and may be relative,
//     unclean and contain symlinks
//   - if [ResultEntry.Err] is nil, Abs is an absolute, symlink-free, clean path
//   - —
//   - ResultEntry.DirEntry.Info typically invokes [os.Lstat] every time,
//     so this value should be cached
//   - because symlinks may point to parents or separate trees,
//     the file system scan may involve multiple roots which may
//     affect the order of returned entries
//   - symlinks are followed and not returned.
//     Therefore, a symlink pointing to a scanned location is effectively ignored
//   - the returned result struct is by value. If its address is not taken,
//     no allocation will occur
type Traverser struct {
	// path provided to new-function for the initial root
	//	- reset to empty string by createInitialRoot once the root is created
	initialPath string
	// skipNo provides a serial number for returned directories
	skipNo atomic.Uint64
	// skippables holds pending skippables
	skippables []ResultEntry
	// collection of skippables marked to be skipped
	//	- key: the number provided to skip
	//	- a key is removed when skipCheck finds it
	skipMap map[uint64]struct{}
	// basenames from read directories to be processed
	//	- extended by readDir with one element per read directory entry
	dirEntries []dirEntry
	// index in rootsRegistry being traversed
	rootIndex int
	// registry of the absolute paths for each encountered root
	//	- key: absolute, symlink-free, clean path
	//	- roots are added by createInitialRoot and processSymlink
	rootsRegistry Registry[Root2]
	// obsoleteRoots were obsoleted by a symlink pointing to
	//		a parent directory
	//	- these roots will be encountered during traversal
	//	- only holds obsoleted roots whose index was at or before rootIndex
	obsoleteRoots Registry[Root2]
}
    67  
// dirEntry is a value-container for a read directory entry
//   - [os.File.ReadDir] returns dirEntry with deferred [fs.FileInfo]
//   - one value is stored in Traverser.dirEntries for every entry
//     that readDir obtains from a directory
type dirEntry struct {
	// abs and providedPath are the values readDir was invoked with,
	// ie. they describe the directory that was read, not the entry itself
	abs, providedPath string
	// dirEntry is the entry as obtained from reading the directory
	dirEntry fs.DirEntry
}
    74  
    75  // NewTraverser returns a file-system traverser
    76  //   - typically used via [pfs.Iterator] or [pfs.DirIterator]
    77  //   - path is the initial path.
    78  //     Path may be relative or absolute, contain symlinks and be unclean.
    79  //     Path may be of any modeType: file, directory or special file.
    80  //     Empty string means process’ current directory
    81  //   - the Next method is used to obtain file-system entries and errors
    82  //   - consider using pfs iterators:
    83  //   - — [Iterator] for all entries and errors
    84  //   - — [DirIterator] for error-free directories
    85  //
    86  // Usage:
    87  //
    88  //	var traverser = pfs.NewTraverser(path)
    89  //	for {
    90  //	  var result = traverser.Next()
    91  //	  if result.IsEnd() || result.Err != nil {
    92  //	    break
    93  //	  }
    94  //	  println(result.Abs)
    95  //	}
    96  func NewTraverser(path string) (traverser *Traverser) {
    97  	return &Traverser{
    98  		initialPath:   path,
    99  		skipMap:       make(map[uint64]struct{}),
   100  		rootsRegistry: *NewRegistry[Root2](),
   101  		obsoleteRoots: *NewRegistry[Root2](),
   102  	}
   103  }
   104  
   105  // skip marks no for skipping
   106  func (t *Traverser) skip(no uint64) { t.skipMap[no] = struct{}{} }
   107  
   108  // skipCheck returns true if no is marked for skipping
   109  func (t *Traverser) skipCheck(no uint64) (skip bool) {
   110  	if _, skip = t.skipMap[no]; !skip {
   111  		return
   112  	}
   113  	delete(t.skipMap, no)
   114  
   115  	return
   116  }
   117  
   118  // createInitialRoot returns the createInitialRoot entry and creates and registers its root
   119  //   - entry is non-nil and may be symbolic link
   120  //   - entry has ProvidedPath and DirEntry
   121  //   - if entry.Err is nil, Abs and Name/IsDir/Type/Info are available
   122  func (t *Traverser) createInitialRoot() (entry ResultEntry) {
   123  
   124  	// create a root for path provided to NewTree2
   125  	var root = NewRoot2(t.initialPath)
   126  	t.initialPath = ""
   127  
   128  	// load absolute, symlink-free, clean path
   129  	//	- errors if [os.Getwd] or [os.Readlink] fails
   130  	if entry.Err = root.Load(); entry.Err == nil {
   131  		// the root is usable
   132  		t.rootsRegistry.Add(root.Abs, root)
   133  	}
   134  
   135  	var err error
   136  	// modeType is required to examine the entry
   137  	//	- it is not available, so [os.Lstat] and [os.Stat] must be invoked
   138  	//	- start with Lstat to see if it is a symlink
   139  	entry.ProvidedPath = root.ProvidedPath
   140  	entry.Abs = root.Abs
   141  	if entry.Abs != "" {
   142  		entry.DirEntry, entry.Err = AddDirEntry(entry.Abs)
   143  	} else {
   144  		// provide best-effort DirEntry
   145  		if entry.DirEntry, err = AddDirEntry(entry.ProvidedPath); err != nil {
   146  			entry.Err = perrors.AppendError(entry.Err, err)
   147  		}
   148  	}
   149  
   150  	// if Lstat failed, use a deferred-error dirEntry
   151  	if entry.DirEntry == nil {
   152  		entry.DirEntry = NewDeferringDirEntry(entry.ProvidedPath)
   153  	}
   154  
   155  	return
   156  }
   157  
   158  // processSymlink checks for new or obsoleted roots from a symlink
   159  func (t *Traverser) processSymlink(absTarget string) {
   160  
   161  	// check for exact match to existing root
   162  	if t.rootsRegistry.HasAbs(absTarget) {
   163  		return // symlink matches existing root: ignore it
   164  	}
   165  
   166  	// match absTarget against existing roots
   167  	var length = t.rootsRegistry.ListLength()
   168  	for i := 0; i < length; i++ {
   169  
   170  		// iterate over roots
   171  		var root = t.rootsRegistry.GetValue(i)
   172  		if root == nil {
   173  			continue // a discarded root
   174  		}
   175  		var rootAbs = root.Abs + sSep
   176  		var targetAbs = absTarget + sSep
   177  
   178  		// if absTarget is a subdirectory of an existing root, it can be ignored
   179  		if strings.HasPrefix(targetAbs, rootAbs) {
   180  			return // symlink is a sub-entry of an existing root: ignore it
   181  		}
   182  
   183  		// if root is not a subdirectory of absTarget, check the next root
   184  		if !strings.HasPrefix(rootAbs, targetAbs) {
   185  			continue
   186  		}
   187  
   188  		// root is a subdirectory of this symlink, obsolete the root
   189  		if i <= t.rootIndex {
   190  			// the obsolete root was already being traversed
   191  			//	- save it
   192  			t.obsoleteRoots.Add(root.Abs, root)
   193  		}
   194  		t.rootsRegistry.ObsoleteIndex(i)
   195  	}
   196  	// the symlink is disparate from all existing roots
   197  
   198  	// scan as new root
   199  	var root = NewAbsRoot2(absTarget)
   200  	t.rootsRegistry.Add(absTarget, root)
   201  }
   202  
   203  // readDir reads a directory and adds entries to t.dirEntries
   204  func (t *Traverser) readDir(abs, providedPath string) (err error) {
   205  
   206  	// DirEntry with basename and modeType
   207  	var entries []fs.DirEntry
   208  	if entries, err = ReadDir(abs); err != nil {
   209  		return
   210  	}
   211  
   212  	// create entries for Next function
   213  	//	- defers symlink resolution
   214  	var index, endIndex = len(t.dirEntries), len(t.dirEntries) + len(entries)
   215  	pslices.SetLength(&t.dirEntries, endIndex)
   216  	var dir = dirEntry{abs: abs, providedPath: providedPath}
   217  	for i, dirEntry := range entries {
   218  		dir.dirEntry = dirEntry
   219  		t.dirEntries[index+i] = dir
   220  	}
   221  
   222  	return
   223  }