github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/cmd/ncdu/scan/scan.go (about)

     1  // Package scan does concurrent scanning of an Fs building up a directory tree.
     2  package scan
     3  
     4  import (
     5  	"context"
     6  	"fmt"
     7  	"path"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/rclone/rclone/fs"
    12  	"github.com/rclone/rclone/fs/walk"
    13  )
    14  
// Dir represents a directory found in the remote
//
// parent, path and readError are set once in newDir. The totals,
// entries and dirs of an existing Dir are changed with mu held.
type Dir struct {
	// directory above this one, nil if newDir was given no parent
	parent            *Dir
	// path of this directory as passed to newDir
	path              string
	// held while the totals, entries and dirs are changed after creation
	mu                sync.Mutex
	// sum of the known (non-negative) object sizes in this directory
	// and in the directories below it
	size              int64
	// number of objects in this directory and in the directories below it
	count             int64
	// number of those objects which reported a negative size
	countUnknownSize  int64
	// listing of this directory as passed to newDir
	entries           fs.DirEntries
	// sub directories created so far, keyed by the last element of their path
	dirs              map[string]*Dir
	// error passed to newDir along with the listing, if any
	readError         error
	// set when a directory below this one was created with a readError
	entriesHaveErrors bool
}
    28  
// Attrs contains accumulated properties for a directory entry
//
// Files with unknown size are counted separately but also included
// in the total count. They are not included in the size, i.e. treated
// as empty files, which means the size may be underestimated.
//
// NOTE: attrI builds this struct with positional literals, so the
// order of the fields must not be changed without updating it.
type Attrs struct {
	// modification time of the entry; in this file it is only filled
	// in by AttrWithModTimeI and is the zero time otherwise
	ModTime           time.Time
	// size of the entry, for a directory the accumulated size
	Size              int64
	// accumulated number of files for a directory, 0 for a file entry
	Count             int64
	// accumulated number of files with unknown size for a directory
	CountUnknownSize  int64
	// true if the entry is a directory
	IsDir             bool
	// false for a directory entry which has no Dir recorded for it
	Readable          bool
	// copied from the entriesHaveErrors flag of the Dir the values came from
	EntriesHaveErrors bool
}
    43  
    44  // AverageSize calculates average size of files in directory
    45  //
    46  // If there are files with unknown size, this returns the average over
    47  // files with known sizes, which means it may be under- or
    48  // overestimated.
    49  func (a *Attrs) AverageSize() float64 {
    50  	countKnownSize := a.Count - a.CountUnknownSize
    51  	if countKnownSize > 0 {
    52  		return float64(a.Size) / float64(countKnownSize)
    53  	}
    54  	return 0
    55  }
    56  
// Parent returns the directory above this one
//
// It returns nil if the Dir was created without a parent.
func (d *Dir) Parent() *Dir {
	// no locking needed since these are write once in newDir()
	return d.parent
}
    62  
// Path returns the position of the dir in the filesystem
//
// This is the directory path the Dir was created with; Scan uses ""
// for the root directory.
func (d *Dir) Path() string {
	// no locking needed since these are write once in newDir()
	return d.path
}
    68  
    69  // make a new directory
    70  func newDir(parent *Dir, dirPath string, entries fs.DirEntries, err error) *Dir {
    71  	d := &Dir{
    72  		parent:    parent,
    73  		path:      dirPath,
    74  		entries:   entries,
    75  		dirs:      make(map[string]*Dir),
    76  		readError: err,
    77  	}
    78  	// Count size in this dir
    79  	for _, entry := range entries {
    80  		if o, ok := entry.(fs.Object); ok {
    81  			d.count++
    82  			size := o.Size()
    83  			if size < 0 {
    84  				// Some backends may return -1 because size of object is not known
    85  				d.countUnknownSize++
    86  			} else {
    87  				d.size += size
    88  			}
    89  		}
    90  	}
    91  	// Set my directory entry in parent
    92  	if parent != nil {
    93  		parent.mu.Lock()
    94  		leaf := path.Base(dirPath)
    95  		d.parent.dirs[leaf] = d
    96  		parent.mu.Unlock()
    97  	}
    98  	// Accumulate counts in parents
    99  	for ; parent != nil; parent = parent.parent {
   100  		parent.mu.Lock()
   101  		parent.size += d.size
   102  		parent.count += d.count
   103  		parent.countUnknownSize += d.countUnknownSize
   104  		if d.readError != nil {
   105  			parent.entriesHaveErrors = true
   106  		}
   107  		parent.mu.Unlock()
   108  	}
   109  	return d
   110  }
   111  
   112  // Entries returns a copy of the entries in the directory
   113  func (d *Dir) Entries() fs.DirEntries {
   114  	return append(fs.DirEntries(nil), d.entries...)
   115  }
   116  
// Remove removes the i-th entry from the
// in-memory representation of the remote directory
//
// It takes d.mu and then does the work in remove, which also
// subtracts the entry's size and counts from this directory and
// from every directory above it. i must be a valid index into the
// entries.
func (d *Dir) Remove(i int) {
	d.mu.Lock()
	defer d.mu.Unlock()
	d.remove(i)
}
   124  
   125  // removes the i-th entry from the
   126  // in-memory representation of the remote directory
   127  //
   128  // Call with d.mu held
   129  func (d *Dir) remove(i int) {
   130  	size := d.entries[i].Size()
   131  	countUnknownSize := int64(0)
   132  	if size < 0 {
   133  		size = 0
   134  		countUnknownSize = 1
   135  	}
   136  	count := int64(1)
   137  
   138  	subDir, ok := d.getDir(i)
   139  	if ok {
   140  		size = subDir.size
   141  		count = subDir.count
   142  		countUnknownSize = subDir.countUnknownSize
   143  		delete(d.dirs, path.Base(subDir.path))
   144  	}
   145  
   146  	d.size -= size
   147  	d.count -= count
   148  	d.countUnknownSize -= countUnknownSize
   149  	d.entries = append(d.entries[:i], d.entries[i+1:]...)
   150  
   151  	dir := d
   152  	// populate changed size and count to parent(s)
   153  	for parent := d.parent; parent != nil; parent = parent.parent {
   154  		parent.mu.Lock()
   155  		parent.dirs[path.Base(dir.path)] = dir
   156  		parent.size -= size
   157  		parent.count -= count
   158  		parent.countUnknownSize -= countUnknownSize
   159  		dir = parent
   160  		parent.mu.Unlock()
   161  	}
   162  }
   163  
   164  // gets the directory of the i-th entry
   165  //
   166  // returns nil if it is a file
   167  // returns a flag as to whether is directory or not
   168  //
   169  // Call with d.mu held
   170  func (d *Dir) getDir(i int) (subDir *Dir, isDir bool) {
   171  	obj := d.entries[i]
   172  	dir, ok := obj.(fs.Directory)
   173  	if !ok {
   174  		return nil, false
   175  	}
   176  	leaf := path.Base(dir.Remote())
   177  	subDir = d.dirs[leaf]
   178  	return subDir, true
   179  }
   180  
// GetDir returns the Dir of the i-th entry
//
// returns nil if it is a file
// returns a flag as to whether is directory or not
//
// subDir can also be nil with isDir set if the entry is a directory
// which has no Dir recorded for it in d.dirs.
//
// It takes d.mu for the duration of the lookup.
func (d *Dir) GetDir(i int) (subDir *Dir, isDir bool) {
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.getDir(i)
}
   190  
// Attr returns the size and count for the directory
//
// Both values are read with d.mu held. They are the accumulated
// totals, which newDir also adds to for each directory created below
// this one.
func (d *Dir) Attr() (size int64, count int64) {
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.size, d.count
}
   197  
   198  // attrI returns the size, count and flags for the i-th directory entry
   199  func (d *Dir) attrI(i int) (attrs Attrs, err error) {
   200  	subDir, isDir := d.getDir(i)
   201  	if !isDir {
   202  		return Attrs{time.Time{}, d.entries[i].Size(), 0, 0, false, true, d.entriesHaveErrors}, d.readError
   203  	}
   204  	if subDir == nil {
   205  		return Attrs{time.Time{}, 0, 0, 0, true, false, false}, nil
   206  	}
   207  	size, count := subDir.Attr()
   208  	return Attrs{time.Time{}, size, count, subDir.countUnknownSize, true, true, subDir.entriesHaveErrors}, subDir.readError
   209  }
   210  
// AttrI returns the size, count and flags for the i-th directory entry
//
// It takes d.mu and returns the result of attrI. The ModTime of the
// returned Attrs is not filled in; use AttrWithModTimeI for that.
// i must be a valid index into the entries.
func (d *Dir) AttrI(i int) (attrs Attrs, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.attrI(i)
}
   217  
   218  // AttrWithModTimeI returns the modtime, size, count and flags for the i-th directory entry
   219  func (d *Dir) AttrWithModTimeI(ctx context.Context, i int) (attrs Attrs, err error) {
   220  	d.mu.Lock()
   221  	defer d.mu.Unlock()
   222  	attrs, err = d.attrI(i)
   223  	attrs.ModTime = d.entries[i].ModTime(ctx)
   224  	return
   225  }
   226  
   227  // Scan the Fs passed in, returning a root directory channel and an
   228  // error channel
   229  func Scan(ctx context.Context, f fs.Fs) (chan *Dir, chan error, chan struct{}) {
   230  	ci := fs.GetConfig(ctx)
   231  	root := make(chan *Dir, 1)
   232  	errChan := make(chan error, 1)
   233  	updated := make(chan struct{}, 1)
   234  	go func() {
   235  		parents := map[string]*Dir{}
   236  		err := walk.Walk(ctx, f, "", false, ci.MaxDepth, func(dirPath string, entries fs.DirEntries, err error) error {
   237  			var parent *Dir
   238  			if dirPath != "" {
   239  				parentPath := path.Dir(dirPath)
   240  				if parentPath == "." {
   241  					parentPath = ""
   242  				}
   243  				var ok bool
   244  				parent, ok = parents[parentPath]
   245  				if !ok {
   246  					errChan <- fmt.Errorf("couldn't find parent for %q", dirPath)
   247  				}
   248  			}
   249  			d := newDir(parent, dirPath, entries, err)
   250  			parents[dirPath] = d
   251  			if dirPath == "" {
   252  				root <- d
   253  			}
   254  			// Mark updated
   255  			select {
   256  			case updated <- struct{}{}:
   257  			default:
   258  				break
   259  			}
   260  			return nil
   261  		})
   262  		if err != nil {
   263  			errChan <- fmt.Errorf("ncdu listing failed: %w", err)
   264  		}
   265  		errChan <- nil
   266  	}()
   267  	return root, errChan, updated
   268  }