// Listing header (not Go source): github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/cmd/ncdu/scan/scan.go (about)

// Package scan does concurrent scanning of an Fs building up a directory tree.
package scan

import (
	"context"
	"fmt"
	"path"
	"sync"
	"time"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/walk"
)

// Dir represents a directory found in the remote
type Dir struct {
	parent            *Dir            // directory above this one; written once in newDir, nil when none was given
	path              string          // position of the dir in the filesystem; written once in newDir
	mu                sync.Mutex      // held by the methods while reading or writing the fields below
	size              int64           // sum of the known object sizes in this dir and the dirs below it
	count             int64           // number of objects in this dir and the dirs below it
	countUnknownSize  int64           // how many of those objects reported a negative size
	entries           fs.DirEntries   // the listing of this directory as passed to newDir
	dirs              map[string]*Dir // sub directories, keyed by the last element of their path
	readError         error           // error passed to newDir for this directory's listing
	entriesHaveErrors bool            // set when a directory below this one was created with a read error
}

// Attrs contains accumulated properties for a directory entry
//
// Files with unknown size are counted separately but also included
// in the total count. They are not included in the size, i.e. treated
// as empty files, which means the size may be underestimated.
type Attrs struct {
	ModTime           time.Time // modification time of the entry; only filled in by AttrWithModTimeI
	Size              int64     // size of the file, or accumulated size of the directory
	Count             int64     // number of objects below a directory; 0 for a file
	CountUnknownSize  int64     // number of objects below a directory whose size is unknown
	IsDir             bool      // true if the entry is a directory
	Readable          bool      // false for a directory entry that has no Dir recorded for it
	EntriesHaveErrors bool      // true if directories below the entry had read errors
}

// AverageSize calculates average size of files in directory
//
// If there are files with unknown size, this returns the average over
// files with known sizes, which means it may be under- or
// overestimated.
49 func (a *Attrs) AverageSize() float64 { 50 countKnownSize := a.Count - a.CountUnknownSize 51 if countKnownSize > 0 { 52 return float64(a.Size) / float64(countKnownSize) 53 } 54 return 0 55 } 56 57 // Parent returns the directory above this one 58 func (d *Dir) Parent() *Dir { 59 // no locking needed since these are write once in newDir() 60 return d.parent 61 } 62 63 // Path returns the position of the dir in the filesystem 64 func (d *Dir) Path() string { 65 // no locking needed since these are write once in newDir() 66 return d.path 67 } 68 69 // make a new directory 70 func newDir(parent *Dir, dirPath string, entries fs.DirEntries, err error) *Dir { 71 d := &Dir{ 72 parent: parent, 73 path: dirPath, 74 entries: entries, 75 dirs: make(map[string]*Dir), 76 readError: err, 77 } 78 // Count size in this dir 79 for _, entry := range entries { 80 if o, ok := entry.(fs.Object); ok { 81 d.count++ 82 size := o.Size() 83 if size < 0 { 84 // Some backends may return -1 because size of object is not known 85 d.countUnknownSize++ 86 } else { 87 d.size += size 88 } 89 } 90 } 91 // Set my directory entry in parent 92 if parent != nil { 93 parent.mu.Lock() 94 leaf := path.Base(dirPath) 95 d.parent.dirs[leaf] = d 96 parent.mu.Unlock() 97 } 98 // Accumulate counts in parents 99 for ; parent != nil; parent = parent.parent { 100 parent.mu.Lock() 101 parent.size += d.size 102 parent.count += d.count 103 parent.countUnknownSize += d.countUnknownSize 104 if d.readError != nil { 105 parent.entriesHaveErrors = true 106 } 107 parent.mu.Unlock() 108 } 109 return d 110 } 111 112 // Entries returns a copy of the entries in the directory 113 func (d *Dir) Entries() fs.DirEntries { 114 return append(fs.DirEntries(nil), d.entries...) 
115 } 116 117 // Remove removes the i-th entry from the 118 // in-memory representation of the remote directory 119 func (d *Dir) Remove(i int) { 120 d.mu.Lock() 121 defer d.mu.Unlock() 122 d.remove(i) 123 } 124 125 // removes the i-th entry from the 126 // in-memory representation of the remote directory 127 // 128 // Call with d.mu held 129 func (d *Dir) remove(i int) { 130 size := d.entries[i].Size() 131 countUnknownSize := int64(0) 132 if size < 0 { 133 size = 0 134 countUnknownSize = 1 135 } 136 count := int64(1) 137 138 subDir, ok := d.getDir(i) 139 if ok { 140 size = subDir.size 141 count = subDir.count 142 countUnknownSize = subDir.countUnknownSize 143 delete(d.dirs, path.Base(subDir.path)) 144 } 145 146 d.size -= size 147 d.count -= count 148 d.countUnknownSize -= countUnknownSize 149 d.entries = append(d.entries[:i], d.entries[i+1:]...) 150 151 dir := d 152 // populate changed size and count to parent(s) 153 for parent := d.parent; parent != nil; parent = parent.parent { 154 parent.mu.Lock() 155 parent.dirs[path.Base(dir.path)] = dir 156 parent.size -= size 157 parent.count -= count 158 parent.countUnknownSize -= countUnknownSize 159 dir = parent 160 parent.mu.Unlock() 161 } 162 } 163 164 // gets the directory of the i-th entry 165 // 166 // returns nil if it is a file 167 // returns a flag as to whether is directory or not 168 // 169 // Call with d.mu held 170 func (d *Dir) getDir(i int) (subDir *Dir, isDir bool) { 171 obj := d.entries[i] 172 dir, ok := obj.(fs.Directory) 173 if !ok { 174 return nil, false 175 } 176 leaf := path.Base(dir.Remote()) 177 subDir = d.dirs[leaf] 178 return subDir, true 179 } 180 181 // GetDir returns the Dir of the i-th entry 182 // 183 // returns nil if it is a file 184 // returns a flag as to whether is directory or not 185 func (d *Dir) GetDir(i int) (subDir *Dir, isDir bool) { 186 d.mu.Lock() 187 defer d.mu.Unlock() 188 return d.getDir(i) 189 } 190 191 // Attr returns the size and count for the directory 192 func (d *Dir) 
Attr() (size int64, count int64) { 193 d.mu.Lock() 194 defer d.mu.Unlock() 195 return d.size, d.count 196 } 197 198 // attrI returns the size, count and flags for the i-th directory entry 199 func (d *Dir) attrI(i int) (attrs Attrs, err error) { 200 subDir, isDir := d.getDir(i) 201 if !isDir { 202 return Attrs{time.Time{}, d.entries[i].Size(), 0, 0, false, true, d.entriesHaveErrors}, d.readError 203 } 204 if subDir == nil { 205 return Attrs{time.Time{}, 0, 0, 0, true, false, false}, nil 206 } 207 size, count := subDir.Attr() 208 return Attrs{time.Time{}, size, count, subDir.countUnknownSize, true, true, subDir.entriesHaveErrors}, subDir.readError 209 } 210 211 // AttrI returns the size, count and flags for the i-th directory entry 212 func (d *Dir) AttrI(i int) (attrs Attrs, err error) { 213 d.mu.Lock() 214 defer d.mu.Unlock() 215 return d.attrI(i) 216 } 217 218 // AttrWithModTimeI returns the modtime, size, count and flags for the i-th directory entry 219 func (d *Dir) AttrWithModTimeI(ctx context.Context, i int) (attrs Attrs, err error) { 220 d.mu.Lock() 221 defer d.mu.Unlock() 222 attrs, err = d.attrI(i) 223 attrs.ModTime = d.entries[i].ModTime(ctx) 224 return 225 } 226 227 // Scan the Fs passed in, returning a root directory channel and an 228 // error channel 229 func Scan(ctx context.Context, f fs.Fs) (chan *Dir, chan error, chan struct{}) { 230 ci := fs.GetConfig(ctx) 231 root := make(chan *Dir, 1) 232 errChan := make(chan error, 1) 233 updated := make(chan struct{}, 1) 234 go func() { 235 parents := map[string]*Dir{} 236 err := walk.Walk(ctx, f, "", false, ci.MaxDepth, func(dirPath string, entries fs.DirEntries, err error) error { 237 var parent *Dir 238 if dirPath != "" { 239 parentPath := path.Dir(dirPath) 240 if parentPath == "." 
{ 241 parentPath = "" 242 } 243 var ok bool 244 parent, ok = parents[parentPath] 245 if !ok { 246 errChan <- fmt.Errorf("couldn't find parent for %q", dirPath) 247 } 248 } 249 d := newDir(parent, dirPath, entries, err) 250 parents[dirPath] = d 251 if dirPath == "" { 252 root <- d 253 } 254 // Mark updated 255 select { 256 case updated <- struct{}{}: 257 default: 258 break 259 } 260 return nil 261 }) 262 if err != nil { 263 errChan <- fmt.Errorf("ncdu listing failed: %w", err) 264 } 265 errChan <- nil 266 }() 267 return root, errChan, updated 268 }