github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/lib/dircache/dircache.go (about)

     1  // Package dircache provides a simple cache for caching directory ID
     2  // to path lookups and the inverse.
     3  package dircache
     4  
     5  // _methods are called without the lock
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"errors"
    11  	"fmt"
    12  	"path"
    13  	"strings"
    14  	"sync"
    15  
    16  	"github.com/rclone/rclone/fs"
    17  )
    18  
    19  // DirCache caches paths to directory IDs and vice versa
    20  type DirCache struct {
    21  	cacheMu  sync.RWMutex // protects cache and invCache
    22  	cache    map[string]string
    23  	invCache map[string]string
    24  
    25  	mu           sync.Mutex // protects the below
    26  	fs           DirCacher  // Interface to find and make directories
    27  	trueRootID   string     // ID of the absolute root
    28  	root         string     // the path the cache is rooted on
    29  	rootID       string     // ID of the root directory
    30  	rootParentID string     // ID of the root's parent directory
    31  	foundRoot    bool       // Whether we have found the root or not
    32  }
    33  
    34  // DirCacher describes an interface for doing the low level directory work
    35  //
    36  // This should be implemented by the backend and will be called by the
    37  // dircache package when appropriate.
    38  type DirCacher interface {
    39  	FindLeaf(ctx context.Context, pathID, leaf string) (pathIDOut string, found bool, err error)
    40  	CreateDir(ctx context.Context, pathID, leaf string) (newID string, err error)
    41  }
    42  
    43  // New makes a DirCache
    44  //
    45  // This is created with the true root ID and the root path.
    46  //
    47  // In order to use the cache FindRoot() must be called on it without
    48  // error. This isn't done at initialization as it isn't known whether
    49  // the root and intermediate directories need to be created or not.
    50  //
    51  // Most of the utility functions will call FindRoot() on the caller's
    52  // behalf with the create flag passed in.
    53  //
    54  // The cache is safe for concurrent use
    55  func New(root string, trueRootID string, fs DirCacher) *DirCache {
    56  	d := &DirCache{
    57  		trueRootID: trueRootID,
    58  		root:       root,
    59  		fs:         fs,
    60  	}
    61  	d.Flush()
    62  	d.ResetRoot()
    63  	return d
    64  }
    65  
    66  // String returns the directory cache in string form for debugging
    67  func (dc *DirCache) String() string {
    68  	dc.cacheMu.RLock()
    69  	defer dc.cacheMu.RUnlock()
    70  	var buf bytes.Buffer
    71  	_, _ = buf.WriteString("DirCache{\n")
    72  	_, _ = fmt.Fprintf(&buf, "\ttrueRootID: %q,\n", dc.trueRootID)
    73  	_, _ = fmt.Fprintf(&buf, "\troot: %q,\n", dc.root)
    74  	_, _ = fmt.Fprintf(&buf, "\trootID: %q,\n", dc.rootID)
    75  	_, _ = fmt.Fprintf(&buf, "\trootParentID: %q,\n", dc.rootParentID)
    76  	_, _ = fmt.Fprintf(&buf, "\tfoundRoot: %v,\n", dc.foundRoot)
    77  	_, _ = buf.WriteString("\tcache: {\n")
    78  	for k, v := range dc.cache {
    79  		_, _ = fmt.Fprintf(&buf, "\t\t%q: %q,\n", k, v)
    80  	}
    81  	_, _ = buf.WriteString("\t},\n")
    82  	_, _ = buf.WriteString("\tinvCache: {\n")
    83  	for k, v := range dc.invCache {
    84  		_, _ = fmt.Fprintf(&buf, "\t\t%q: %q,\n", k, v)
    85  	}
    86  	_, _ = buf.WriteString("\t},\n")
    87  	_, _ = buf.WriteString("}\n")
    88  	return buf.String()
    89  }
    90  
    91  // Get a directory ID given a path
    92  //
    93  // Returns the ID and a boolean as to whether it was found or not in
    94  // the cache.
    95  func (dc *DirCache) Get(path string) (id string, ok bool) {
    96  	dc.cacheMu.RLock()
    97  	id, ok = dc.cache[path]
    98  	dc.cacheMu.RUnlock()
    99  	return id, ok
   100  }
   101  
   102  // GetInv gets a path given a directory ID
   103  //
   104  // Returns the path and a boolean as to whether it was found or not in
   105  // the cache.
   106  func (dc *DirCache) GetInv(id string) (path string, ok bool) {
   107  	dc.cacheMu.RLock()
   108  	path, ok = dc.invCache[id]
   109  	dc.cacheMu.RUnlock()
   110  	return path, ok
   111  }
   112  
   113  // Put a (path, directory ID) pair into the cache
   114  func (dc *DirCache) Put(path, id string) {
   115  	dc.cacheMu.Lock()
   116  	dc.cache[path] = id
   117  	dc.invCache[id] = path
   118  	dc.cacheMu.Unlock()
   119  }
   120  
   121  // Flush the cache of all data
   122  func (dc *DirCache) Flush() {
   123  	dc.cacheMu.Lock()
   124  	dc.cache = make(map[string]string)
   125  	dc.invCache = make(map[string]string)
   126  	dc.cacheMu.Unlock()
   127  }
   128  
   129  // SetRootIDAlias sets the rootID to that passed in. This assumes that
   130  // the new ID is just an alias for the old ID so does not flush
   131  // anything.
   132  //
   133  // This should be called from FindLeaf (and only from FindLeaf) if it
   134  // is discovered that the root ID is incorrect. For example some
   135  // backends use "0" as a root ID, but it has a real ID which is needed
   136  // for some operations.
   137  func (dc *DirCache) SetRootIDAlias(rootID string) {
   138  	// No locking as this is called from FindLeaf
   139  	dc.rootID = rootID
   140  	dc.Put("", dc.rootID)
   141  }
   142  
   143  // FlushDir flushes the map of all data starting with the path
   144  // dir.
   145  //
   146  // If dir is empty string then this is equivalent to calling ResetRoot
   147  func (dc *DirCache) FlushDir(dir string) {
   148  	if dir == "" {
   149  		dc.ResetRoot()
   150  		return
   151  	}
   152  	dc.cacheMu.Lock()
   153  
   154  	// Delete the root dir
   155  	ID, ok := dc.cache[dir]
   156  	if ok {
   157  		delete(dc.cache, dir)
   158  		delete(dc.invCache, ID)
   159  	}
   160  
   161  	// And any sub directories
   162  	dir += "/"
   163  	for key, ID := range dc.cache {
   164  		if strings.HasPrefix(key, dir) {
   165  			delete(dc.cache, key)
   166  			delete(dc.invCache, ID)
   167  		}
   168  	}
   169  
   170  	dc.cacheMu.Unlock()
   171  }
   172  
   173  // SplitPath splits a path into directory, leaf
   174  //
   175  // Path shouldn't start or end with a /
   176  //
   177  // If there are no slashes then directory will be "" and leaf = path
   178  func SplitPath(path string) (directory, leaf string) {
   179  	lastSlash := strings.LastIndex(path, "/")
   180  	if lastSlash >= 0 {
   181  		directory = path[:lastSlash]
   182  		leaf = path[lastSlash+1:]
   183  	} else {
   184  		directory = ""
   185  		leaf = path
   186  	}
   187  	return
   188  }
   189  
   190  // FindDir finds the directory passed in returning the directory ID
   191  // starting from pathID
   192  //
   193  // Path shouldn't start or end with a /
   194  //
   195  // If create is set it will make the directory if not found.
   196  //
   197  // It will call FindRoot if it hasn't been called already
   198  func (dc *DirCache) FindDir(ctx context.Context, path string, create bool) (pathID string, err error) {
   199  	dc.mu.Lock()
   200  	defer dc.mu.Unlock()
   201  	err = dc._findRoot(ctx, create)
   202  	if err != nil {
   203  		return "", err
   204  	}
   205  	return dc._findDir(ctx, path, create)
   206  }
   207  
   208  // Unlocked findDir
   209  //
   210  // Call with a lock on mu
   211  func (dc *DirCache) _findDir(ctx context.Context, path string, create bool) (pathID string, err error) {
   212  	// If it is the root, then return it
   213  	if path == "" {
   214  		return dc.rootID, nil
   215  	}
   216  
   217  	// If it is in the cache then return it
   218  	pathID, ok := dc.Get(path)
   219  	if ok {
   220  		return pathID, nil
   221  	}
   222  
   223  	// Split the path into directory, leaf
   224  	directory, leaf := SplitPath(path)
   225  
   226  	// Recurse and find pathID for parent directory
   227  	parentPathID, err := dc._findDir(ctx, directory, create)
   228  	if err != nil {
   229  		return "", err
   230  
   231  	}
   232  
   233  	// Find the leaf in parentPathID
   234  	pathID, found, err := dc.fs.FindLeaf(ctx, parentPathID, leaf)
   235  	if err != nil {
   236  		return "", err
   237  	}
   238  
   239  	// If not found create the directory if required or return an error
   240  	if !found {
   241  		if create {
   242  			pathID, err = dc.fs.CreateDir(ctx, parentPathID, leaf)
   243  			if err != nil {
   244  				return "", fmt.Errorf("failed to make directory: %w", err)
   245  			}
   246  		} else {
   247  			return "", fs.ErrorDirNotFound
   248  		}
   249  	}
   250  
   251  	// Store the leaf directory in the cache
   252  	dc.Put(path, pathID)
   253  
   254  	// fmt.Println("Dir", path, "is", pathID)
   255  	return pathID, nil
   256  }
   257  
   258  // FindPath finds the leaf and directoryID from a path
   259  //
   260  // If called with path == "" then it will return the ID of the parent
   261  // directory of the root and the leaf name of the root in that
   262  // directory. Note that it won't create the root directory in this
   263  // case even if create is true.
   264  //
   265  // If create is set parent directories will be created if they don't exist
   266  //
   267  // It will call FindRoot if it hasn't been called already
   268  func (dc *DirCache) FindPath(ctx context.Context, path string, create bool) (leaf, directoryID string, err error) {
   269  	if path == "" {
   270  		_, leaf = SplitPath(dc.root)
   271  		directoryID, err = dc.RootParentID(ctx, create)
   272  	} else {
   273  		var directory string
   274  		directory, leaf = SplitPath(path)
   275  		directoryID, err = dc.FindDir(ctx, directory, create)
   276  	}
   277  	return leaf, directoryID, err
   278  }
   279  
   280  // FindRoot finds the root directory if not already found
   281  //
   282  // If successful this changes the root of the cache from the true root
   283  // to the root specified by the path passed into New.
   284  //
   285  // Resets the root directory.
   286  //
   287  // If create is set it will make the directory if not found
   288  func (dc *DirCache) FindRoot(ctx context.Context, create bool) error {
   289  	dc.mu.Lock()
   290  	defer dc.mu.Unlock()
   291  	return dc._findRoot(ctx, create)
   292  }
   293  
   294  // _findRoot finds the root directory if not already found
   295  //
   296  // Resets the root directory.
   297  //
   298  // If create is set it will make the directory if not found.
   299  //
   300  // Call with mu held
   301  func (dc *DirCache) _findRoot(ctx context.Context, create bool) error {
   302  	if dc.foundRoot {
   303  		return nil
   304  	}
   305  	rootID, err := dc._findDir(ctx, dc.root, create)
   306  	if err != nil {
   307  		return err
   308  	}
   309  	dc.foundRoot = true
   310  	dc.rootID = rootID
   311  
   312  	// Find the parent of the root while we still have the root
   313  	// directory tree cached
   314  	rootParentPath, _ := SplitPath(dc.root)
   315  	dc.rootParentID, _ = dc.Get(rootParentPath)
   316  
   317  	// Reset the tree based on dc.root
   318  	dc.Flush()
   319  	// Put the root directory in
   320  	dc.Put("", dc.rootID)
   321  	return nil
   322  }
   323  
   324  // FoundRoot returns whether the root directory has been found yet
   325  func (dc *DirCache) FoundRoot() bool {
   326  	dc.mu.Lock()
   327  	defer dc.mu.Unlock()
   328  	return dc.foundRoot
   329  }
   330  
   331  // RootID returns the ID of the root directory
   332  //
   333  // If create is set it will make the root directory if not found
   334  func (dc *DirCache) RootID(ctx context.Context, create bool) (ID string, err error) {
   335  	dc.mu.Lock()
   336  	defer dc.mu.Unlock()
   337  	err = dc._findRoot(ctx, create)
   338  	if err != nil {
   339  		return "", err
   340  	}
   341  	return dc.rootID, nil
   342  }
   343  
   344  // RootParentID returns the ID of the parent of the root directory
   345  //
   346  // If create is set it will make the root parent directory if not found (but not the root)
   347  func (dc *DirCache) RootParentID(ctx context.Context, create bool) (ID string, err error) {
   348  	dc.mu.Lock()
   349  	defer dc.mu.Unlock()
   350  	if !dc.foundRoot {
   351  		if dc.root == "" {
   352  			return "", errors.New("is root directory")
   353  		}
   354  		// Find the rootParentID without creating the root
   355  		rootParent, _ := SplitPath(dc.root)
   356  		rootParentID, err := dc._findDir(ctx, rootParent, create)
   357  		if err != nil {
   358  			return "", err
   359  		}
   360  		dc.rootParentID = rootParentID
   361  	} else {
   362  		if dc.rootID == dc.trueRootID {
   363  			return "", errors.New("is root directory")
   364  		}
   365  	}
   366  	if dc.rootParentID == "" {
   367  		return "", errors.New("internal error: didn't find rootParentID")
   368  	}
   369  	return dc.rootParentID, nil
   370  }
   371  
   372  // ResetRoot resets the root directory to the absolute root and clears
   373  // the DirCache
   374  func (dc *DirCache) ResetRoot() {
   375  	dc.mu.Lock()
   376  	defer dc.mu.Unlock()
   377  	dc.foundRoot = false
   378  	dc.Flush()
   379  
   380  	// Put the true root in
   381  	dc.rootID = dc.trueRootID
   382  
   383  	// Put the root directory in
   384  	dc.Put("", dc.rootID)
   385  }
   386  
   387  // DirMove prepares to move the directory (srcDC, srcRoot, srcRemote)
   388  // into the directory (dc, dstRoot, dstRemote)
   389  //
   390  // It does all the checking, creates intermediate directories and
   391  // returns leafs and IDs ready for the move.
   392  //
   393  // This returns:
   394  //
   395  // - srcID - ID of the source directory
   396  // - srcDirectoryID - ID of the parent of the source directory
   397  // - srcLeaf - leaf name of the source directory
   398  // - dstDirectoryID - ID of the parent of the destination directory
   399  // - dstLeaf - leaf name of the destination directory
   400  //
   401  // These should be used to do the actual move then
   402  // srcDC.FlushDir(srcRemote) should be called.
   403  func (dc *DirCache) DirMove(
   404  	ctx context.Context, srcDC *DirCache, srcRoot, srcRemote, dstRoot, dstRemote string) (srcID, srcDirectoryID, srcLeaf, dstDirectoryID, dstLeaf string, err error) {
   405  	var (
   406  		dstDC   = dc
   407  		srcPath = path.Join(srcRoot, srcRemote)
   408  		dstPath = path.Join(dstRoot, dstRemote)
   409  	)
   410  
   411  	// Refuse to move to or from the root
   412  	if srcPath == "" || dstPath == "" {
   413  		// fs.Debugf(src, "DirMove error: Can't move root")
   414  		err = errors.New("can't move root directory")
   415  		return
   416  	}
   417  
   418  	// Find ID of dst parent, creating subdirs if necessary
   419  	dstLeaf, dstDirectoryID, err = dstDC.FindPath(ctx, dstRemote, true)
   420  	if err != nil {
   421  		return
   422  	}
   423  
   424  	// Check destination does not exist
   425  	_, err = dstDC.FindDir(ctx, dstRemote, false)
   426  	if err == fs.ErrorDirNotFound {
   427  		// OK
   428  	} else if err != nil {
   429  		return
   430  	} else {
   431  		err = fs.ErrorDirExists
   432  		return
   433  	}
   434  
   435  	// Find ID of src parent
   436  	srcLeaf, srcDirectoryID, err = srcDC.FindPath(ctx, srcRemote, false)
   437  	if err != nil {
   438  		return
   439  	}
   440  
   441  	// Find ID of src
   442  	srcID, err = srcDC.FindDir(ctx, srcRemote, false)
   443  	if err != nil {
   444  		return
   445  	}
   446  
   447  	return
   448  }