github.com/artpar/rclone@v1.67.3/backend/hasher/hasher.go (about)

     1  // Package hasher implements a checksum handling overlay backend
     2  package hasher
     3  
     4  import (
     5  	"context"
     6  	"encoding/gob"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"path"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/artpar/rclone/fs"
    16  	"github.com/artpar/rclone/fs/cache"
    17  	"github.com/artpar/rclone/fs/config/configmap"
    18  	"github.com/artpar/rclone/fs/config/configstruct"
    19  	"github.com/artpar/rclone/fs/fspath"
    20  	"github.com/artpar/rclone/fs/hash"
    21  	"github.com/artpar/rclone/lib/kv"
    22  )
    23  
// Register with Fs
func init() {
	fs.Register(&fs.RegInfo{
		Name:        "hasher",
		Description: "Better checksums for other remotes",
		NewFs:       NewFs,
		MetadataInfo: &fs.MetadataInfo{
			Help: `Any metadata supported by the underlying remote is read and written.`,
		},
		CommandHelp: commandHelp,
		Options: []fs.Option{{
			// The wrapped base remote - required.
			Name:     "remote",
			Required: true,
			Help:     "Remote to cache checksums for (e.g. myRemote:path).",
		}, {
			// Checksum types the overlay should provide.
			Name:     "hashes",
			Default:  fs.CommaSepList{"md5", "sha1"},
			Advanced: false,
			Help:     "Comma separated list of supported checksum types.",
		}, {
			// 0 disables the cache entirely, fs.DurationOff caches forever.
			Name:     "max_age",
			Advanced: false,
			Default:  fs.DurationOff,
			Help:     "Maximum time to keep checksums in cache (0 = no cache, off = cache forever).",
		}, {
			// 0 (the default) disables auto-updating of checksums.
			Name:     "auto_size",
			Advanced: true,
			Default:  fs.SizeSuffix(0),
			Help:     "Auto-update checksum for files smaller than this size (disabled by default).",
		}},
	})
}
    56  
// Options defines the configuration for this backend
type Options struct {
	Remote   string          `config:"remote"`    // base remote to wrap
	Hashes   fs.CommaSepList `config:"hashes"`    // checksum types to support
	AutoSize fs.SizeSuffix   `config:"auto_size"` // auto-update threshold, 0 = disabled
	MaxAge   fs.Duration     `config:"max_age"`   // cache lifetime, 0 = no cache
}
    64  
// Fs represents a wrapped fs.Fs
type Fs struct {
	fs.Fs                 // the wrapped base filesystem
	name     string       // remote name as passed into NewFs
	root     string       // root path as passed into NewFs
	wrapper  fs.Fs        // filesystem that wraps this one, if any
	features *fs.Features // optional features advertised by this overlay
	opt      *Options     // parsed configuration
	db       *kv.DB       // checksum cache; nil when max_age is 0 (caching disabled)
	// fingerprinting
	fpTime bool      // true if using time in fingerprints
	fpHash hash.Type // hash type to use in fingerprints or None
	// hash types triaged by groups
	suppHashes hash.Set // all supported checksum types
	passHashes hash.Set // passed directly to the base without caching
	slowHashes hash.Set // passed to the base and then cached
	autoHashes hash.Set // calculated in-house and cached
	keepHashes hash.Set // checksums to keep in cache (slow + auto)
}
    84  
    85  var warnExperimental sync.Once
    86  
// NewFs constructs an Fs from the remote:path string
//
// It wraps the base remote named by the "remote" option, passing most
// operations through while keeping a persistent checksum cache for the
// configured hash types.  Following rclone convention it may return a
// valid Fs together with fs.ErrorIsFile when rpath points at a file.
func NewFs(ctx context.Context, fsname, rpath string, cmap configmap.Mapper) (fs.Fs, error) {
	if !kv.Supported() {
		return nil, errors.New("hasher is not supported on this OS")
	}
	warnExperimental.Do(func() {
		fs.Infof(nil, "Hasher is EXPERIMENTAL!")
	})

	opt := &Options{}
	err := configstruct.Set(cmap, opt)
	if err != nil {
		return nil, err
	}

	// Refuse a trivially self-referential configuration.
	if strings.HasPrefix(opt.Remote, fsname+":") {
		return nil, errors.New("can't point remote at itself")
	}
	remotePath := fspath.JoinRootPath(opt.Remote, rpath)
	baseFs, err := cache.Get(ctx, remotePath)
	if err != nil && err != fs.ErrorIsFile {
		return nil, fmt.Errorf("failed to derive base remote %q: %w", opt.Remote, err)
	}

	f := &Fs{
		Fs:   baseFs,
		name: fsname,
		root: rpath,
		opt:  opt,
	}
	// Correct root if definitely pointing to a file
	if err == fs.ErrorIsFile {
		f.root = path.Dir(f.root)
		if f.root == "." || f.root == "/" {
			f.root = ""
		}
	}
	baseFeatures := baseFs.Features()
	// Mod time can go into fingerprints only if the base supports it.
	f.fpTime = baseFs.Precision() != fs.ModTimeNotSupported

	if baseFeatures.SlowHash {
		// Base hashes are expensive: compute via the base, then cache.
		f.slowHashes = f.Fs.Hashes()
	} else {
		// Base hashes are cheap: pass straight through and use one of
		// them for fingerprinting.
		f.passHashes = f.Fs.Hashes()
		f.fpHash = f.passHashes.GetOne()
	}

	// hash.Set is comparable (see the == comparison below), so this
	// assignment copies the pass-through set before extending it.
	f.suppHashes = f.passHashes
	f.suppHashes.Add(f.slowHashes.Array()...)

	// Triage each configured hash name into the auto/keep/supported groups.
	for _, hashName := range opt.Hashes {
		var ht hash.Type
		if err := ht.Set(hashName); err != nil {
			return nil, fmt.Errorf("invalid token %q in hash string %q", hashName, opt.Hashes.String())
		}
		if !f.slowHashes.Contains(ht) {
			f.autoHashes.Add(ht)
		}
		f.keepHashes.Add(ht)
		f.suppHashes.Add(ht)
	}

	fs.Debugf(f, "Groups by usage: cached %s, passed %s, auto %s, slow %s, supported %s",
		f.keepHashes, f.passHashes, f.autoHashes, f.slowHashes, f.suppHashes)

	var nilSet hash.Set
	if f.keepHashes == nilSet {
		return nil, errors.New("configured hash_names have nothing to keep in cache")
	}

	// NOTE(review): f.db stays nil when max_age is 0, yet methods such as
	// pruneHash and Move call f.db.Do unconditionally - presumably kv.DB
	// tolerates a nil receiver; confirm against lib/kv.
	if f.opt.MaxAge > 0 {
		gob.Register(hashRecord{})
		db, err := kv.Start(ctx, "hasher", f.Fs)
		if err != nil {
			return nil, err
		}
		f.db = db
	}

	stubFeatures := &fs.Features{
		CanHaveEmptyDirectories:  true,
		IsLocal:                  true,
		ReadMimeType:             true,
		WriteMimeType:            true,
		SetTier:                  true,
		GetTier:                  true,
		ReadMetadata:             true,
		WriteMetadata:            true,
		UserMetadata:             true,
		ReadDirMetadata:          true,
		WriteDirMetadata:         true,
		WriteDirSetModTime:       true,
		UserDirMetadata:          true,
		DirModTimeUpdatesOnWrite: true,
		PartialUploads:           true,
	}
	// Advertise only what both this overlay and the base remote support.
	f.features = stubFeatures.Fill(ctx, f).Mask(ctx, f.Fs).WrapsFs(f, f.Fs)

	// Keep the base Fs pinned in the cache for as long as f is alive.
	cache.PinUntilFinalized(f.Fs, f)
	// err may still be fs.ErrorIsFile - propagate it with the valid Fs.
	return f, err
}
   188  
   189  //
   190  // Filesystem
   191  //
   192  
   193  // Name of the remote (as passed into NewFs)
   194  func (f *Fs) Name() string { return f.name }
   195  
   196  // Root of the remote (as passed into NewFs)
   197  func (f *Fs) Root() string { return f.root }
   198  
   199  // Features returns the optional features of this Fs
   200  func (f *Fs) Features() *fs.Features { return f.features }
   201  
   202  // Hashes returns the supported hash sets.
   203  func (f *Fs) Hashes() hash.Set { return f.suppHashes }
   204  
   205  // String returns a description of the FS
   206  // The "hasher::" prefix is a distinctive feature.
   207  func (f *Fs) String() string {
   208  	return fmt.Sprintf("hasher::%s:%s", f.name, f.root)
   209  }
   210  
   211  // UnWrap returns the Fs that this Fs is wrapping
   212  func (f *Fs) UnWrap() fs.Fs { return f.Fs }
   213  
   214  // WrapFs returns the Fs that is wrapping this Fs
   215  func (f *Fs) WrapFs() fs.Fs { return f.wrapper }
   216  
   217  // SetWrapper sets the Fs that is wrapping this Fs
   218  func (f *Fs) SetWrapper(wrapper fs.Fs) { f.wrapper = wrapper }
   219  
   220  // Wrap base entries into hasher entries.
   221  func (f *Fs) wrapEntries(baseEntries fs.DirEntries) (hashEntries fs.DirEntries, err error) {
   222  	hashEntries = baseEntries[:0] // work inplace
   223  	for _, entry := range baseEntries {
   224  		switch x := entry.(type) {
   225  		case fs.Object:
   226  			obj, err := f.wrapObject(x, nil)
   227  			if err != nil {
   228  				return nil, err
   229  			}
   230  			hashEntries = append(hashEntries, obj)
   231  		default:
   232  			hashEntries = append(hashEntries, entry) // trash in - trash out
   233  		}
   234  	}
   235  	return hashEntries, nil
   236  }
   237  
   238  // List the objects and directories in dir into entries.
   239  func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
   240  	if entries, err = f.Fs.List(ctx, dir); err != nil {
   241  		return nil, err
   242  	}
   243  	return f.wrapEntries(entries)
   244  }
   245  
   246  // ListR lists the objects and directories recursively into out.
   247  func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) (err error) {
   248  	return f.Fs.Features().ListR(ctx, dir, func(baseEntries fs.DirEntries) error {
   249  		hashEntries, err := f.wrapEntries(baseEntries)
   250  		if err != nil {
   251  			return err
   252  		}
   253  		return callback(hashEntries)
   254  	})
   255  }
   256  
   257  // Purge a directory
   258  func (f *Fs) Purge(ctx context.Context, dir string) error {
   259  	if do := f.Fs.Features().Purge; do != nil {
   260  		if err := do(ctx, dir); err != nil {
   261  			return err
   262  		}
   263  		err := f.db.Do(true, &kvPurge{
   264  			dir: path.Join(f.Fs.Root(), dir),
   265  		})
   266  		if err != nil {
   267  			fs.Errorf(f, "Failed to purge some hashes: %v", err)
   268  		}
   269  		return nil
   270  	}
   271  	return fs.ErrorCantPurge
   272  }
   273  
   274  // PutStream uploads to the remote path with undeterminate size.
   275  func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   276  	if do := f.Fs.Features().PutStream; do != nil {
   277  		_ = f.pruneHash(src.Remote())
   278  		oResult, err := do(ctx, in, src, options...)
   279  		return f.wrapObject(oResult, err)
   280  	}
   281  	return nil, errors.New("PutStream not supported")
   282  }
   283  
   284  // PutUnchecked uploads the object, allowing duplicates.
   285  func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   286  	if do := f.Fs.Features().PutUnchecked; do != nil {
   287  		_ = f.pruneHash(src.Remote())
   288  		oResult, err := do(ctx, in, src, options...)
   289  		return f.wrapObject(oResult, err)
   290  	}
   291  	return nil, errors.New("PutUnchecked not supported")
   292  }
   293  
   294  // pruneHash deletes hash for a path
   295  func (f *Fs) pruneHash(remote string) error {
   296  	return f.db.Do(true, &kvPrune{
   297  		key: path.Join(f.Fs.Root(), remote),
   298  	})
   299  }
   300  
   301  // CleanUp the trash in the Fs
   302  func (f *Fs) CleanUp(ctx context.Context) error {
   303  	if do := f.Fs.Features().CleanUp; do != nil {
   304  		return do(ctx)
   305  	}
   306  	return errors.New("not supported by underlying remote")
   307  }
   308  
   309  // About gets quota information from the Fs
   310  func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
   311  	if do := f.Fs.Features().About; do != nil {
   312  		return do(ctx)
   313  	}
   314  	return nil, errors.New("not supported by underlying remote")
   315  }
   316  
   317  // ChangeNotify calls the passed function with a path that has had changes.
   318  func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryType), pollIntervalChan <-chan time.Duration) {
   319  	if do := f.Fs.Features().ChangeNotify; do != nil {
   320  		do(ctx, notifyFunc, pollIntervalChan)
   321  	}
   322  }
   323  
   324  // UserInfo returns info about the connected user
   325  func (f *Fs) UserInfo(ctx context.Context) (map[string]string, error) {
   326  	if do := f.Fs.Features().UserInfo; do != nil {
   327  		return do(ctx)
   328  	}
   329  	return nil, fs.ErrorNotImplemented
   330  }
   331  
   332  // Disconnect the current user
   333  func (f *Fs) Disconnect(ctx context.Context) error {
   334  	if do := f.Fs.Features().Disconnect; do != nil {
   335  		return do(ctx)
   336  	}
   337  	return fs.ErrorNotImplemented
   338  }
   339  
   340  // MergeDirs merges the contents of all the directories passed
   341  // in into the first one and rmdirs the other directories.
   342  func (f *Fs) MergeDirs(ctx context.Context, dirs []fs.Directory) error {
   343  	if do := f.Fs.Features().MergeDirs; do != nil {
   344  		return do(ctx, dirs)
   345  	}
   346  	return errors.New("MergeDirs not supported")
   347  }
   348  
   349  // DirSetModTime sets the directory modtime for dir
   350  func (f *Fs) DirSetModTime(ctx context.Context, dir string, modTime time.Time) error {
   351  	if do := f.Fs.Features().DirSetModTime; do != nil {
   352  		return do(ctx, dir, modTime)
   353  	}
   354  	return fs.ErrorNotImplemented
   355  }
   356  
   357  // MkdirMetadata makes the root directory of the Fs object
   358  func (f *Fs) MkdirMetadata(ctx context.Context, dir string, metadata fs.Metadata) (fs.Directory, error) {
   359  	if do := f.Fs.Features().MkdirMetadata; do != nil {
   360  		return do(ctx, dir, metadata)
   361  	}
   362  	return nil, fs.ErrorNotImplemented
   363  }
   364  
   365  // DirCacheFlush resets the directory cache - used in testing
   366  // as an optional interface
   367  func (f *Fs) DirCacheFlush() {
   368  	if do := f.Fs.Features().DirCacheFlush; do != nil {
   369  		do()
   370  	}
   371  }
   372  
   373  // PublicLink generates a public link to the remote path (usually readable by anyone)
   374  func (f *Fs) PublicLink(ctx context.Context, remote string, expire fs.Duration, unlink bool) (string, error) {
   375  	if do := f.Fs.Features().PublicLink; do != nil {
   376  		return do(ctx, remote, expire, unlink)
   377  	}
   378  	return "", errors.New("PublicLink not supported")
   379  }
   380  
   381  // Copy src to this remote using server-side copy operations.
   382  func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
   383  	do := f.Fs.Features().Copy
   384  	if do == nil {
   385  		return nil, fs.ErrorCantCopy
   386  	}
   387  	o, ok := src.(*Object)
   388  	if !ok {
   389  		return nil, fs.ErrorCantCopy
   390  	}
   391  	oResult, err := do(ctx, o.Object, remote)
   392  	return f.wrapObject(oResult, err)
   393  }
   394  
   395  // Move src to this remote using server-side move operations.
   396  func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
   397  	do := f.Fs.Features().Move
   398  	if do == nil {
   399  		return nil, fs.ErrorCantMove
   400  	}
   401  	o, ok := src.(*Object)
   402  	if !ok {
   403  		return nil, fs.ErrorCantMove
   404  	}
   405  	oResult, err := do(ctx, o.Object, remote)
   406  	if err != nil {
   407  		return nil, err
   408  	}
   409  	_ = f.db.Do(true, &kvMove{
   410  		src: path.Join(f.Fs.Root(), src.Remote()),
   411  		dst: path.Join(f.Fs.Root(), remote),
   412  		dir: false,
   413  		fs:  f,
   414  	})
   415  	return f.wrapObject(oResult, nil)
   416  }
   417  
   418  // DirMove moves src, srcRemote to this remote at dstRemote using server-side move operations.
   419  func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error {
   420  	do := f.Fs.Features().DirMove
   421  	if do == nil {
   422  		return fs.ErrorCantDirMove
   423  	}
   424  	srcFs, ok := src.(*Fs)
   425  	if !ok {
   426  		return fs.ErrorCantDirMove
   427  	}
   428  	err := do(ctx, srcFs.Fs, srcRemote, dstRemote)
   429  	if err == nil {
   430  		_ = f.db.Do(true, &kvMove{
   431  			src: path.Join(srcFs.Fs.Root(), srcRemote),
   432  			dst: path.Join(f.Fs.Root(), dstRemote),
   433  			dir: true,
   434  			fs:  f,
   435  		})
   436  	}
   437  	return err
   438  }
   439  
   440  // Shutdown the backend, closing any background tasks and any cached connections.
   441  func (f *Fs) Shutdown(ctx context.Context) (err error) {
   442  	if f.db != nil && !f.db.IsStopped() {
   443  		err = f.db.Stop(false)
   444  	}
   445  	if do := f.Fs.Features().Shutdown; do != nil {
   446  		if err2 := do(ctx); err2 != nil {
   447  			err = err2
   448  		}
   449  	}
   450  	return
   451  }
   452  
   453  // NewObject finds the Object at remote.
   454  func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
   455  	o, err := f.Fs.NewObject(ctx, remote)
   456  	return f.wrapObject(o, err)
   457  }
   458  
   459  //
   460  // Object
   461  //
   462  
// Object is a hasher object wrapping a single object of the base remote.
// (The previous comment about "one or more data chunks" was copied from
// the chunker backend and did not apply here.)
type Object struct {
	fs.Object     // the wrapped base object
	f         *Fs // the hasher Fs this object belongs to
}
   468  
   469  // Wrap base object into hasher object
   470  func (f *Fs) wrapObject(o fs.Object, err error) (obj fs.Object, outErr error) {
   471  	// log.Trace(o, "err=%v", err)("obj=%#v, outErr=%v", &obj, &outErr)
   472  	if err != nil {
   473  		return nil, err
   474  	}
   475  	if o == nil {
   476  		return nil, fs.ErrorObjectNotFound
   477  	}
   478  	return &Object{Object: o, f: f}, nil
   479  }
   480  
   481  // Fs returns read only access to the Fs that this object is part of
   482  func (o *Object) Fs() fs.Info { return o.f }
   483  
   484  // UnWrap returns the wrapped Object
   485  func (o *Object) UnWrap() fs.Object { return o.Object }
   486  
   487  // Return a string version
   488  func (o *Object) String() string {
   489  	if o == nil {
   490  		return "<nil>"
   491  	}
   492  	return o.Object.String()
   493  }
   494  
   495  // ID returns the ID of the Object if possible
   496  func (o *Object) ID() string {
   497  	if doer, ok := o.Object.(fs.IDer); ok {
   498  		return doer.ID()
   499  	}
   500  	return ""
   501  }
   502  
   503  // GetTier returns the Tier of the Object if possible
   504  func (o *Object) GetTier() string {
   505  	if doer, ok := o.Object.(fs.GetTierer); ok {
   506  		return doer.GetTier()
   507  	}
   508  	return ""
   509  }
   510  
   511  // SetTier set the Tier of the Object if possible
   512  func (o *Object) SetTier(tier string) error {
   513  	if doer, ok := o.Object.(fs.SetTierer); ok {
   514  		return doer.SetTier(tier)
   515  	}
   516  	return errors.New("SetTier not supported")
   517  }
   518  
   519  // MimeType of an Object if known, "" otherwise
   520  func (o *Object) MimeType(ctx context.Context) string {
   521  	if doer, ok := o.Object.(fs.MimeTyper); ok {
   522  		return doer.MimeType(ctx)
   523  	}
   524  	return ""
   525  }
   526  
   527  // Metadata returns metadata for an object
   528  //
   529  // It should return nil if there is no Metadata
   530  func (o *Object) Metadata(ctx context.Context) (fs.Metadata, error) {
   531  	do, ok := o.Object.(fs.Metadataer)
   532  	if !ok {
   533  		return nil, nil
   534  	}
   535  	return do.Metadata(ctx)
   536  }
   537  
// Check the interfaces are satisfied
// (compile-time assertions only - they have no runtime cost)
var (
	_ fs.Fs              = (*Fs)(nil)
	_ fs.Purger          = (*Fs)(nil)
	_ fs.Copier          = (*Fs)(nil)
	_ fs.Mover           = (*Fs)(nil)
	_ fs.DirMover        = (*Fs)(nil)
	_ fs.Commander       = (*Fs)(nil)
	_ fs.PutUncheckeder  = (*Fs)(nil)
	_ fs.PutStreamer     = (*Fs)(nil)
	_ fs.CleanUpper      = (*Fs)(nil)
	_ fs.UnWrapper       = (*Fs)(nil)
	_ fs.ListRer         = (*Fs)(nil)
	_ fs.Abouter         = (*Fs)(nil)
	_ fs.Wrapper         = (*Fs)(nil)
	_ fs.MergeDirser     = (*Fs)(nil)
	_ fs.DirSetModTimer  = (*Fs)(nil)
	_ fs.MkdirMetadataer = (*Fs)(nil)
	_ fs.DirCacheFlusher = (*Fs)(nil)
	_ fs.ChangeNotifier  = (*Fs)(nil)
	_ fs.PublicLinker    = (*Fs)(nil)
	_ fs.UserInfoer      = (*Fs)(nil)
	_ fs.Disconnecter    = (*Fs)(nil)
	_ fs.Shutdowner      = (*Fs)(nil)
	_ fs.FullObject      = (*Object)(nil)
)