github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/hasher/object.go

package hasher

import (
	"context"
	"errors"
	"fmt"
	"io"
	"path"
	"time"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
)

// getHash obtains the cached hash for an object
func (o *Object) getHash(ctx context.Context, hashType hash.Type) (string, error) {
	maxAge := time.Duration(o.f.opt.MaxAge)
	if maxAge <= 0 {
		return "", nil
	}
	fp := o.fingerprint(ctx)
	if fp == "" {
		return "", errors.New("fingerprint failed")
	}
	return o.f.getRawHash(ctx, hashType, o.Remote(), fp, maxAge)
}

// getRawHash obtains a cached hash for a path
func (f *Fs) getRawHash(ctx context.Context, hashType hash.Type, remote, fp string, age time.Duration) (string, error) {
	key := path.Join(f.Fs.Root(), remote)
	op := &kvGet{
		key:  key,
		fp:   fp,
		hash: hashType.String(),
		age:  age,
	}
	err := f.db.Do(false, op)
	return op.val, err
}

// putHashes stores new hashes for an object in the cache
func (o *Object) putHashes(ctx context.Context, rawHashes hashMap) error {
	if o.f.opt.MaxAge <= 0 {
		return nil
	}
	fp := o.fingerprint(ctx)
	if fp == "" {
		return nil
	}
	key := path.Join(o.f.Fs.Root(), o.Remote())
	hashes := operations.HashSums{}
	for hashType, hashVal := range rawHashes {
		hashes[hashType.String()] = hashVal
	}
	return o.f.putRawHashes(ctx, key, fp, hashes)
}

// putRawHashes sets hashes for a path without any validation
func (f *Fs) putRawHashes(ctx context.Context, key, fp string, hashes operations.HashSums) error {
	return f.db.Do(true, &kvPut{
		key:    key,
		fp:     fp,
		hashes: hashes,
		age:    time.Duration(f.opt.MaxAge),
	})
}

// Hash returns the selected checksum of the file or "" if unavailable.
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (hashVal string, err error) {
	f := o.f
	if f.passHashes.Contains(hashType) {
		fs.Debugf(o, "pass %s", hashType)
		hashVal, err = o.Object.Hash(ctx, hashType)
		if hashVal != "" {
			return hashVal, err
		}
		if err != nil {
			fs.Debugf(o, "error passing %s: %v", hashType, err)
		}
		fs.Debugf(o, "passed %s is blank -- trying other methods", hashType)
	}
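	// Not passed through from the base remote (or it came back blank):
	// check that the hash is supported at all, then try the cache,
	// then slow or auto hashing below.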
	if !f.suppHashes.Contains(hashType) {
		fs.Debugf(o, "unsupp %s", hashType)
		return "", hash.ErrUnsupported
	}
	if hashVal, err = o.getHash(ctx, hashType); err != nil {
		fs.Debugf(o, "getHash: %v", err)
		err = nil
		hashVal = ""
	}
	if hashVal != "" {
		fs.Debugf(o, "cached %s = %q", hashType, hashVal)
		return hashVal, nil
	}
	if f.slowHashes.Contains(hashType) {
		fs.Debugf(o, "slow %s", hashType)
		hashVal, err = o.Object.Hash(ctx, hashType)
		if err == nil && hashVal != "" && f.keepHashes.Contains(hashType) {
			if err = o.putHashes(ctx, hashMap{hashType: hashVal}); err != nil {
				fs.Debugf(o, "putHashes: %v", err)
				err = nil
			}
		}
		return hashVal, err
	}
	if f.autoHashes.Contains(hashType) && o.Size() < int64(f.opt.AutoSize) {
		_ = o.updateHashes(ctx)
		if hashVal, err = o.getHash(ctx, hashType); err != nil {
			fs.Debugf(o, "auto %s = %q (%v)", hashType, hashVal, err)
			err = nil
		}
	}
	return hashVal, err
}

// updateHashes performs an implicit "rclone hashsum --download" and updates the cache.
func (o *Object) updateHashes(ctx context.Context) error {
	r, err := o.Open(ctx)
	if err != nil {
		fs.Infof(o, "update failed (open): %v", err)
		return err
	}
	defer func() {
		_ = r.Close()
	}()
	if _, err = io.Copy(io.Discard, r); err != nil {
		fs.Infof(o, "update failed (copy): %v", err)
		return err
	}
	return nil
}

// Update the object with the given data, time and size.
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
	_ = o.f.pruneHash(src.Remote())
	return o.Object.Update(ctx, in, src, options...)
}

// Remove an object.
func (o *Object) Remove(ctx context.Context) error {
	_ = o.f.pruneHash(o.Remote())
	return o.Object.Remove(ctx)
}

// SetModTime sets the modification time of the file.
// It also prunes the cache entry when the modtime changes, so that
// touching a file triggers checksum recalculation even on backends
// whose fingerprint does not include modTime.
func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
	if mtime != o.Object.ModTime(ctx) {
		_ = o.f.pruneHash(o.Remote())
	}
	return o.Object.SetModTime(ctx, mtime)
}

// Open opens the file for read.
// Full reads will also update object hashes.
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (r io.ReadCloser, err error) {
	size := o.Size()
	var offset, limit int64 = 0, -1
	for _, option := range options {
		switch opt := option.(type) {
		case *fs.SeekOption:
			offset = opt.Offset
		case *fs.RangeOption:
			offset, limit = opt.Decode(size)
		}
	}
	if offset < 0 {
		return nil, errors.New("invalid offset")
	}
	if limit < 0 {
		limit = size - offset
	}
	if r, err = o.Object.Open(ctx, options...); err != nil {
		return nil, err
	}
	if offset != 0 || limit < size {
		// It's a partial read
		return r, err
	}
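	// Full read from offset zero: hash the data as it streams and cache the sums at EOF.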
	return o.f.newHashingReader(ctx, r, func(sums hashMap) {
		if err := o.putHashes(ctx, sums); err != nil {
			fs.Infof(o, "auto hashing error: %v", err)
		}
	})
}

// Put data into the remote path with the given modTime and size
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	var (
		o      fs.Object
		common hash.Set
		rehash bool
		hashes hashMap
	)
	if fsrc := src.Fs(); fsrc != nil {
		common = fsrc.Hashes().Overlap(f.keepHashes)
		// Rehash if the source does not have all required hashes or its hashing is slow
		rehash = fsrc.Features().SlowHash || common != f.keepHashes
	}

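	// When rehashing, wrap the input stream so the missing hashes are
	// computed on the fly during the upload.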
	wrapIn := in
	if rehash {
		r, err := f.newHashingReader(ctx, in, func(sums hashMap) {
			hashes = sums
		})
		fs.Debugf(src, "Rehashing on the fly due to incomplete or slow source hash set %v (err: %v)", common, err)
		if err == nil {
			wrapIn = r
		} else {
			rehash = false
		}
	}

	_ = f.pruneHash(src.Remote())
	oResult, err := f.Fs.Put(ctx, wrapIn, src, options...)
	o, err = f.wrapObject(oResult, err)
	if err != nil {
		return nil, err
	}

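	// Without an in-flight rehash, fall back to whatever hashes the source
	// can provide for the kept hash types.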
	if !rehash {
		hashes = hashMap{}
		for _, ht := range common.Array() {
			if h, e := src.Hash(ctx, ht); e == nil && h != "" {
				hashes[ht] = h
			}
		}
	}
	if len(hashes) > 0 {
		err := o.(*Object).putHashes(ctx, hashes)
		fs.Debugf(o, "Applied %d source hashes, err: %v", len(hashes), err)
	}
	return o, err
}

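// hashingReader wraps an io.Reader, feeding everything read through it into a
// MultiHasher and reporting the resulting sums via a callback at EOF.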
type hashingReader struct {
	rd     io.Reader
	hasher *hash.MultiHasher
	fun    func(hashMap)
}

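// newHashingReader wraps rd so that the hash types kept by this backend are
// calculated while the data is read and passed to fun when the read completes.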
func (f *Fs) newHashingReader(ctx context.Context, rd io.Reader, fun func(hashMap)) (*hashingReader, error) {
	hasher, err := hash.NewMultiHasherTypes(f.keepHashes)
	if err != nil {
		return nil, err
	}
	hr := &hashingReader{
		rd:     rd,
		hasher: hasher,
		fun:    fun,
	}
	return hr, nil
}

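// Read passes data through from the underlying reader while updating the hashes.
// Any read or hashing error disables further hashing; on EOF the accumulated
// sums are handed to the callback.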
func (r *hashingReader) Read(p []byte) (n int, err error) {
	n, err = r.rd.Read(p)
	if err != nil && err != io.EOF {
		r.hasher = nil
	}
	if r.hasher != nil {
		if _, errHash := r.hasher.Write(p[:n]); errHash != nil {
			r.hasher = nil
			err = errHash
		}
	}
	if err == io.EOF && r.hasher != nil {
		r.fun(r.hasher.Sums())
		r.hasher = nil
	}
	return
}

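// Close closes the underlying reader if it implements io.ReadCloser.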
func (r *hashingReader) Close() error {
	if rc, ok := r.rd.(io.ReadCloser); ok {
		return rc.Close()
	}
	return nil
}

// fingerprint returns the object fingerprint or an empty string in case of errors.
//
// Note that we can't use the generic `fs.Fingerprint` here because
// this fingerprint is used to pick _derived hashes_ that are slow
// to calculate or completely unsupported by the base remote.
//
// The hasher fingerprint must be based on `fsHash`, the first _fast_
// hash supported _by the underlying remote_ (if there is one),
// while `fs.Fingerprint` would select a hash _produced by hasher_,
// creating an unresolvable fingerprint loop.
func (o *Object) fingerprint(ctx context.Context) string {
	size := o.Object.Size()
	timeStr := "-"
	if o.f.fpTime {
		timeStr = o.Object.ModTime(ctx).UTC().Format(timeFormat)
		if timeStr == "" {
			return ""
		}
	}
	hashStr := "-"
	if o.f.fpHash != hash.None {
		var err error
		hashStr, err = o.Object.Hash(ctx, o.f.fpHash)
		if hashStr == "" || err != nil {
			return ""
		}
	}
	return fmt.Sprintf("%d,%s,%s", size, timeStr, hashStr)
}