github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/hasher/object.go

package hasher

import (
	"context"
	"errors"
	"fmt"
	"io"
	"path"
	"time"

	"github.com/rclone/rclone/fs"
	"github.com/rclone/rclone/fs/hash"
	"github.com/rclone/rclone/fs/operations"
)

// obtain hash for an object
func (o *Object) getHash(ctx context.Context, hashType hash.Type) (string, error) {
	maxAge := time.Duration(o.f.opt.MaxAge)
	if maxAge <= 0 {
		return "", nil
	}
	fp := o.fingerprint(ctx)
	if fp == "" {
		return "", errors.New("fingerprint failed")
	}
	return o.f.getRawHash(ctx, hashType, o.Remote(), fp, maxAge)
}

// obtain hash for a path
func (f *Fs) getRawHash(ctx context.Context, hashType hash.Type, remote, fp string, age time.Duration) (string, error) {
	key := path.Join(f.Fs.Root(), remote)
	op := &kvGet{
		key:  key,
		fp:   fp,
		hash: hashType.String(),
		age:  age,
	}
	err := f.db.Do(false, op)
	return op.val, err
}

// put new hashes for an object
func (o *Object) putHashes(ctx context.Context, rawHashes hashMap) error {
	if o.f.opt.MaxAge <= 0 {
		return nil
	}
	fp := o.fingerprint(ctx)
	if fp == "" {
		return nil
	}
	key := path.Join(o.f.Fs.Root(), o.Remote())
	hashes := operations.HashSums{}
	for hashType, hashVal := range rawHashes {
		hashes[hashType.String()] = hashVal
	}
	return o.f.putRawHashes(ctx, key, fp, hashes)
}

// set hashes for a path without any validation
func (f *Fs) putRawHashes(ctx context.Context, key, fp string, hashes operations.HashSums) error {
	return f.db.Do(true, &kvPut{
		key:    key,
		fp:     fp,
		hashes: hashes,
		age:    time.Duration(f.opt.MaxAge),
	})
}

// Hash returns the selected checksum of the file or "" if unavailable.
func (o *Object) Hash(ctx context.Context, hashType hash.Type) (hashVal string, err error) {
	f := o.f
	if f.passHashes.Contains(hashType) {
		fs.Debugf(o, "pass %s", hashType)
		hashVal, err = o.Object.Hash(ctx, hashType)
		if hashVal != "" {
			return hashVal, err
		}
		if err != nil {
			fs.Debugf(o, "error passing %s: %v", hashType, err)
		}
		fs.Debugf(o, "passed %s is blank -- trying other methods", hashType)
	}
	if !f.suppHashes.Contains(hashType) {
		fs.Debugf(o, "unsupp %s", hashType)
		return "", hash.ErrUnsupported
	}
	if hashVal, err = o.getHash(ctx, hashType); err != nil {
		fs.Debugf(o, "getHash: %v", err)
		err = nil
		hashVal = ""
	}
	if hashVal != "" {
		fs.Debugf(o, "cached %s = %q", hashType, hashVal)
		return hashVal, nil
	}
	if f.slowHashes.Contains(hashType) {
		fs.Debugf(o, "slow %s", hashType)
		hashVal, err = o.Object.Hash(ctx, hashType)
		if err == nil && hashVal != "" && f.keepHashes.Contains(hashType) {
			if err = o.putHashes(ctx, hashMap{hashType: hashVal}); err != nil {
				fs.Debugf(o, "putHashes: %v", err)
				err = nil
			}
		}
		return hashVal, err
	}
	if f.autoHashes.Contains(hashType) && o.Size() < int64(f.opt.AutoSize) {
		_ = o.updateHashes(ctx)
		if hashVal, err = o.getHash(ctx, hashType); err != nil {
			fs.Debugf(o, "auto %s = %q (%v)", hashType, hashVal, err)
			err = nil
		}
	}
	return hashVal, err
}

// updateHashes performs implicit "rclone hashsum --download" and updates cache.
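// The cache update happens as a side effect: for a full read, Open wraps the
// stream in a hashingReader whose callback stores the freshly computed sums
// via putHashes once the copy below reaches EOF.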
func (o *Object) updateHashes(ctx context.Context) error {
	r, err := o.Open(ctx)
	if err != nil {
		fs.Infof(o, "update failed (open): %v", err)
		return err
	}
	defer func() {
		_ = r.Close()
	}()
	if _, err = io.Copy(io.Discard, r); err != nil {
		fs.Infof(o, "update failed (copy): %v", err)
		return err
	}
	return nil
}

// Update the object with the given data, time and size.
func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
	_ = o.f.pruneHash(src.Remote())
	return o.Object.Update(ctx, in, src, options...)
}

// Remove an object.
func (o *Object) Remove(ctx context.Context) error {
	_ = o.f.pruneHash(o.Remote())
	return o.Object.Remove(ctx)
}

// SetModTime sets the modification time of the file.
// Also prunes the cache entry when modtime changes so that
// touching a file will trigger checksum recalculation even
// on backends that don't provide modTime with fingerprint.
func (o *Object) SetModTime(ctx context.Context, mtime time.Time) error {
	if mtime != o.Object.ModTime(ctx) {
		_ = o.f.pruneHash(o.Remote())
	}
	return o.Object.SetModTime(ctx, mtime)
}

// Open opens the file for read.
// Full reads will also update object hashes.
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (r io.ReadCloser, err error) {
	size := o.Size()
	var offset, limit int64 = 0, -1
	for _, option := range options {
		switch opt := option.(type) {
		case *fs.SeekOption:
			offset = opt.Offset
		case *fs.RangeOption:
			offset, limit = opt.Decode(size)
		}
	}
	if offset < 0 {
		return nil, errors.New("invalid offset")
	}
	if limit < 0 {
		limit = size - offset
	}
	if r, err = o.Object.Open(ctx, options...); err != nil {
		return nil, err
	}
	if offset != 0 || limit < size {
		// It's a partial read
		return r, err
	}
	return o.f.newHashingReader(ctx, r, func(sums hashMap) {
		if err := o.putHashes(ctx, sums); err != nil {
			fs.Infof(o, "auto hashing error: %v", err)
		}
	})
}

// Put data into the remote path with given modTime and size
func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
	var (
		o      fs.Object
		common hash.Set
		rehash bool
		hashes hashMap
	)
	if fsrc := src.Fs(); fsrc != nil {
		common = fsrc.Hashes().Overlap(f.keepHashes)
		// Rehash if source does not have all required hashes or hashing is slow
		rehash = fsrc.Features().SlowHash || common != f.keepHashes
	}

	wrapIn := in
	if rehash {
		r, err := f.newHashingReader(ctx, in, func(sums hashMap) {
			hashes = sums
		})
		fs.Debugf(src, "Rehash in-fly due to incomplete or slow source set %v (err: %v)", common, err)
		if err == nil {
			wrapIn = r
		} else {
			rehash = false
		}
	}

	_ = f.pruneHash(src.Remote())
	oResult, err := f.Fs.Put(ctx, wrapIn, src, options...)
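	// Wrap the uploaded object so the hashes collected above can be stored
	// against the hasher object; wrapObject also passes any upload error through.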
	o, err = f.wrapObject(oResult, err)
	if err != nil {
		return nil, err
	}

	if !rehash {
		hashes = hashMap{}
		for _, ht := range common.Array() {
			if h, e := src.Hash(ctx, ht); e == nil && h != "" {
				hashes[ht] = h
			}
		}
	}
	if len(hashes) > 0 {
		err := o.(*Object).putHashes(ctx, hashes)
		fs.Debugf(o, "Applied %d source hashes, err: %v", len(hashes), err)
	}
	return o, err
}

// hashingReader wraps a reader and feeds everything read through it into a
// MultiHasher, delivering the resulting sums via a callback at EOF.
type hashingReader struct {
	rd     io.Reader
	hasher *hash.MultiHasher
	fun    func(hashMap)
}

// newHashingReader returns a hashingReader computing the hash types in
// f.keepHashes; fun is called once with the sums when the stream has been
// read to EOF.
func (f *Fs) newHashingReader(ctx context.Context, rd io.Reader, fun func(hashMap)) (*hashingReader, error) {
	hasher, err := hash.NewMultiHasherTypes(f.keepHashes)
	if err != nil {
		return nil, err
	}
	hr := &hashingReader{
		rd:     rd,
		hasher: hasher,
		fun:    fun,
	}
	return hr, nil
}

// Read passes data through while hashing it. On a read or hash error the
// hasher is dropped so no partial sums are reported; at EOF the accumulated
// sums are delivered to fun exactly once.
func (r *hashingReader) Read(p []byte) (n int, err error) {
	n, err = r.rd.Read(p)
	if err != nil && err != io.EOF {
		r.hasher = nil
	}
	if r.hasher != nil {
		if _, errHash := r.hasher.Write(p[:n]); errHash != nil {
			r.hasher = nil
			err = errHash
		}
	}
	if err == io.EOF && r.hasher != nil {
		r.fun(r.hasher.Sums())
		r.hasher = nil
	}
	return
}

// Close closes the underlying reader if it is an io.ReadCloser.
func (r *hashingReader) Close() error {
	if rc, ok := r.rd.(io.ReadCloser); ok {
		return rc.Close()
	}
	return nil
}

// Return object fingerprint or empty string in case of errors
//
// Note that we can't use the generic `fs.Fingerprint` here because
// this fingerprint is used to pick _derived hashes_ that are slow
// to calculate or completely unsupported by the base remote.
//
// The hasher fingerprint must be based on `fpHash`, the first _fast_
// hash supported _by the underlying remote_ (if there is one),
// while `fs.Fingerprint` would select a hash _produced by hasher_,
// creating an unresolvable fingerprint loop.
func (o *Object) fingerprint(ctx context.Context) string {
	size := o.Object.Size()
	timeStr := "-"
	if o.f.fpTime {
		timeStr = o.Object.ModTime(ctx).UTC().Format(timeFormat)
		if timeStr == "" {
			return ""
		}
	}
	hashStr := "-"
	if o.f.fpHash != hash.None {
		var err error
		hashStr, err = o.Object.Hash(ctx, o.f.fpHash)
		if hashStr == "" || err != nil {
			return ""
		}
	}
	return fmt.Sprintf("%d,%s,%s", size, timeStr, hashStr)
}
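
// Illustrative sketch, not part of the upstream file: assuming fpTime is
// enabled and fpHash is MD5, fingerprint returns a "size,modtime,hash" string
// along the lines of
//
//	"1048576,2024-05-17T10:03:46Z,d41d8cd98f00b204e9800998ecf8427e"
//
// (the exact modtime layout depends on timeFormat). A change to the size,
// modtime or base hash therefore yields a new fingerprint, so hashes cached
// under the old fingerprint are treated as stale by getRawHash.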