github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/core/lcopy.go (about) 1 // Package core provides core metadata and in-cluster API 2 /* 3 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package core 6 7 import ( 8 "fmt" 9 "os" 10 11 "github.com/NVIDIA/aistore/cmn/cos" 12 "github.com/NVIDIA/aistore/cmn/debug" 13 "github.com/NVIDIA/aistore/cmn/nlog" 14 "github.com/NVIDIA/aistore/fs" 15 ) 16 17 // 18 // LOM copy management 19 // 20 21 func (lom *LOM) whingeCopy() (yes bool) { 22 if !lom.IsCopy() { 23 return 24 } 25 msg := fmt.Sprintf("unexpected: %s([fqn=%s] [hrw=%s] %+v)", lom, lom.FQN, lom.HrwFQN, lom.md.copies) 26 debug.Assert(false, msg) 27 nlog.Errorln(msg) 28 return true 29 } 30 31 func (lom *LOM) HasCopies() bool { return len(lom.md.copies) > 1 } 32 func (lom *LOM) NumCopies() int { return max(len(lom.md.copies), 1) } // metadata-wise 33 34 // GetCopies returns all copies 35 // NOTE: a) copies include lom.FQN aka "main repl.", and b) caller must take a lock 36 func (lom *LOM) GetCopies() fs.MPI { 37 debug.AssertFunc(func() bool { 38 rc, exclusive := lom.IsLocked() 39 return exclusive || rc > 0 40 }) 41 return lom.md.copies 42 } 43 44 // given an existing (on-disk) object, determines whether it is a _copy_ 45 // (compare with isMirror below) 46 func (lom *LOM) IsCopy() bool { 47 if lom.IsHRW() { 48 return false 49 } 50 // misplaced or a copy 51 _, ok := lom.md.copies[lom.FQN] 52 return ok 53 } 54 55 // determines whether the two LOM _structures_ represent objects that must be _copies_ of each other 56 // (compare with IsCopy above) 57 func (lom *LOM) isMirror(dst *LOM) bool { 58 return lom.MirrorConf().Enabled && 59 lom.ObjName == dst.ObjName && 60 lom.Bck().Equal(dst.Bck(), true /* must have same BID*/, true /* same backend */) 61 } 62 63 func (lom *LOM) delCopyMd(copyFQN string) { 64 delete(lom.md.copies, copyFQN) 65 if len(lom.md.copies) <= 1 { 66 lom.md.copies = nil 67 } 68 } 69 70 // NOTE: used only in tests 71 func (lom *LOM) AddCopy(copyFQN string, mpi *fs.Mountpath) error { 72 if lom.md.copies == nil { 73 lom.md.copies = make(fs.MPI, 2) 74 } 75 lom.md.copies[copyFQN] = mpi 76 lom.md.copies[lom.FQN] = lom.mi 77 return lom.syncMetaWithCopies() 78 } 79 80 func (lom *LOM) DelCopies(copiesFQN ...string) (err error) { 81 numCopies := lom.NumCopies() 82 // 1. Delete all copies from the metadata 83 for _, copyFQN := range copiesFQN { 84 if _, ok := lom.md.copies[copyFQN]; !ok { 85 return fmt.Errorf("lom %s(num: %d): copy %s does not exist", lom, numCopies, copyFQN) 86 } 87 lom.delCopyMd(copyFQN) 88 } 89 90 // 2. Update metadata on remaining copies, if any 91 if err := lom.syncMetaWithCopies(); err != nil { 92 debug.AssertNoErr(err) 93 return err 94 } 95 96 // 3. Remove the copies 97 for _, copyFQN := range copiesFQN { 98 if err1 := cos.RemoveFile(copyFQN); err1 != nil { 99 nlog.Errorln(err1) // TODO: LRU should take care of that later. 100 continue 101 } 102 } 103 return 104 } 105 106 func (lom *LOM) DelAllCopies() (err error) { 107 copiesFQN := make([]string, 0, len(lom.md.copies)) 108 for copyFQN := range lom.md.copies { 109 if copyFQN == lom.FQN { 110 continue 111 } 112 copiesFQN = append(copiesFQN, copyFQN) 113 } 114 return lom.DelCopies(copiesFQN...) 115 } 116 117 // DelExtraCopies deletes obj replicas that are not part of the lom.md.copies metadata 118 // (cleanup) 119 func (lom *LOM) DelExtraCopies(fqn ...string) (removed bool, err error) { 120 if lom.whingeCopy() { 121 return 122 } 123 availablePaths := fs.GetAvail() 124 for _, mi := range availablePaths { 125 copyFQN := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName) 126 if _, ok := lom.md.copies[copyFQN]; ok { 127 continue 128 } 129 if err1 := cos.RemoveFile(copyFQN); err1 != nil { 130 err = err1 131 continue 132 } 133 if len(fqn) > 0 && fqn[0] == copyFQN { 134 removed = true 135 } 136 } 137 return 138 } 139 140 // syncMetaWithCopies tries to make sure that all copies have identical metadata. 141 // NOTE: uname for LOM must be already locked. 142 // NOTE: changes _may_ be made - the caller must call lom.Persist() upon return 143 func (lom *LOM) syncMetaWithCopies() (err error) { 144 var copyFQN string 145 if !lom.HasCopies() { 146 return nil 147 } 148 // NOTE: caller is responsible for write-locking 149 debug.AssertFunc(func() bool { 150 _, exclusive := lom.IsLocked() 151 return exclusive 152 }) 153 if !lom.WritePolicy().IsImmediate() { 154 lom.md.makeDirty() 155 return nil 156 } 157 for { 158 if copyFQN, err = lom.persistMdOnCopies(); err == nil { 159 break 160 } 161 lom.delCopyMd(copyFQN) 162 if err1 := cos.Stat(copyFQN); err1 != nil && !os.IsNotExist(err1) { 163 T.FSHC(err, copyFQN) // TODO: notify scrubber 164 } 165 } 166 return 167 } 168 169 // RestoreObjectFromAny tries to restore the object at its default location. 170 // Returns true if object exists, false otherwise 171 // TODO: locking vs concurrent restore: consider (read-lock object + write-lock meta) split 172 func (lom *LOM) RestoreToLocation() (exists bool) { 173 lom.Lock(true) 174 if err := lom.Load(true /*cache it*/, true /*locked*/); err == nil { 175 lom.Unlock(true) 176 return true // nothing to do 177 } 178 var ( 179 saved = lom.md.pushrt() 180 availablePaths = fs.GetAvail() 181 buf, slab = g.pmm.Alloc() 182 ) 183 for path, mi := range availablePaths { 184 if path == lom.mi.Path { 185 continue 186 } 187 fqn := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName) 188 if err := cos.Stat(fqn); err != nil { 189 continue 190 } 191 dst, err := lom._restore(fqn, buf) 192 if err == nil { 193 lom.md = dst.md 194 lom.md.poprt(saved) 195 exists = true 196 FreeLOM(dst) 197 break 198 } 199 if dst != nil { 200 FreeLOM(dst) 201 } 202 } 203 lom.Unlock(true) 204 slab.Free(buf) 205 return 206 } 207 208 func (lom *LOM) _restore(fqn string, buf []byte) (dst *LOM, err error) { 209 src := lom.CloneMD(fqn) 210 defer FreeLOM(src) 211 if err = src.InitFQN(fqn, lom.Bucket()); err != nil { 212 return 213 } 214 if err = src.Load(false /*cache it*/, true /*locked*/); err != nil { 215 return 216 } 217 // restore at default location 218 dst, err = src.Copy2FQN(lom.FQN, buf) 219 return 220 } 221 222 // increment the object's num copies by (well) copying the former 223 // (compare with lom.Copy2FQN below) 224 func (lom *LOM) Copy(mi *fs.Mountpath, buf []byte) (err error) { 225 var ( 226 copyFQN = mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName) 227 workFQN = mi.MakePathFQN(lom.Bucket(), fs.WorkfileType, fs.WorkfileCopy+"."+lom.ObjName) 228 ) 229 // check if the copy destination exists and then skip copying if it's also identical 230 if errExists := cos.Stat(copyFQN); errExists == nil { 231 cplom := AllocLOM(lom.ObjName) 232 defer FreeLOM(cplom) 233 if errExists = cplom.InitFQN(copyFQN, lom.Bucket()); errExists == nil { 234 if errExists = cplom.Load(false /*cache it*/, true /*locked*/); errExists == nil && cplom.Equal(lom) { 235 goto add 236 } 237 } 238 } 239 240 // copy 241 _, _, err = cos.CopyFile(lom.FQN, workFQN, buf, cos.ChecksumNone) // TODO: checksumming 242 if err != nil { 243 return 244 } 245 if err = cos.Rename(workFQN, copyFQN); err != nil { 246 if errRemove := cos.RemoveFile(workFQN); errRemove != nil && !os.IsNotExist(errRemove) { 247 nlog.Errorln("nested err:", errRemove) 248 } 249 return 250 } 251 add: 252 // add md and persist 253 lom.AddCopy(copyFQN, mi) 254 err = lom.Persist() 255 if err != nil { 256 lom.delCopyMd(copyFQN) 257 nlog.Errorln(err) 258 return err 259 } 260 err = lom.syncMetaWithCopies() 261 return 262 } 263 264 // copy object => any local destination 265 // recommended for copying between different buckets (compare with lom.Copy() above) 266 // NOTE: `lom` source must be w-locked 267 func (lom *LOM) Copy2FQN(dstFQN string, buf []byte) (dst *LOM, err error) { 268 dst = lom.CloneMD(dstFQN) 269 if err = dst.InitFQN(dstFQN, nil); err == nil { 270 err = lom.copy2fqn(dst, buf) 271 } 272 if err != nil { 273 FreeLOM(dst) 274 dst = nil 275 } 276 return 277 } 278 279 func (lom *LOM) copy2fqn(dst *LOM, buf []byte) (err error) { 280 var ( 281 dstCksum *cos.CksumHash 282 dstFQN = dst.FQN 283 srcCksum = lom.Checksum() 284 cksumType = cos.ChecksumNone 285 ) 286 if !srcCksum.IsEmpty() { 287 cksumType = srcCksum.Ty() 288 } 289 if dst.isMirror(lom) && lom.md.copies != nil { 290 dst.md.copies = make(fs.MPI, len(lom.md.copies)+1) 291 for fqn, mpi := range lom.md.copies { 292 dst.md.copies[fqn] = mpi 293 } 294 } 295 if !dst.Bck().Equal(lom.Bck(), true /*same ID*/, true /*same backend*/) { 296 // The copy will be in a new bucket - completely separate object. Hence, we have to set initial version. 297 dst.SetVersion(lomInitialVersion) 298 } 299 300 workFQN := fs.CSM.Gen(dst, fs.WorkfileType, fs.WorkfileCopy) 301 _, dstCksum, err = cos.CopyFile(lom.FQN, workFQN, buf, cksumType) 302 if err != nil { 303 return 304 } 305 306 if err = cos.Rename(workFQN, dstFQN); err != nil { 307 if errRemove := cos.RemoveFile(workFQN); errRemove != nil && !os.IsNotExist(errRemove) { 308 nlog.Errorln("nested err:", errRemove) 309 } 310 return 311 } 312 313 if cksumType != cos.ChecksumNone { 314 if !dstCksum.Equal(lom.Checksum()) { 315 return cos.NewErrDataCksum(&dstCksum.Cksum, lom.Checksum()) 316 } 317 dst.SetCksum(dstCksum.Clone()) 318 } 319 320 // persist 321 if lom.isMirror(dst) { 322 if lom.md.copies == nil { 323 lom.md.copies = make(fs.MPI, 2) 324 dst.md.copies = make(fs.MPI, 2) 325 } 326 lom.md.copies[dstFQN], dst.md.copies[dstFQN] = dst.mi, dst.mi 327 lom.md.copies[lom.FQN], dst.md.copies[lom.FQN] = lom.mi, lom.mi 328 if err = lom.syncMetaWithCopies(); err != nil { 329 if _, ok := lom.md.copies[dst.FQN]; !ok { 330 if errRemove := os.Remove(dst.FQN); errRemove != nil && !os.IsNotExist(errRemove) { 331 nlog.Errorln("nested err:", errRemove) 332 } 333 } 334 // `lom.syncMetaWithCopies()` may have made changes notwithstanding 335 if errPersist := lom.Persist(); errPersist != nil { 336 nlog.Errorln("nested err:", errPersist) 337 } 338 return 339 } 340 err = lom.Persist() 341 } else if err = dst.Persist(); err != nil { 342 if errRemove := os.Remove(dst.FQN); errRemove != nil && !os.IsNotExist(errRemove) { 343 nlog.Errorln("nested err:", errRemove) 344 } 345 } 346 return 347 } 348 349 // load-balanced GET 350 func (lom *LOM) LBGet() (fqn string) { 351 if !lom.HasCopies() { 352 return lom.FQN 353 } 354 return lom.leastUtilCopy() 355 } 356 357 // NOTE: reconsider counting GETs (and the associated overhead) 358 // vs ios.refreshIostatCache (and the associated delay) 359 func (lom *LOM) leastUtilCopy() (fqn string) { 360 var ( 361 mpathUtils = fs.GetAllMpathUtils() 362 minUtil = mpathUtils.Get(lom.mi.Path) 363 copies = lom.GetCopies() 364 ) 365 fqn = lom.FQN 366 for copyFQN, copyMPI := range copies { 367 if copyFQN != lom.FQN { 368 if util := mpathUtils.Get(copyMPI.Path); util < minUtil { 369 fqn, minUtil = copyFQN, util 370 } 371 } 372 } 373 return 374 } 375 376 // returns the least utilized mountpath that does _not_ have a copy of this `lom` yet 377 // (compare with leastUtilCopy()) 378 func (lom *LOM) LeastUtilNoCopy() (mi *fs.Mountpath) { 379 var ( 380 availablePaths = fs.GetAvail() 381 mpathUtils = fs.GetAllMpathUtils() 382 minUtil = int64(101) // to motivate the first assignment 383 ) 384 for mpath, mpathInfo := range availablePaths { 385 if lom.haveMpath(mpath) || mpathInfo.IsAnySet(fs.FlagWaitingDD) { 386 continue 387 } 388 if util := mpathUtils.Get(mpath); util < minUtil { 389 minUtil, mi = util, mpathInfo 390 } 391 } 392 return 393 } 394 395 func (lom *LOM) haveMpath(mpath string) bool { 396 if len(lom.md.copies) == 0 { 397 return lom.mi.Path == mpath 398 } 399 for _, mi := range lom.md.copies { 400 if mi.Path == mpath { 401 return true 402 } 403 } 404 return false 405 } 406 407 // must be called under w-lock 408 // returns mountpath destination to copy this object, or nil if no copying is required 409 // - checks hrw location first, and 410 // - checks copies (if any) against the current configuation and available mountpaths; 411 // - does not check `fstat` in either case (TODO: configurable or scrub); 412 func (lom *LOM) ToMpath() (mi *fs.Mountpath, isHrw bool) { 413 var ( 414 availablePaths = fs.GetAvail() 415 hrwMi, _, err = fs.Hrw(lom.md.uname) 416 ) 417 if err != nil { 418 nlog.Errorln(err) 419 return 420 } 421 debug.Assert(!hrwMi.IsAnySet(fs.FlagWaitingDD)) 422 if lom.mi.Path != hrwMi.Path { 423 return hrwMi, true 424 } 425 mirror := lom.MirrorConf() 426 if !mirror.Enabled || mirror.Copies < 2 { 427 return 428 } 429 // count copies vs. configuration 430 // take into account mountpath flags but stop short of `fstat`-ing 431 expCopies, gotCopies := int(mirror.Copies), 0 432 for fqn, mpi := range lom.md.copies { 433 mpathInfo, ok := availablePaths[mpi.Path] 434 if !ok || mpathInfo.IsAnySet(fs.FlagWaitingDD) { 435 lom.delCopyMd(fqn) 436 } else { 437 gotCopies++ 438 } 439 } 440 if expCopies <= gotCopies { 441 return 442 } 443 mi = lom.LeastUtilNoCopy() // NOTE: nil when not enough mountpaths 444 return 445 }