github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/res/resilver.go (about) 1 // Package res provides local volume resilvering upon mountpath-attach and similar 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package res 6 7 import ( 8 "fmt" 9 "os" 10 "path/filepath" 11 "strings" 12 "time" 13 14 "github.com/NVIDIA/aistore/api/apc" 15 "github.com/NVIDIA/aistore/cmn" 16 "github.com/NVIDIA/aistore/cmn/atomic" 17 "github.com/NVIDIA/aistore/cmn/cos" 18 "github.com/NVIDIA/aistore/cmn/debug" 19 "github.com/NVIDIA/aistore/cmn/fname" 20 "github.com/NVIDIA/aistore/cmn/mono" 21 "github.com/NVIDIA/aistore/cmn/nlog" 22 "github.com/NVIDIA/aistore/core" 23 "github.com/NVIDIA/aistore/fs" 24 "github.com/NVIDIA/aistore/fs/mpather" 25 "github.com/NVIDIA/aistore/memsys" 26 "github.com/NVIDIA/aistore/xact" 27 "github.com/NVIDIA/aistore/xact/xreg" 28 "github.com/NVIDIA/aistore/xact/xs" 29 ) 30 31 const timedDuration = 4 * time.Second // see also: timedDuration in tgtgfn.go 32 33 type ( 34 Res struct { 35 // last or current resilver's time interval 36 begin atomic.Int64 37 end atomic.Int64 38 } 39 Args struct { 40 UUID string 41 Notif *xact.NotifXact 42 Rmi *fs.Mountpath 43 Action string 44 PostDD func(rmi *fs.Mountpath, action string, xres *xs.Resilver, err error) 45 SkipGlobMisplaced bool 46 SingleRmiJogger bool 47 } 48 joggerCtx struct { 49 xres *xs.Resilver 50 config *cmn.Config 51 } 52 ) 53 54 func New() *Res { return &Res{} } 55 56 func (res *Res) IsActive(multiplier int64) (yes bool) { 57 begin := res.begin.Load() 58 if begin == 0 { 59 return 60 } 61 now := mono.NanoTime() 62 if now-begin < multiplier*int64(timedDuration) { 63 yes = true 64 } else { 65 end := res.end.Load() 66 yes = end == 0 || time.Duration(now-end) < timedDuration 67 } 68 return 69 } 70 71 func (res *Res) _begin() { 72 res.begin.Store(mono.NanoTime()) 73 res.end.Store(0) 74 } 75 76 func (res *Res) _end() { 77 res.end.Store(mono.NanoTime()) 78 } 79 80 func (res *Res) RunResilver(args Args) { 81 res._begin() 82 defer res._end() 83 if fatalErr, writeErr := fs.PersistMarker(fname.ResilverMarker); fatalErr != nil || writeErr != nil { 84 nlog.Errorf("FATAL: %v, WRITE: %v", fatalErr, writeErr) 85 return 86 } 87 availablePaths, _ := fs.Get() 88 if len(availablePaths) < 1 { 89 nlog.Errorln(cmn.ErrNoMountpaths) 90 return 91 } 92 xres := xreg.RenewResilver(args.UUID).(*xs.Resilver) 93 if args.Notif != nil { 94 args.Notif.Xact = xres 95 xres.AddNotif(args.Notif) 96 } 97 98 // jogger group 99 var ( 100 jg *mpather.Jgroup 101 slab, err = core.T.PageMM().GetSlab(memsys.MaxPageSlabSize) 102 config = cmn.GCO.Get() 103 jctx = &joggerCtx{xres: xres, config: config} 104 105 opts = &mpather.JgroupOpts{ 106 CTs: []string{fs.ObjectType, fs.ECSliceType}, 107 VisitObj: jctx.visitObj, 108 VisitCT: jctx.visitCT, 109 Slab: slab, 110 SkipGloballyMisplaced: args.SkipGlobMisplaced, 111 } 112 ) 113 debug.AssertNoErr(err) 114 debug.Assert(args.PostDD == nil || (args.Action == apc.ActMountpathDetach || args.Action == apc.ActMountpathDisable)) 115 116 if args.SingleRmiJogger { 117 jg = mpather.NewJoggerGroup(opts, config, args.Rmi.Path) 118 nlog.Infof("%s, action %q, jogger->(%q)", xres.Name(), args.Action, args.Rmi) 119 } else { 120 jg = mpather.NewJoggerGroup(opts, config, "") 121 if args.Rmi != nil { 122 nlog.Infof("%s, action %q, rmi %s, num %d", xres.Name(), args.Action, args.Rmi, jg.Num()) 123 } else { 124 nlog.Infof("%s, num %d", xres.Name(), jg.Num()) 125 } 126 } 127 128 // run and block waiting 129 res.end.Store(0) 130 jg.Run() 131 err = wait(jg, xres) 132 if err != nil { 133 xres.AddErr(err) 134 } 135 // callback to, finally, detach-disable 136 if args.PostDD != nil { 137 args.PostDD(args.Rmi, args.Action, xres, err) 138 } 139 xres.Finish() 140 } 141 142 // Wait for an abort or for resilvering joggers to finish. 143 func wait(jg *mpather.Jgroup, xres *xs.Resilver) (err error) { 144 for { 145 select { 146 case errCause := <-xres.ChanAbort(): 147 if err = jg.Stop(); err != nil { 148 xres.AddErr(err, 0) 149 } else { 150 nlog.Infoln(core.T.String()+":", xres.Name(), "aborted, cause:", errCause) 151 } 152 return cmn.NewErrAborted(xres.Name(), "", errCause) 153 case <-jg.ListenFinished(): 154 if err = fs.RemoveMarker(fname.ResilverMarker); err == nil { 155 nlog.Infoln(core.T.String()+":", xres.Name(), "removed marker ok") 156 } 157 return 158 } 159 } 160 } 161 162 // Copies a slice and its metafile (if exists) to the current mpath. At the 163 // end does proper cleanup: removes ether source files(on success), or 164 // destination files(on copy failure) 165 func (jg *joggerCtx) _mvSlice(ct *core.CT, buf []byte) { 166 uname := ct.Bck().MakeUname(ct.ObjectName()) 167 destMpath, _, err := fs.Hrw(uname) 168 if err != nil { 169 jg.xres.AddErr(err) 170 nlog.Infoln("Warning:", err) 171 return 172 } 173 if destMpath.Path == ct.Mountpath().Path { 174 return 175 } 176 177 destFQN := destMpath.MakePathFQN(ct.Bucket(), fs.ECSliceType, ct.ObjectName()) 178 srcMetaFQN, destMetaFQN, err := _moveECMeta(ct, ct.Mountpath(), destMpath, buf) 179 if err != nil { 180 jg.xres.AddErr(err) 181 return 182 } 183 // Slice without metafile - skip it as unusable, let LRU clean it up 184 if srcMetaFQN == "" { 185 return 186 } 187 if cmn.Rom.FastV(4, cos.SmoduleReb) { 188 nlog.Infof("%s: moving %q -> %q", core.T, ct.FQN(), destFQN) 189 } 190 if _, _, err = cos.CopyFile(ct.FQN(), destFQN, buf, cos.ChecksumNone); err != nil { 191 errV := fmt.Errorf("failed to copy %q -> %q: %v. Rolling back", ct.FQN(), destFQN, err) 192 jg.xres.AddErr(errV, 0) 193 if err = os.Remove(destMetaFQN); err != nil { 194 errV := fmt.Errorf("failed to cleanup metafile %q: %v", destMetaFQN, err) 195 nlog.Infoln("Warning:", errV) 196 jg.xres.AddErr(errV) 197 } 198 } 199 errMeta := os.Remove(srcMetaFQN) 200 errSlice := os.Remove(ct.FQN()) 201 if errMeta != nil || errSlice != nil { 202 nlog.Warningf("Failed to cleanup %q: %v, %v", ct.FQN(), errSlice, errMeta) 203 } 204 } 205 206 // Copies EC metafile to correct mpath. It returns FQNs of the source and 207 // destination for a caller to do proper cleanup. Empty values means: either 208 // the source FQN does not exist(err==nil), or copying failed 209 func _moveECMeta(ct *core.CT, srcMpath, dstMpath *fs.Mountpath, buf []byte) (string, string, error) { 210 src := srcMpath.MakePathFQN(ct.Bucket(), fs.ECMetaType, ct.ObjectName()) 211 // If metafile does not exist it may mean that EC has not processed the 212 // object yet (e.g, EC was enabled after the bucket was filled), or 213 // the metafile has gone 214 if err := cos.Stat(src); os.IsNotExist(err) { 215 return "", "", nil 216 } 217 dst := dstMpath.MakePathFQN(ct.Bucket(), fs.ECMetaType, ct.ObjectName()) 218 _, _, err := cos.CopyFile(src, dst, buf, cos.ChecksumNone) 219 if err == nil { 220 return src, dst, nil 221 } 222 if os.IsNotExist(err) { 223 err = nil 224 } 225 return "", "", err 226 } 227 228 // TODO: revisit EC bits and check for OOS preemptively 229 // NOTE: not deleting extra copies - delegating to `storage cleanup` 230 func (jg *joggerCtx) visitObj(lom *core.LOM, buf []byte) (errHrw error) { 231 const maxRetries = 3 232 var ( 233 orig = lom 234 hlom *core.LOM 235 xname = jg.xres.Name() 236 size int64 237 copied bool 238 ) 239 if !lom.TryLock(true) { // NOTE: skipping busy 240 time.Sleep(time.Second >> 1) 241 if !lom.TryLock(true) { 242 return 243 } 244 } 245 // cleanup 246 defer func() { 247 lom = orig 248 lom.Unlock(true) 249 if copied && errHrw == nil { 250 jg.xres.ObjsAdd(1, size) 251 } 252 }() 253 254 // 1. fix EC metafile 255 var metaOldPath, metaNewPath string 256 if !lom.IsHRW() && lom.ECEnabled() { 257 var parsed fs.ParsedFQN 258 _, err := core.ResolveFQN(lom.HrwFQN, &parsed) 259 if err != nil { 260 nlog.Warningf("%s: %s %v", xname, lom, err) 261 return nil 262 } 263 ct := core.NewCTFromLOM(lom, fs.ObjectType) 264 // copy metafile 265 metaOldPath, metaNewPath, err = _moveECMeta(ct, lom.Mountpath(), parsed.Mountpath, buf) 266 if err != nil { 267 nlog.Warningf("%s: failed to copy EC metafile %s %q -> %q: %v", xname, lom, lom.Mountpath().Path, 268 parsed.Mountpath.Path, err) 269 return nil 270 } 271 } 272 273 if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil { 274 return nil 275 } 276 size = lom.SizeBytes() 277 // 2. fix hrw location; fail and subsequently abort if unsuccessful 278 var ( 279 retries int 280 mi, isHrw = lom.ToMpath() 281 ) 282 if mi == nil { 283 goto ret // nothing to do 284 } 285 redo: 286 if isHrw { 287 // cannot have it associated with a non-hrw mp; TODO: !lom.WritePolicy().IsImmediate() 288 lom.Uncache() 289 290 hlom, errHrw = jg.fixHrw(lom, mi, buf) 291 if errHrw != nil { 292 if !os.IsNotExist(errHrw) && !strings.Contains(errHrw.Error(), "does not exist") { 293 errV := fmt.Errorf("%s: failed to restore %s, errHrw: %v", xname, lom, errHrw) 294 jg.xres.AddErr(errV, 0) 295 } 296 // EC cleanup and return 297 if metaNewPath != "" { 298 if errHrw = os.Remove(metaNewPath); errHrw != nil { 299 errV := fmt.Errorf("%s: nested (%s %s: %v)", xname, lom, metaNewPath, errHrw) 300 nlog.Infoln("Warning:", errV) 301 jg.xres.AddErr(errV, 0) 302 } 303 } 304 return 305 } 306 lom = hlom 307 copied = true 308 } 309 310 // 3. fix copies 311 for { 312 mi, isHrw := lom.ToMpath() 313 if mi == nil { 314 break 315 } 316 if isHrw { 317 // redo hlom in an unlikely event 318 retries++ 319 if retries > maxRetries { 320 hmi := "???" 321 if hlom != nil && hlom.Mountpath() != nil { 322 hmi = hlom.Mountpath().String() 323 } 324 errHrw = fmt.Errorf("%s: hrw mountpaths keep changing (%s(%s) => %s => %s ...)", 325 xname, orig, orig.Mountpath(), hmi, mi) 326 jg.xres.AddErr(errHrw, 0) 327 return 328 } 329 copied = false 330 lom, hlom = orig, nil 331 time.Sleep(cmn.Rom.CplaneOperation() / 2) 332 goto redo 333 } 334 err := lom.Copy(mi, buf) 335 if err == nil { 336 copied = true 337 continue 338 } 339 if cos.IsErrOOS(err) { 340 errV := fmt.Errorf("%s: %s OOS, err: %w", core.T, mi, err) 341 jg.xres.AddErr(errV, 0) 342 err = cmn.NewErrAborted(xname, "", errV) 343 } else if !os.IsNotExist(err) && !strings.Contains(err.Error(), "does not exist") { 344 errV := fmt.Errorf("%s: failed to copy %s to %s, err: %w", xname, lom, mi, err) 345 nlog.Infoln("Warning:", errV) 346 jg.xres.AddErr(errV) 347 } 348 break 349 } 350 ret: 351 // EC: remove old metafile 352 if metaOldPath != "" { 353 if err := os.Remove(metaOldPath); err != nil { 354 nlog.Warningf("%s: failed to cleanup %s old metafile %q: %v", xname, lom, metaOldPath, err) 355 } 356 } 357 return nil 358 } 359 360 func (*joggerCtx) fixHrw(lom *core.LOM, mi *fs.Mountpath, buf []byte) (hlom *core.LOM, err error) { 361 if err = lom.Copy(mi, buf); err != nil { 362 return 363 } 364 hrwFQN := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName) 365 hlom = &core.LOM{} 366 if err = hlom.InitFQN(hrwFQN, lom.Bucket()); err != nil { 367 return 368 } 369 debug.Assert(hlom.Mountpath().Path == mi.Path) 370 371 // reload; cache iff write-policy != immediate 372 err = hlom.Load(!hlom.WritePolicy().IsImmediate() /*cache it*/, true /*locked*/) 373 return 374 } 375 376 func (jg *joggerCtx) visitCT(ct *core.CT, buf []byte) (err error) { 377 debug.Assert(ct.ContentType() == fs.ECSliceType) 378 if !ct.Bck().Props.EC.Enabled { 379 // Since `%ec` directory is inside a bucket, it is safe to skip 380 // the entire `%ec` directory when EC is disabled for the bucket. 381 return filepath.SkipDir 382 } 383 jg._mvSlice(ct, buf) 384 return nil 385 }