github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/core/lcopy.go (about)

     1  // Package core provides core metadata and in-cluster API
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package core
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  
    11  	"github.com/NVIDIA/aistore/cmn/cos"
    12  	"github.com/NVIDIA/aistore/cmn/debug"
    13  	"github.com/NVIDIA/aistore/cmn/nlog"
    14  	"github.com/NVIDIA/aistore/fs"
    15  )
    16  
    17  //
    18  // LOM copy management
    19  //
    20  
    21  func (lom *LOM) whingeCopy() (yes bool) {
    22  	if !lom.IsCopy() {
    23  		return
    24  	}
    25  	msg := fmt.Sprintf("unexpected: %s([fqn=%s] [hrw=%s] %+v)", lom, lom.FQN, lom.HrwFQN, lom.md.copies)
    26  	debug.Assert(false, msg)
    27  	nlog.Errorln(msg)
    28  	return true
    29  }
    30  
    31  func (lom *LOM) HasCopies() bool { return len(lom.md.copies) > 1 }
    32  func (lom *LOM) NumCopies() int  { return max(len(lom.md.copies), 1) } // metadata-wise
    33  
    34  // GetCopies returns all copies
    35  // NOTE: a) copies include lom.FQN aka "main repl.", and b) caller must take a lock
    36  func (lom *LOM) GetCopies() fs.MPI {
    37  	debug.AssertFunc(func() bool {
    38  		rc, exclusive := lom.IsLocked()
    39  		return exclusive || rc > 0
    40  	})
    41  	return lom.md.copies
    42  }
    43  
    44  // given an existing (on-disk) object, determines whether it is a _copy_
    45  // (compare with isMirror below)
    46  func (lom *LOM) IsCopy() bool {
    47  	if lom.IsHRW() {
    48  		return false
    49  	}
    50  	// misplaced or a copy
    51  	_, ok := lom.md.copies[lom.FQN]
    52  	return ok
    53  }
    54  
    55  // determines whether the two LOM _structures_ represent objects that must be _copies_ of each other
    56  // (compare with IsCopy above)
    57  func (lom *LOM) isMirror(dst *LOM) bool {
    58  	return lom.MirrorConf().Enabled &&
    59  		lom.ObjName == dst.ObjName &&
    60  		lom.Bck().Equal(dst.Bck(), true /* must have same BID*/, true /* same backend */)
    61  }
    62  
    63  func (lom *LOM) delCopyMd(copyFQN string) {
    64  	delete(lom.md.copies, copyFQN)
    65  	if len(lom.md.copies) <= 1 {
    66  		lom.md.copies = nil
    67  	}
    68  }
    69  
    70  // NOTE: used only in tests
    71  func (lom *LOM) AddCopy(copyFQN string, mpi *fs.Mountpath) error {
    72  	if lom.md.copies == nil {
    73  		lom.md.copies = make(fs.MPI, 2)
    74  	}
    75  	lom.md.copies[copyFQN] = mpi
    76  	lom.md.copies[lom.FQN] = lom.mi
    77  	return lom.syncMetaWithCopies()
    78  }
    79  
    80  func (lom *LOM) DelCopies(copiesFQN ...string) (err error) {
    81  	numCopies := lom.NumCopies()
    82  	// 1. Delete all copies from the metadata
    83  	for _, copyFQN := range copiesFQN {
    84  		if _, ok := lom.md.copies[copyFQN]; !ok {
    85  			return fmt.Errorf("lom %s(num: %d): copy %s does not exist", lom, numCopies, copyFQN)
    86  		}
    87  		lom.delCopyMd(copyFQN)
    88  	}
    89  
    90  	// 2. Update metadata on remaining copies, if any
    91  	if err := lom.syncMetaWithCopies(); err != nil {
    92  		debug.AssertNoErr(err)
    93  		return err
    94  	}
    95  
    96  	// 3. Remove the copies
    97  	for _, copyFQN := range copiesFQN {
    98  		if err1 := cos.RemoveFile(copyFQN); err1 != nil {
    99  			nlog.Errorln(err1) // TODO: LRU should take care of that later.
   100  			continue
   101  		}
   102  	}
   103  	return
   104  }
   105  
   106  func (lom *LOM) DelAllCopies() (err error) {
   107  	copiesFQN := make([]string, 0, len(lom.md.copies))
   108  	for copyFQN := range lom.md.copies {
   109  		if copyFQN == lom.FQN {
   110  			continue
   111  		}
   112  		copiesFQN = append(copiesFQN, copyFQN)
   113  	}
   114  	return lom.DelCopies(copiesFQN...)
   115  }
   116  
   117  // DelExtraCopies deletes obj replicas that are not part of the lom.md.copies metadata
   118  // (cleanup)
   119  func (lom *LOM) DelExtraCopies(fqn ...string) (removed bool, err error) {
   120  	if lom.whingeCopy() {
   121  		return
   122  	}
   123  	availablePaths := fs.GetAvail()
   124  	for _, mi := range availablePaths {
   125  		copyFQN := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName)
   126  		if _, ok := lom.md.copies[copyFQN]; ok {
   127  			continue
   128  		}
   129  		if err1 := cos.RemoveFile(copyFQN); err1 != nil {
   130  			err = err1
   131  			continue
   132  		}
   133  		if len(fqn) > 0 && fqn[0] == copyFQN {
   134  			removed = true
   135  		}
   136  	}
   137  	return
   138  }
   139  
   140  // syncMetaWithCopies tries to make sure that all copies have identical metadata.
   141  // NOTE: uname for LOM must be already locked.
   142  // NOTE: changes _may_ be made - the caller must call lom.Persist() upon return
   143  func (lom *LOM) syncMetaWithCopies() (err error) {
   144  	var copyFQN string
   145  	if !lom.HasCopies() {
   146  		return nil
   147  	}
   148  	// NOTE: caller is responsible for write-locking
   149  	debug.AssertFunc(func() bool {
   150  		_, exclusive := lom.IsLocked()
   151  		return exclusive
   152  	})
   153  	if !lom.WritePolicy().IsImmediate() {
   154  		lom.md.makeDirty()
   155  		return nil
   156  	}
   157  	for {
   158  		if copyFQN, err = lom.persistMdOnCopies(); err == nil {
   159  			break
   160  		}
   161  		lom.delCopyMd(copyFQN)
   162  		if err1 := cos.Stat(copyFQN); err1 != nil && !os.IsNotExist(err1) {
   163  			T.FSHC(err, copyFQN) // TODO: notify scrubber
   164  		}
   165  	}
   166  	return
   167  }
   168  
// RestoreToLocation tries to restore the object at its default location (lom.FQN)
// by copying it from a replica found on any other available mountpath.
// Returns true if object exists (either loaded right away or successfully restored),
// false otherwise.
// The entire operation runs under lom.Lock(true) (NOTE(review): presumably the
// exclusive lock - confirm).
// TODO: locking vs concurrent restore: consider (read-lock object + write-lock meta) split
func (lom *LOM) RestoreToLocation() (exists bool) {
	lom.Lock(true)
	if err := lom.Load(true /*cache it*/, true /*locked*/); err == nil {
		lom.Unlock(true)
		return true // nothing to do
	}
	var (
		saved          = lom.md.pushrt()
		availablePaths = fs.GetAvail()
		buf, slab      = g.pmm.Alloc()
	)
	for path, mi := range availablePaths {
		// skip the object's own mountpath: that is the location that failed to load
		if path == lom.mi.Path {
			continue
		}
		fqn := mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName)
		// no file at this location - try the next mountpath
		if err := cos.Stat(fqn); err != nil {
			continue
		}
		dst, err := lom._restore(fqn, buf)
		if err == nil {
			// adopt the restored object's metadata; poprt() is given the value
			// that pushrt() returned prior to the restore
			lom.md = dst.md
			lom.md.poprt(saved)
			exists = true
			FreeLOM(dst)
			break
		}
		if dst != nil {
			FreeLOM(dst)
		}
	}
	lom.Unlock(true)
	// return the buffer to the slab it was allocated from
	slab.Free(buf)
	return
}
   207  
   208  func (lom *LOM) _restore(fqn string, buf []byte) (dst *LOM, err error) {
   209  	src := lom.CloneMD(fqn)
   210  	defer FreeLOM(src)
   211  	if err = src.InitFQN(fqn, lom.Bucket()); err != nil {
   212  		return
   213  	}
   214  	if err = src.Load(false /*cache it*/, true /*locked*/); err != nil {
   215  		return
   216  	}
   217  	// restore at default location
   218  	dst, err = src.Copy2FQN(lom.FQN, buf)
   219  	return
   220  }
   221  
// Copy increments the object's num copies by (well) copying the former
// onto the specified mountpath `mi`:
//   - if a file already exists at the destination and loads as a LOM that is
//     Equal() to this one, the data copy is skipped;
//   - otherwise, the object is copied into a workfile which then gets renamed
//     to the destination FQN;
//   - finally, the new copy is added to the metadata and the latter is persisted.
//
// `buf` is the buffer passed through to cos.CopyFile.
// (compare with lom.Copy2FQN below)
func (lom *LOM) Copy(mi *fs.Mountpath, buf []byte) (err error) {
	var (
		copyFQN = mi.MakePathFQN(lom.Bucket(), fs.ObjectType, lom.ObjName)
		workFQN = mi.MakePathFQN(lom.Bucket(), fs.WorkfileType, fs.WorkfileCopy+"."+lom.ObjName)
	)
	// check if the copy destination exists and then skip copying if it's also identical
	if errExists := cos.Stat(copyFQN); errExists == nil {
		cplom := AllocLOM(lom.ObjName)
		// NOTE: deferred until Copy() returns (and not until the end of this `if` block)
		defer FreeLOM(cplom)
		if errExists = cplom.InitFQN(copyFQN, lom.Bucket()); errExists == nil {
			if errExists = cplom.Load(false /*cache it*/, true /*locked*/); errExists == nil && cplom.Equal(lom) {
				goto add
			}
		}
	}

	// copy
	_, _, err = cos.CopyFile(lom.FQN, workFQN, buf, cos.ChecksumNone) // TODO: checksumming
	if err != nil {
		return
	}
	if err = cos.Rename(workFQN, copyFQN); err != nil {
		// failed to rename: best-effort cleanup of the workfile
		if errRemove := cos.RemoveFile(workFQN); errRemove != nil && !os.IsNotExist(errRemove) {
			nlog.Errorln("nested err:", errRemove)
		}
		return
	}
add:
	// add md and persist
	// (the error returned by AddCopy is not checked here)
	lom.AddCopy(copyFQN, mi)
	err = lom.Persist()
	if err != nil {
		// undo the metadata change; the copied file itself is not removed here
		lom.delCopyMd(copyFQN)
		nlog.Errorln(err)
		return err
	}
	err = lom.syncMetaWithCopies()
	return
}
   263  
   264  // copy object => any local destination
   265  // recommended for copying between different buckets (compare with lom.Copy() above)
   266  // NOTE: `lom` source must be w-locked
   267  func (lom *LOM) Copy2FQN(dstFQN string, buf []byte) (dst *LOM, err error) {
   268  	dst = lom.CloneMD(dstFQN)
   269  	if err = dst.InitFQN(dstFQN, nil); err == nil {
   270  		err = lom.copy2fqn(dst, buf)
   271  	}
   272  	if err != nil {
   273  		FreeLOM(dst)
   274  		dst = nil
   275  	}
   276  	return
   277  }
   278  
   279  func (lom *LOM) copy2fqn(dst *LOM, buf []byte) (err error) {
   280  	var (
   281  		dstCksum  *cos.CksumHash
   282  		dstFQN    = dst.FQN
   283  		srcCksum  = lom.Checksum()
   284  		cksumType = cos.ChecksumNone
   285  	)
   286  	if !srcCksum.IsEmpty() {
   287  		cksumType = srcCksum.Ty()
   288  	}
   289  	if dst.isMirror(lom) && lom.md.copies != nil {
   290  		dst.md.copies = make(fs.MPI, len(lom.md.copies)+1)
   291  		for fqn, mpi := range lom.md.copies {
   292  			dst.md.copies[fqn] = mpi
   293  		}
   294  	}
   295  	if !dst.Bck().Equal(lom.Bck(), true /*same ID*/, true /*same backend*/) {
   296  		// The copy will be in a new bucket - completely separate object. Hence, we have to set initial version.
   297  		dst.SetVersion(lomInitialVersion)
   298  	}
   299  
   300  	workFQN := fs.CSM.Gen(dst, fs.WorkfileType, fs.WorkfileCopy)
   301  	_, dstCksum, err = cos.CopyFile(lom.FQN, workFQN, buf, cksumType)
   302  	if err != nil {
   303  		return
   304  	}
   305  
   306  	if err = cos.Rename(workFQN, dstFQN); err != nil {
   307  		if errRemove := cos.RemoveFile(workFQN); errRemove != nil && !os.IsNotExist(errRemove) {
   308  			nlog.Errorln("nested err:", errRemove)
   309  		}
   310  		return
   311  	}
   312  
   313  	if cksumType != cos.ChecksumNone {
   314  		if !dstCksum.Equal(lom.Checksum()) {
   315  			return cos.NewErrDataCksum(&dstCksum.Cksum, lom.Checksum())
   316  		}
   317  		dst.SetCksum(dstCksum.Clone())
   318  	}
   319  
   320  	// persist
   321  	if lom.isMirror(dst) {
   322  		if lom.md.copies == nil {
   323  			lom.md.copies = make(fs.MPI, 2)
   324  			dst.md.copies = make(fs.MPI, 2)
   325  		}
   326  		lom.md.copies[dstFQN], dst.md.copies[dstFQN] = dst.mi, dst.mi
   327  		lom.md.copies[lom.FQN], dst.md.copies[lom.FQN] = lom.mi, lom.mi
   328  		if err = lom.syncMetaWithCopies(); err != nil {
   329  			if _, ok := lom.md.copies[dst.FQN]; !ok {
   330  				if errRemove := os.Remove(dst.FQN); errRemove != nil && !os.IsNotExist(errRemove) {
   331  					nlog.Errorln("nested err:", errRemove)
   332  				}
   333  			}
   334  			// `lom.syncMetaWithCopies()` may have made changes notwithstanding
   335  			if errPersist := lom.Persist(); errPersist != nil {
   336  				nlog.Errorln("nested err:", errPersist)
   337  			}
   338  			return
   339  		}
   340  		err = lom.Persist()
   341  	} else if err = dst.Persist(); err != nil {
   342  		if errRemove := os.Remove(dst.FQN); errRemove != nil && !os.IsNotExist(errRemove) {
   343  			nlog.Errorln("nested err:", errRemove)
   344  		}
   345  	}
   346  	return
   347  }
   348  
   349  // load-balanced GET
   350  func (lom *LOM) LBGet() (fqn string) {
   351  	if !lom.HasCopies() {
   352  		return lom.FQN
   353  	}
   354  	return lom.leastUtilCopy()
   355  }
   356  
   357  // NOTE: reconsider counting GETs (and the associated overhead)
   358  // vs ios.refreshIostatCache (and the associated delay)
   359  func (lom *LOM) leastUtilCopy() (fqn string) {
   360  	var (
   361  		mpathUtils = fs.GetAllMpathUtils()
   362  		minUtil    = mpathUtils.Get(lom.mi.Path)
   363  		copies     = lom.GetCopies()
   364  	)
   365  	fqn = lom.FQN
   366  	for copyFQN, copyMPI := range copies {
   367  		if copyFQN != lom.FQN {
   368  			if util := mpathUtils.Get(copyMPI.Path); util < minUtil {
   369  				fqn, minUtil = copyFQN, util
   370  			}
   371  		}
   372  	}
   373  	return
   374  }
   375  
   376  // returns the least utilized mountpath that does _not_ have a copy of this `lom` yet
   377  // (compare with leastUtilCopy())
   378  func (lom *LOM) LeastUtilNoCopy() (mi *fs.Mountpath) {
   379  	var (
   380  		availablePaths = fs.GetAvail()
   381  		mpathUtils     = fs.GetAllMpathUtils()
   382  		minUtil        = int64(101) // to motivate the first assignment
   383  	)
   384  	for mpath, mpathInfo := range availablePaths {
   385  		if lom.haveMpath(mpath) || mpathInfo.IsAnySet(fs.FlagWaitingDD) {
   386  			continue
   387  		}
   388  		if util := mpathUtils.Get(mpath); util < minUtil {
   389  			minUtil, mi = util, mpathInfo
   390  		}
   391  	}
   392  	return
   393  }
   394  
   395  func (lom *LOM) haveMpath(mpath string) bool {
   396  	if len(lom.md.copies) == 0 {
   397  		return lom.mi.Path == mpath
   398  	}
   399  	for _, mi := range lom.md.copies {
   400  		if mi.Path == mpath {
   401  			return true
   402  		}
   403  	}
   404  	return false
   405  }
   406  
   407  // must be called under w-lock
   408  // returns mountpath destination to copy this object, or nil if no copying is required
   409  // - checks hrw location first, and
   410  // - checks copies (if any) against the current configuation and available mountpaths;
   411  // - does not check `fstat` in either case (TODO: configurable or scrub);
   412  func (lom *LOM) ToMpath() (mi *fs.Mountpath, isHrw bool) {
   413  	var (
   414  		availablePaths = fs.GetAvail()
   415  		hrwMi, _, err  = fs.Hrw(lom.md.uname)
   416  	)
   417  	if err != nil {
   418  		nlog.Errorln(err)
   419  		return
   420  	}
   421  	debug.Assert(!hrwMi.IsAnySet(fs.FlagWaitingDD))
   422  	if lom.mi.Path != hrwMi.Path {
   423  		return hrwMi, true
   424  	}
   425  	mirror := lom.MirrorConf()
   426  	if !mirror.Enabled || mirror.Copies < 2 {
   427  		return
   428  	}
   429  	// count copies vs. configuration
   430  	// take into account mountpath flags but stop short of `fstat`-ing
   431  	expCopies, gotCopies := int(mirror.Copies), 0
   432  	for fqn, mpi := range lom.md.copies {
   433  		mpathInfo, ok := availablePaths[mpi.Path]
   434  		if !ok || mpathInfo.IsAnySet(fs.FlagWaitingDD) {
   435  			lom.delCopyMd(fqn)
   436  		} else {
   437  			gotCopies++
   438  		}
   439  	}
   440  	if expCopies <= gotCopies {
   441  		return
   442  	}
   443  	mi = lom.LeastUtilNoCopy() // NOTE: nil when not enough mountpaths
   444  	return
   445  }