github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/etlmeta.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"sync"
    12  	ratomic "sync/atomic"
    13  
    14  	"github.com/NVIDIA/aistore/cmn"
    15  	"github.com/NVIDIA/aistore/cmn/cos"
    16  	"github.com/NVIDIA/aistore/cmn/debug"
    17  	"github.com/NVIDIA/aistore/cmn/fname"
    18  	"github.com/NVIDIA/aistore/cmn/jsp"
    19  	"github.com/NVIDIA/aistore/cmn/nlog"
    20  	"github.com/NVIDIA/aistore/ext/etl"
    21  	"github.com/NVIDIA/aistore/fs"
    22  	"github.com/NVIDIA/aistore/memsys"
    23  )
    24  
// etlMDCopies is the number of copies requested when the ETL metadata is
// persisted on mountpaths, and the threshold used by hasEnoughEtlMDCopies.
const etlMDCopies = 2 // local copies

// etlMDImmSize tracks the largest marshaled etlMD size seen so far; marshal()
// passes it as the size argument when allocating the next SGL.
var etlMDImmSize int64
    28  
type (
	// etlMD embeds etl.MD and adds the checksum obtained when the metadata
	// is loaded from a mountpath (see loadEtlMDFromMpath).
	etlMD struct {
		etl.MD
		cksum *cos.Cksum
	}

	// etlOwner is the owner of the ETL metadata; implemented by
	// etlMDOwnerPrx and etlMDOwnerTgt.
	etlOwner interface {
		sync.Locker
		Get() *etl.MD

		init()
		get() (etlMD *etlMD)
		putPersist(etlMD *etlMD, payload msPayload) error
		persist(clone *etlMD, payload msPayload) error
		modify(*etlMDModifier) (*etlMD, error)
	}

	// etlMDModifier describes a single modification of the ETL metadata:
	// `pre` is applied to a clone under the owner's lock; the optional `final`
	// is called after the clone has been persisted and published.
	// The remaining fields are not interpreted in this file.
	etlMDModifier struct {
		pre   func(ctx *etlMDModifier, clone *etlMD) (err error)
		final func(ctx *etlMDModifier, clone *etlMD)

		msg     etl.InitMsg
		etlName string
		wait    bool
	}

	// etlMDOwnerBase holds the current etlMD behind an atomic pointer;
	// the embedded mutex provides the sync.Locker part of etlOwner.
	etlMDOwnerBase struct {
		etlMD ratomic.Pointer[etlMD]
		sync.Mutex
	}
	// etlMDOwnerPrx is the proxy-side owner; fpath is the file the metadata
	// is loaded from and saved to.
	etlMDOwnerPrx struct {
		etlMDOwnerBase
		fpath string
	}
	// etlMDOwnerTgt is the target-side owner; it persists on mountpaths.
	etlMDOwnerTgt struct{ etlMDOwnerBase }
)
    65  
// interface guards: compile-time checks that the types below implement
// the respective interfaces
var (
	_ revs     = (*etlMD)(nil)
	_ etlOwner = (*etlMDOwnerPrx)(nil)
	_ etlOwner = (*etlMDOwnerTgt)(nil)
)
    72  
    73  // c-tor
    74  func newEtlMD() (e *etlMD) {
    75  	e = &etlMD{}
    76  	e.MD.Init(4)
    77  	return
    78  }
    79  
    80  // as revs
    81  func (*etlMD) tag() string       { return revsEtlMDTag }
    82  func (e *etlMD) version() int64  { return e.Version }
    83  func (*etlMD) jit(p *proxy) revs { return p.owner.etl.get() }
    84  func (*etlMD) sgl() *memsys.SGL  { return nil }
    85  
    86  // always remarshal (TODO: unify and optimize across all cluster-level metadata types)
    87  func (e *etlMD) marshal() []byte {
    88  	sgl := memsys.PageMM().NewSGL(etlMDImmSize)
    89  	err := jsp.Encode(sgl, e, jsp.CCSign(cmn.MetaverEtlMD))
    90  	debug.AssertNoErr(err)
    91  	etlMDImmSize = max(etlMDImmSize, sgl.Len())
    92  	b := sgl.ReadAll() // TODO: optimize
    93  	sgl.Free()
    94  	return b
    95  }
    96  
    97  func (e *etlMD) clone() *etlMD {
    98  	dst := &etlMD{}
    99  	*dst = *e
   100  	dst.Init(len(e.ETLs))
   101  	for id, etl := range e.ETLs {
   102  		dst.ETLs[id] = etl
   103  	}
   104  	return dst
   105  }
   106  
   107  func (e *etlMD) add(spec etl.InitMsg) {
   108  	e.Add(spec)
   109  	e.Version++
   110  }
   111  
   112  func (e *etlMD) get(id string) etl.InitMsg { return e.ETLs[id] }
   113  
   114  func (e *etlMD) del(id string) (exists bool) {
   115  	_, exists = e.ETLs[id]
   116  	delete(e.ETLs, id)
   117  	return
   118  }
   119  
   120  ////////////////////
   121  // etlMDOwnerBase //
   122  ////////////////////
   123  
   124  func (eo *etlMDOwnerBase) Get() *etl.MD { return &eo.get().MD }
   125  
   126  func (eo *etlMDOwnerBase) get() *etlMD      { return eo.etlMD.Load() }
   127  func (eo *etlMDOwnerBase) put(etlMD *etlMD) { eo.etlMD.Store(etlMD) }
   128  
   129  // write metasync-sent bytes directly (no json)
   130  func (*etlMDOwnerBase) persistBytes(payload msPayload, fpath string) (done bool) {
   131  	if payload == nil {
   132  		return
   133  	}
   134  	etlMDValue := payload[revsEtlMDTag]
   135  	if etlMDValue == nil {
   136  		return
   137  	}
   138  	var (
   139  		etlMD *etl.MD
   140  		wto   = cos.NewBuffer(etlMDValue)
   141  		err   = jsp.SaveMeta(fpath, etlMD, wto)
   142  	)
   143  	done = err == nil
   144  	return
   145  }
   146  
   147  ///////////////////
   148  // etlMDOwnerPrx //
   149  ///////////////////
   150  
   151  func newEtlMDOwnerPrx(config *cmn.Config) *etlMDOwnerPrx {
   152  	return &etlMDOwnerPrx{fpath: filepath.Join(config.ConfigDir, fname.Emd)}
   153  }
   154  
   155  func (eo *etlMDOwnerPrx) init() {
   156  	etlMD := newEtlMD()
   157  	_, err := jsp.LoadMeta(eo.fpath, etlMD)
   158  	if err != nil {
   159  		if !os.IsNotExist(err) {
   160  			nlog.Errorf("failed to load %s from %s, err: %v", etlMD, eo.fpath, err)
   161  		} else {
   162  			nlog.Infof("%s does not exist at %s - initializing", etlMD, eo.fpath)
   163  		}
   164  	}
   165  	eo.put(etlMD)
   166  }
   167  
   168  func (eo *etlMDOwnerPrx) putPersist(etlMD *etlMD, payload msPayload) (err error) {
   169  	if !eo.persistBytes(payload, eo.fpath) {
   170  		err = jsp.SaveMeta(eo.fpath, etlMD, nil)
   171  	}
   172  	if err == nil {
   173  		eo.put(etlMD)
   174  	}
   175  	return
   176  }
   177  
   178  func (*etlMDOwnerPrx) persist(_ *etlMD, _ msPayload) (err error) { debug.Assert(false); return }
   179  
   180  func (eo *etlMDOwnerPrx) _pre(ctx *etlMDModifier) (clone *etlMD, err error) {
   181  	eo.Lock()
   182  	defer eo.Unlock()
   183  	etlMD := eo.get()
   184  	clone = etlMD.clone()
   185  	if err = ctx.pre(ctx, clone); err != nil {
   186  		return
   187  	}
   188  	err = eo.putPersist(clone, nil)
   189  	return
   190  }
   191  
   192  func (eo *etlMDOwnerPrx) modify(ctx *etlMDModifier) (clone *etlMD, err error) {
   193  	if clone, err = eo._pre(ctx); err != nil {
   194  		return
   195  	}
   196  	if ctx.final != nil {
   197  		ctx.final(ctx, clone)
   198  	}
   199  	return
   200  }
   201  
   202  ///////////////////
   203  // etlMDOwnerTgt //
   204  ///////////////////
   205  
   206  func newEtlMDOwnerTgt() *etlMDOwnerTgt {
   207  	return &etlMDOwnerTgt{}
   208  }
   209  
   210  func (eo *etlMDOwnerTgt) init() {
   211  	var (
   212  		etlMD     *etlMD
   213  		available = fs.GetAvail()
   214  	)
   215  	if etlMD = loadEtlMD(available, fname.Emd); etlMD != nil {
   216  		nlog.Infoln("loaded", etlMD.String())
   217  	} else {
   218  		etlMD = newEtlMD()
   219  		nlog.Infoln("initializing new", etlMD.String())
   220  	}
   221  	eo.put(etlMD)
   222  }
   223  
   224  func (eo *etlMDOwnerTgt) putPersist(etlMD *etlMD, payload msPayload) (err error) {
   225  	if err = eo.persist(etlMD, payload); err == nil {
   226  		eo.put(etlMD)
   227  	}
   228  	return
   229  }
   230  
   231  func (*etlMDOwnerTgt) persist(clone *etlMD, payload msPayload) (err error) {
   232  	var b []byte
   233  	if payload != nil {
   234  		if etlMDValue := payload[revsEtlMDTag]; etlMDValue != nil {
   235  			b = etlMDValue
   236  		}
   237  	}
   238  	if b == nil {
   239  		b = clone.marshal()
   240  	}
   241  	cnt, availCnt := fs.PersistOnMpaths(fname.Emd, "" /*backup*/, clone, etlMDCopies, b, nil /*sgl*/)
   242  	if cnt > 0 {
   243  		return
   244  	}
   245  	if availCnt == 0 {
   246  		nlog.Errorln("Cannot store", clone.String()+":", cmn.ErrNoMountpaths) // there's a bigger problem
   247  		return
   248  	}
   249  	err = fmt.Errorf("failed to store %s on any of the mountpaths (%d)", clone, availCnt)
   250  	nlog.Errorln(err)
   251  	return
   252  }
   253  
   254  func (*etlMDOwnerTgt) modify(_ *etlMDModifier) (*etlMD, error) {
   255  	debug.Assert(false)
   256  	return nil, nil
   257  }
   258  
   259  func loadEtlMD(mpaths fs.MPI, path string) (mainEtlMD *etlMD) {
   260  	for _, mpath := range mpaths {
   261  		etlMD := loadEtlMDFromMpath(mpath, path)
   262  		if etlMD == nil {
   263  			continue
   264  		}
   265  		if mainEtlMD == nil {
   266  			mainEtlMD = etlMD
   267  			continue
   268  		}
   269  		if !mainEtlMD.cksum.Equal(etlMD.cksum) {
   270  			cos.ExitLogf("EtlMD is different (%q): %v vs %v", mpath, mainEtlMD, etlMD)
   271  		}
   272  		if mainEtlMD.cksum.Equal(etlMD.cksum) {
   273  			continue
   274  		}
   275  		if mainEtlMD.Version == etlMD.Version {
   276  			cos.ExitLogf("EtlMD is different (%q): %v vs %v", mpath, mainEtlMD, etlMD)
   277  		}
   278  		nlog.Errorf("Warning: detected different EtlMD versions (%q): %v != %v", mpath, mainEtlMD, etlMD)
   279  		if mainEtlMD.Version < etlMD.Version {
   280  			mainEtlMD = etlMD
   281  		}
   282  	}
   283  	return
   284  }
   285  
   286  func loadEtlMDFromMpath(mpath *fs.Mountpath, path string) (etlMD *etlMD) {
   287  	var (
   288  		fpath = filepath.Join(mpath.Path, path)
   289  		err   error
   290  	)
   291  	etlMD = newEtlMD()
   292  	etlMD.cksum, err = jsp.LoadMeta(fpath, etlMD)
   293  	if err == nil {
   294  		return etlMD
   295  	}
   296  	if !os.IsNotExist(err) {
   297  		// Should never be NotExist error as mpi should include only mpaths with relevant etlMDs stored.
   298  		nlog.Errorf("failed to load %s from %s: %v", etlMD, fpath, err)
   299  	}
   300  	return nil
   301  }
   302  
   303  func hasEnoughEtlMDCopies() bool { return fs.CountPersisted(fname.Emd) >= etlMDCopies }