github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/etlmeta.go (about) 1 // Package ais provides core functionality for the AIStore object storage. 2 /* 3 * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ais 6 7 import ( 8 "fmt" 9 "os" 10 "path/filepath" 11 "sync" 12 ratomic "sync/atomic" 13 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/cmn/debug" 17 "github.com/NVIDIA/aistore/cmn/fname" 18 "github.com/NVIDIA/aistore/cmn/jsp" 19 "github.com/NVIDIA/aistore/cmn/nlog" 20 "github.com/NVIDIA/aistore/ext/etl" 21 "github.com/NVIDIA/aistore/fs" 22 "github.com/NVIDIA/aistore/memsys" 23 ) 24 25 const etlMDCopies = 2 // local copies 26 27 var etlMDImmSize int64 28 29 type ( 30 etlMD struct { 31 etl.MD 32 cksum *cos.Cksum 33 } 34 35 etlOwner interface { 36 sync.Locker 37 Get() *etl.MD 38 39 init() 40 get() (etlMD *etlMD) 41 putPersist(etlMD *etlMD, payload msPayload) error 42 persist(clone *etlMD, payload msPayload) error 43 modify(*etlMDModifier) (*etlMD, error) 44 } 45 46 etlMDModifier struct { 47 pre func(ctx *etlMDModifier, clone *etlMD) (err error) 48 final func(ctx *etlMDModifier, clone *etlMD) 49 50 msg etl.InitMsg 51 etlName string 52 wait bool 53 } 54 55 etlMDOwnerBase struct { 56 etlMD ratomic.Pointer[etlMD] 57 sync.Mutex 58 } 59 etlMDOwnerPrx struct { 60 etlMDOwnerBase 61 fpath string 62 } 63 etlMDOwnerTgt struct{ etlMDOwnerBase } 64 ) 65 66 // interface guard 67 var ( 68 _ revs = (*etlMD)(nil) 69 _ etlOwner = (*etlMDOwnerPrx)(nil) 70 _ etlOwner = (*etlMDOwnerTgt)(nil) 71 ) 72 73 // c-tor 74 func newEtlMD() (e *etlMD) { 75 e = &etlMD{} 76 e.MD.Init(4) 77 return 78 } 79 80 // as revs 81 func (*etlMD) tag() string { return revsEtlMDTag } 82 func (e *etlMD) version() int64 { return e.Version } 83 func (*etlMD) jit(p *proxy) revs { return p.owner.etl.get() } 84 func (*etlMD) sgl() *memsys.SGL { return nil } 85 86 // always remarshal (TODO: unify and optimize across all cluster-level metadata types) 87 func (e *etlMD) marshal() []byte { 88 sgl := memsys.PageMM().NewSGL(etlMDImmSize) 89 err := jsp.Encode(sgl, e, jsp.CCSign(cmn.MetaverEtlMD)) 90 debug.AssertNoErr(err) 91 etlMDImmSize = max(etlMDImmSize, sgl.Len()) 92 b := sgl.ReadAll() // TODO: optimize 93 sgl.Free() 94 return b 95 } 96 97 func (e *etlMD) clone() *etlMD { 98 dst := &etlMD{} 99 *dst = *e 100 dst.Init(len(e.ETLs)) 101 for id, etl := range e.ETLs { 102 dst.ETLs[id] = etl 103 } 104 return dst 105 } 106 107 func (e *etlMD) add(spec etl.InitMsg) { 108 e.Add(spec) 109 e.Version++ 110 } 111 112 func (e *etlMD) get(id string) etl.InitMsg { return e.ETLs[id] } 113 114 func (e *etlMD) del(id string) (exists bool) { 115 _, exists = e.ETLs[id] 116 delete(e.ETLs, id) 117 return 118 } 119 120 //////////////////// 121 // etlMDOwnerBase // 122 //////////////////// 123 124 func (eo *etlMDOwnerBase) Get() *etl.MD { return &eo.get().MD } 125 126 func (eo *etlMDOwnerBase) get() *etlMD { return eo.etlMD.Load() } 127 func (eo *etlMDOwnerBase) put(etlMD *etlMD) { eo.etlMD.Store(etlMD) } 128 129 // write metasync-sent bytes directly (no json) 130 func (*etlMDOwnerBase) persistBytes(payload msPayload, fpath string) (done bool) { 131 if payload == nil { 132 return 133 } 134 etlMDValue := payload[revsEtlMDTag] 135 if etlMDValue == nil { 136 return 137 } 138 var ( 139 etlMD *etl.MD 140 wto = cos.NewBuffer(etlMDValue) 141 err = jsp.SaveMeta(fpath, etlMD, wto) 142 ) 143 done = err == nil 144 return 145 } 146 147 /////////////////// 148 // etlMDOwnerPrx // 149 /////////////////// 150 151 func newEtlMDOwnerPrx(config *cmn.Config) *etlMDOwnerPrx { 152 return &etlMDOwnerPrx{fpath: filepath.Join(config.ConfigDir, fname.Emd)} 153 } 154 155 func (eo *etlMDOwnerPrx) init() { 156 etlMD := newEtlMD() 157 _, err := jsp.LoadMeta(eo.fpath, etlMD) 158 if err != nil { 159 if !os.IsNotExist(err) { 160 nlog.Errorf("failed to load %s from %s, err: %v", etlMD, eo.fpath, err) 161 } else { 162 nlog.Infof("%s does not exist at %s - initializing", etlMD, eo.fpath) 163 } 164 } 165 eo.put(etlMD) 166 } 167 168 func (eo *etlMDOwnerPrx) putPersist(etlMD *etlMD, payload msPayload) (err error) { 169 if !eo.persistBytes(payload, eo.fpath) { 170 err = jsp.SaveMeta(eo.fpath, etlMD, nil) 171 } 172 if err == nil { 173 eo.put(etlMD) 174 } 175 return 176 } 177 178 func (*etlMDOwnerPrx) persist(_ *etlMD, _ msPayload) (err error) { debug.Assert(false); return } 179 180 func (eo *etlMDOwnerPrx) _pre(ctx *etlMDModifier) (clone *etlMD, err error) { 181 eo.Lock() 182 defer eo.Unlock() 183 etlMD := eo.get() 184 clone = etlMD.clone() 185 if err = ctx.pre(ctx, clone); err != nil { 186 return 187 } 188 err = eo.putPersist(clone, nil) 189 return 190 } 191 192 func (eo *etlMDOwnerPrx) modify(ctx *etlMDModifier) (clone *etlMD, err error) { 193 if clone, err = eo._pre(ctx); err != nil { 194 return 195 } 196 if ctx.final != nil { 197 ctx.final(ctx, clone) 198 } 199 return 200 } 201 202 /////////////////// 203 // etlMDOwnerTgt // 204 /////////////////// 205 206 func newEtlMDOwnerTgt() *etlMDOwnerTgt { 207 return &etlMDOwnerTgt{} 208 } 209 210 func (eo *etlMDOwnerTgt) init() { 211 var ( 212 etlMD *etlMD 213 available = fs.GetAvail() 214 ) 215 if etlMD = loadEtlMD(available, fname.Emd); etlMD != nil { 216 nlog.Infoln("loaded", etlMD.String()) 217 } else { 218 etlMD = newEtlMD() 219 nlog.Infoln("initializing new", etlMD.String()) 220 } 221 eo.put(etlMD) 222 } 223 224 func (eo *etlMDOwnerTgt) putPersist(etlMD *etlMD, payload msPayload) (err error) { 225 if err = eo.persist(etlMD, payload); err == nil { 226 eo.put(etlMD) 227 } 228 return 229 } 230 231 func (*etlMDOwnerTgt) persist(clone *etlMD, payload msPayload) (err error) { 232 var b []byte 233 if payload != nil { 234 if etlMDValue := payload[revsEtlMDTag]; etlMDValue != nil { 235 b = etlMDValue 236 } 237 } 238 if b == nil { 239 b = clone.marshal() 240 } 241 cnt, availCnt := fs.PersistOnMpaths(fname.Emd, "" /*backup*/, clone, etlMDCopies, b, nil /*sgl*/) 242 if cnt > 0 { 243 return 244 } 245 if availCnt == 0 { 246 nlog.Errorln("Cannot store", clone.String()+":", cmn.ErrNoMountpaths) // there's a bigger problem 247 return 248 } 249 err = fmt.Errorf("failed to store %s on any of the mountpaths (%d)", clone, availCnt) 250 nlog.Errorln(err) 251 return 252 } 253 254 func (*etlMDOwnerTgt) modify(_ *etlMDModifier) (*etlMD, error) { 255 debug.Assert(false) 256 return nil, nil 257 } 258 259 func loadEtlMD(mpaths fs.MPI, path string) (mainEtlMD *etlMD) { 260 for _, mpath := range mpaths { 261 etlMD := loadEtlMDFromMpath(mpath, path) 262 if etlMD == nil { 263 continue 264 } 265 if mainEtlMD == nil { 266 mainEtlMD = etlMD 267 continue 268 } 269 if !mainEtlMD.cksum.Equal(etlMD.cksum) { 270 cos.ExitLogf("EtlMD is different (%q): %v vs %v", mpath, mainEtlMD, etlMD) 271 } 272 if mainEtlMD.cksum.Equal(etlMD.cksum) { 273 continue 274 } 275 if mainEtlMD.Version == etlMD.Version { 276 cos.ExitLogf("EtlMD is different (%q): %v vs %v", mpath, mainEtlMD, etlMD) 277 } 278 nlog.Errorf("Warning: detected different EtlMD versions (%q): %v != %v", mpath, mainEtlMD, etlMD) 279 if mainEtlMD.Version < etlMD.Version { 280 mainEtlMD = etlMD 281 } 282 } 283 return 284 } 285 286 func loadEtlMDFromMpath(mpath *fs.Mountpath, path string) (etlMD *etlMD) { 287 var ( 288 fpath = filepath.Join(mpath.Path, path) 289 err error 290 ) 291 etlMD = newEtlMD() 292 etlMD.cksum, err = jsp.LoadMeta(fpath, etlMD) 293 if err == nil { 294 return etlMD 295 } 296 if !os.IsNotExist(err) { 297 // Should never be NotExist error as mpi should include only mpaths with relevant etlMDs stored. 298 nlog.Errorf("failed to load %s from %s: %v", etlMD, fpath, err) 299 } 300 return nil 301 } 302 303 func hasEnoughEtlMDCopies() bool { return fs.CountPersisted(fname.Emd) >= etlMDCopies }