github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/ic.go (about) 1 // Package ais provides core functionality for the AIStore object storage. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ais 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "net/http" 12 "net/url" 13 "strconv" 14 "time" 15 16 "github.com/NVIDIA/aistore/api/apc" 17 "github.com/NVIDIA/aistore/cmn" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/cmn/nlog" 21 "github.com/NVIDIA/aistore/core/meta" 22 "github.com/NVIDIA/aistore/nl" 23 "github.com/NVIDIA/aistore/xact" 24 jsoniter "github.com/json-iterator/go" 25 ) 26 27 // Information Center (IC) is a group of proxies that take care of ownership of 28 // (Job, Task, eXtended action) entities. IC manages their lifecycle and monitors 29 // status. When a job, task or xaction is created, it gets registered with all IC 30 // members. Henceforth, IC acts as information source as far as status (running, 31 // aborted, finished), progress, and statistics. 32 // 33 // Non-IC AIS proxies, on the other hand, redirect all corresponding requests to 34 // one (anyone) of the IC (proxy) members. 35 36 const ( 37 // Implies equal ownership by all IC members and applies to all async ops 38 // that have no associated cache other than start/end timestamps and stats counters 39 // (case in point: list/query-objects that MAY be cached, etc.) 40 equalIC = "\x00" 41 ) 42 43 type ( 44 regIC struct { 45 nl nl.Listener 46 smap *smapX 47 query url.Values 48 msg any 49 } 50 51 xactRegMsg struct { 52 UUID string `json:"uuid"` 53 Kind string `json:"kind"` 54 Srcs []string `json:"srcs"` // list of daemonIDs 55 } 56 57 icBundle struct { 58 Smap *smapX `json:"smap"` 59 OwnershipTbl jsoniter.RawMessage `json:"ownership_table"` 60 } 61 62 ic struct { 63 p *proxy 64 } 65 ) 66 67 func (ic *ic) init(p *proxy) { 68 ic.p = p 69 } 70 71 func (ic *ic) reverseToOwner(w http.ResponseWriter, r *http.Request, uuid string, msg any) (reversedOrFailed bool) { 72 retry := true 73 begin: 74 var ( 75 smap = ic.p.owner.smap.get() 76 selfIC = smap.IsIC(ic.p.si) 77 owner, exists = ic.p.notifs.getOwner(uuid) 78 psi *meta.Snode 79 ) 80 if exists { 81 goto outer 82 } 83 if selfIC { 84 if !exists && !retry { 85 err := fmt.Errorf("x-[%s] not found (%s)", uuid, smap.StrIC(ic.p.si)) 86 ic.p.writeErr(w, r, err, http.StatusNotFound, Silent) 87 return true 88 } 89 if retry { 90 withRetry(cmn.Rom.CplaneOperation(), func() bool { 91 owner, exists = ic.p.notifs.getOwner(uuid) 92 return exists 93 }) 94 if !exists { 95 retry = false 96 _ = ic.syncICBundle() // TODO handle error 97 goto begin 98 } 99 } 100 } else { 101 hrwOwner, err := smap.HrwIC(uuid) 102 if err != nil { 103 ic.p.writeErr(w, r, err, http.StatusInternalServerError) 104 return true 105 } 106 owner = hrwOwner.ID() 107 } 108 outer: 109 switch owner { 110 case "": // not owned 111 return 112 case equalIC: 113 if selfIC { 114 owner = ic.p.SID() 115 } else { 116 for pid, si := range smap.Pmap { 117 if !smap.IsIC(psi) { 118 continue 119 } 120 owner = pid 121 psi = si 122 break outer 123 } 124 } 125 default: // cached + owned 126 psi = smap.GetProxy(owner) 127 if psi == nil || !smap.IsIC(psi) { 128 var err error 129 if psi, err = smap.HrwIC(uuid); err != nil { 130 ic.p.writeErr(w, r, err, http.StatusInternalServerError) 131 return true 132 } 133 } 134 debug.Assertf(smap.IsIC(psi), "%s, %s", psi, smap.StrIC(ic.p.si)) 135 } 136 if owner == ic.p.SID() { 137 return 138 } 139 // otherwise, hand it over 140 if msg != nil { 141 body := cos.MustMarshal(msg) 142 r.ContentLength = int64(len(body)) 143 r.Body = io.NopCloser(bytes.NewReader(body)) 144 } 145 ic.p.reverseNodeRequest(w, r, psi) 146 return true 147 } 148 149 func (ic *ic) redirectToIC(w http.ResponseWriter, r *http.Request) bool { 150 smap := ic.p.owner.smap.get() 151 if smap.IsIC(ic.p.si) { 152 return false 153 } 154 155 var node *meta.Snode 156 for _, psi := range smap.Pmap { 157 if smap.IsIC(psi) { 158 node = psi 159 break 160 } 161 } 162 redirectURL := ic.p.redirectURL(r, node, time.Now(), cmn.NetIntraControl) 163 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 164 return true 165 } 166 167 func (ic *ic) xstatusAll(w http.ResponseWriter, r *http.Request, query url.Values) { 168 msg := &xact.QueryMsg{} 169 if err := cmn.ReadJSON(w, r, msg); err != nil { 170 return 171 } 172 flt := nlFilter{ID: msg.ID, Kind: msg.Kind, Bck: (*meta.Bck)(&msg.Bck), OnlyRunning: msg.OnlyRunning} 173 if !msg.Bck.IsEmpty() { 174 flt.Bck = (*meta.Bck)(&msg.Bck) 175 } 176 177 var ( 178 vec nl.StatusVec 179 nls = ic.p.notifs.findAll(flt) 180 ) 181 if cos.IsParseBool(query.Get(apc.QparamForce)) { 182 // (force just-in-time) 183 // for each args-selected xaction: 184 // check if any of the targets delayed updating the corresponding status, 185 // and query those targets directly 186 var ( 187 config = cmn.GCO.Get() 188 interval = config.Periodic.NotifTime.D() 189 ) 190 for _, nl := range nls { 191 ic.p.notifs.bcastGetStats(nl, interval) 192 status := nl.Status() 193 if err := nl.Err(); err != nil { 194 status.ErrMsg = err.Error() 195 } 196 vec = append(vec, *status) 197 } 198 } else { 199 for _, nl := range nls { 200 vec = append(vec, *nl.Status()) 201 } 202 } 203 b := cos.MustMarshal(vec) 204 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b))) 205 w.Write(b) 206 } 207 208 func (ic *ic) xstatusOne(w http.ResponseWriter, r *http.Request) { 209 var ( 210 nl nl.Listener 211 bck *meta.Bck 212 msg = &xact.QueryMsg{} 213 ) 214 if err := cmn.ReadJSON(w, r, msg); err != nil { 215 return 216 } 217 msg.Kind, _ = xact.GetKindName(msg.Kind) // display name => kind 218 if msg.ID == "" && msg.Kind == "" { 219 ic.p.writeErrStatusf(w, r, http.StatusBadRequest, "invalid %s", msg) 220 return 221 } 222 223 // for queries of the type {Kind: apc.ActRebalance} 224 if msg.ID == "" && ic.redirectToIC(w, r) { 225 return 226 } 227 if msg.ID != "" && ic.reverseToOwner(w, r, msg.ID, msg) { 228 return 229 } 230 231 if msg.Bck.Name != "" { 232 bck = meta.CloneBck(&msg.Bck) 233 if err := bck.Init(ic.p.owner.bmd); err != nil { 234 ic.p.writeErr(w, r, err, http.StatusNotFound, Silent) 235 return 236 } 237 } 238 flt := nlFilter{ID: msg.ID, Kind: msg.Kind, Bck: bck, OnlyRunning: msg.OnlyRunning} 239 withRetry(cmn.Rom.CplaneOperation(), func() bool { 240 nl = ic.p.notifs.find(flt) 241 return nl != nil 242 }) 243 if nl == nil { 244 smap := ic.p.owner.smap.get() 245 err := fmt.Errorf("nl not found: %s, %s", smap.StrIC(ic.p.si), msg) 246 ic.p.writeErr(w, r, err, http.StatusNotFound, Silent) 247 return 248 } 249 250 if msg.Kind != "" && nl.Kind() != msg.Kind { 251 ic.p.writeErrf(w, r, "kind mismatch: %s, expected kind=%s", msg, nl.Kind()) 252 return 253 } 254 255 // refresh NotifStatus 256 var ( 257 config = cmn.GCO.Get() 258 interval = config.Periodic.NotifTime.D() 259 ) 260 ic.p.notifs.bcastGetStats(nl, interval) 261 262 status := nl.Status() 263 if err := nl.Err(); err != nil { 264 status.ErrMsg = err.Error() 265 } 266 b := cos.MustMarshal(status) // TODO: include stats, e.g., progress when ready 267 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b))) 268 w.Write(b) 269 } 270 271 // verb /v1/ic 272 func (ic *ic) handler(w http.ResponseWriter, r *http.Request) { 273 switch r.Method { 274 case http.MethodGet: 275 ic.handleGet(w, r) 276 case http.MethodPost: 277 ic.handlePost(w, r) 278 default: 279 debug.Assert(false) 280 } 281 } 282 283 // GET /v1/ic 284 func (ic *ic) handleGet(w http.ResponseWriter, r *http.Request) { 285 var ( 286 smap = ic.p.owner.smap.get() 287 what = r.URL.Query().Get(apc.QparamWhat) 288 ) 289 if !smap.IsIC(ic.p.si) { 290 ic.p.writeErrf(w, r, "%s: not an IC member", ic.p.si) 291 return 292 } 293 294 switch what { 295 case apc.WhatICBundle: 296 bundle := icBundle{Smap: smap, OwnershipTbl: cos.MustMarshal(&ic.p.notifs)} 297 ic.p.writeJSON(w, r, bundle, what) 298 default: 299 ic.p.writeErrf(w, r, fmtUnknownQue, what) 300 } 301 } 302 303 // POST /v1/ic 304 func (ic *ic) handlePost(w http.ResponseWriter, r *http.Request) { 305 var ( 306 smap = ic.p.owner.smap.get() 307 msg = &aisMsg{} 308 ) 309 if err := cmn.ReadJSON(w, r, msg); err != nil { 310 return 311 } 312 if !smap.IsIC(ic.p.si) { 313 if !withRetry(cmn.Rom.CplaneOperation(), func() bool { 314 smap = ic.p.owner.smap.get() 315 return smap.IsIC(ic.p.si) 316 }) { 317 ic.p.writeErrf(w, r, "%s: not an IC member", ic.p.si) 318 return 319 } 320 } 321 322 switch msg.Action { 323 case apc.ActMergeOwnershipTbl: 324 if err := cos.MorphMarshal(msg.Value, &ic.p.notifs); err != nil { 325 ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err) 326 return 327 } 328 case apc.ActListenToNotif: 329 nlMsg := ¬ifListenMsg{} 330 if err := cos.MorphMarshal(msg.Value, nlMsg); err != nil { 331 ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err) 332 return 333 } 334 if err := ic.p.notifs.add(nlMsg.nl); err != nil { 335 ic.p.writeErr(w, r, err) 336 return 337 } 338 case apc.ActRegGlobalXaction: 339 var ( 340 regMsg = &xactRegMsg{} 341 tmap meta.NodeMap 342 callerSver = r.Header.Get(apc.HdrCallerSmapVer) 343 err error 344 ) 345 if err = cos.MorphMarshal(msg.Value, regMsg); err != nil { 346 ic.p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, ic.p.si, msg.Action, msg.Value, err) 347 return 348 } 349 debug.Assert(len(regMsg.Srcs) != 0) 350 withRetry(cmn.Rom.CplaneOperation(), func() bool { 351 smap = ic.p.owner.smap.get() 352 tmap, err = smap.NewTmap(regMsg.Srcs) 353 return err == nil && callerSver == smap.vstr 354 }) 355 if err != nil { 356 ic.p.writeErrStatusf(w, r, http.StatusNotFound, "%s: failed to %q: %v", ic.p, msg.Action, err) 357 return 358 } 359 nl := xact.NewXactNL(regMsg.UUID, regMsg.Kind, &smap.Smap, tmap) 360 if err = ic.p.notifs.add(nl); err != nil { 361 ic.p.writeErr(w, r, err) 362 return 363 } 364 default: 365 ic.p.writeErrAct(w, r, msg.Action) 366 } 367 } 368 369 func (ic *ic) registerEqual(a regIC) { 370 if a.query != nil { 371 a.query.Set(apc.QparamNotifyMe, equalIC) 372 } 373 if a.smap.IsIC(ic.p.si) { 374 err := ic.p.notifs.add(a.nl) 375 debug.AssertNoErr(err) 376 } 377 if a.smap.ICCount() > 1 { 378 ic.bcastListenIC(a.nl) 379 } 380 } 381 382 func (ic *ic) bcastListenIC(nl nl.Listener) { 383 var ( 384 actMsg = apc.ActMsg{Action: apc.ActListenToNotif, Value: newNLMsg(nl)} 385 msg = ic.p.newAmsg(&actMsg, nil) 386 ) 387 ic.p.bcastAsyncIC(msg) 388 } 389 390 func (ic *ic) sendOwnershipTbl(si *meta.Snode, smap *smapX) error { 391 if ic.p.notifs.size() == 0 { 392 if cmn.Rom.FastV(4, cos.SmoduleAIS) { 393 nlog.Infof("%s: notifs empty, not sending to %s", ic.p, si) 394 } 395 return nil 396 } 397 msg := ic.p.newAmsgActVal(apc.ActMergeOwnershipTbl, &ic.p.notifs) 398 cargs := allocCargs() 399 { 400 cargs.si = si 401 cargs.req = cmn.HreqArgs{Method: http.MethodPost, Path: apc.URLPathIC.S, Body: cos.MustMarshal(msg)} 402 cargs.timeout = cmn.Rom.CplaneOperation() 403 } 404 res := ic.p.call(cargs, smap) 405 freeCargs(cargs) 406 return res.err 407 } 408 409 // sync ownership table; TODO: review control flows and revisit impl. 410 func (ic *ic) syncICBundle() error { 411 smap := ic.p.owner.smap.get() 412 si := ic.p.si 413 for _, psi := range smap.Pmap { 414 if smap.IsIC(psi) && psi.ID() != si.ID() { 415 si = psi 416 break 417 } 418 } 419 420 if si.Eq(ic.p.si) { 421 return nil 422 } 423 cargs := allocCargs() 424 { 425 cargs.si = si 426 cargs.req = cmn.HreqArgs{ 427 Method: http.MethodGet, 428 Path: apc.URLPathIC.S, 429 Query: url.Values{apc.QparamWhat: []string{apc.WhatICBundle}}, 430 } 431 cargs.timeout = cmn.Rom.CplaneOperation() 432 cargs.cresv = cresIC{} // -> icBundle 433 } 434 res := ic.p.call(cargs, smap) 435 freeCargs(cargs) 436 if res.err != nil { 437 return res.err 438 } 439 440 bundle := res.v.(*icBundle) 441 debug.Assertf(smap.UUID == bundle.Smap.UUID, "%s vs %s", smap.StringEx(), bundle.Smap.StringEx()) 442 443 if err := ic.p.owner.smap.synchronize(ic.p.si, bundle.Smap, nil /*ms payload*/, ic.p.htrun.smapUpdatedCB); err != nil { 444 if !isErrDowngrade(err) { 445 nlog.Errorln(cmn.NewErrFailedTo(ic.p, "sync", bundle.Smap, err)) 446 } 447 } else { 448 smap = ic.p.owner.smap.get() 449 nlog.Infof("%s: synch %s", ic.p, smap) 450 } 451 452 if !smap.IsIC(ic.p.si) { 453 return nil 454 } 455 if err := jsoniter.Unmarshal(bundle.OwnershipTbl, &ic.p.notifs); err != nil { 456 return fmt.Errorf(cmn.FmtErrUnmarshal, ic.p, "ownership table", cos.BHead(bundle.OwnershipTbl), err) 457 } 458 return nil 459 }