github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/ais.go (about) 1 // Package backend contains implementation of various backend providers. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package backend 6 7 import ( 8 "context" 9 "fmt" 10 "io" 11 "net/http" 12 "net/url" 13 "regexp" 14 "sync" 15 "time" 16 17 "github.com/NVIDIA/aistore/api" 18 "github.com/NVIDIA/aistore/api/apc" 19 "github.com/NVIDIA/aistore/cmn" 20 "github.com/NVIDIA/aistore/cmn/cos" 21 "github.com/NVIDIA/aistore/cmn/debug" 22 "github.com/NVIDIA/aistore/cmn/nlog" 23 "github.com/NVIDIA/aistore/core" 24 "github.com/NVIDIA/aistore/core/meta" 25 "github.com/NVIDIA/aistore/fs" 26 ) 27 28 // NOTE: some of the methods here are part of the of the *extended* native AIS API outside 29 // generic `BackendProvider` (see core/backend.go) 30 31 // TODO: 32 // - include `appliedCfgVer` in the GetInfo* response (to synchronize p._remais, etc.) 33 // - periodically refresh remote Smap 34 // - use m.remote[uuid].smap to load balance and retry disconnects 35 36 const ua = "aisnode/backend" 37 38 const remAisDefunct = "defunct" // uuid configured offline 39 40 type ( 41 remAis struct { 42 smap *meta.Smap 43 m *AISbp 44 url string 45 uuid string 46 bp api.BaseParams 47 } 48 AISbp struct { 49 t core.TargetPut 50 remote map[string]*remAis // by UUID 51 alias cos.StrKVs // alias => UUID 52 mu sync.RWMutex 53 appliedCfgVer int64 54 base 55 } 56 ) 57 58 // interface guard 59 var _ core.Backend = (*AISbp)(nil) 60 61 var ( 62 preg, treg *regexp.Regexp 63 ) 64 65 func NewAIS(t core.TargetPut) *AISbp { 66 suff := regexp.QuoteMeta(meta.SnameSuffix) 67 preg = regexp.MustCompile(regexp.QuoteMeta(meta.PnamePrefix) + `\S*` + suff + ": ") 68 treg = regexp.MustCompile(regexp.QuoteMeta(meta.TnamePrefix) + `\S*` + suff + ": ") 69 return &AISbp{ 70 t: t, 71 remote: make(map[string]*remAis), 72 alias: make(cos.StrKVs), 73 base: base{apc.AIS}, 74 } 75 } 76 77 func (r *remAis) String() string { 78 var alias string 79 for a, uuid := range r.m.alias { 80 if uuid == r.smap.UUID { 81 alias = a 82 break 83 } 84 } 85 return fmt.Sprintf("remote cluster (%s, %q, %q, %s)", r.url, alias, r.smap.UUID, r.smap) 86 } 87 88 func unsetUUID(bck *cmn.Bck) { bck.Ns.UUID = "" } 89 90 func extractErrCode(e error, uuid string) (int, error) { 91 if e == nil { 92 return http.StatusOK, nil 93 } 94 herr := cmn.Err2HTTPErr(e) 95 if herr == nil { 96 return http.StatusInternalServerError, e 97 } 98 if herr.Status == http.StatusRequestedRangeNotSatisfiable { 99 return http.StatusRequestedRangeNotSatisfiable, cmn.NewErrRangeNotSatisfiable(herr, nil, 0) 100 } 101 if uuid != "" { 102 msg := herr.Message 103 loc := preg.FindStringIndex(msg) 104 if loc == nil { 105 loc = treg.FindStringIndex(msg) 106 } 107 if loc != nil && loc[1] > loc[0]+2 { 108 herr.Message = msg[loc[0]:loc[1]-2] + "@" + uuid + ": " + msg[loc[1]:] 109 } 110 } 111 return herr.Status, herr 112 } 113 114 // apply new or updated (attach, detach) cmn.BackendConfAIS configuration 115 func (m *AISbp) Apply(v any, action string, cfg *cmn.ClusterConfig) (err error) { 116 conf := cmn.BackendConfAIS{} 117 if err = cos.MorphMarshal(v, &conf); err != nil { 118 err = fmt.Errorf("%s: invalid ais backend config (%+v, %T): %v", m.t, v, v, err) 119 debug.AssertNoErr(err) 120 return 121 } 122 nlog.Infof("%s: apply %q %+v Conf v%d", m.t, action, conf, cfg.Version) 123 m.mu.Lock() 124 err = m._apply(cfg, conf, action) 125 if err == nil { 126 m.appliedCfgVer = cfg.Version 127 } 128 m.mu.Unlock() 129 return 130 } 131 132 func (m *AISbp) _apply(cfg *cmn.ClusterConfig, clusterConf cmn.BackendConfAIS, action string) error { 133 // detach 134 if action == apc.ActDetachRemAis { 135 for alias, uuid := range m.alias { 136 if _, ok := clusterConf[alias]; !ok { 137 if _, ok = clusterConf[uuid]; !ok { 138 delete(m.alias, alias) 139 delete(m.remote, uuid) 140 } 141 } 142 } 143 return nil 144 } 145 146 // validate aliases 147 for alias := range clusterConf { 148 if err := cmn.ValidateRemAlias(alias); err != nil { 149 return err 150 } 151 } 152 153 // init and attach 154 for alias, clusterURLs := range clusterConf { 155 remAis := &remAis{} 156 if offline, err := remAis.init(alias, clusterURLs, cfg); err != nil { // and check connectivity 157 if offline { 158 continue 159 } 160 return err 161 } 162 if err := m.add(remAis, alias); err != nil { 163 return err 164 } 165 } 166 return nil 167 } 168 169 // return (m.remote + m.alias) in-memory info wo/ connecting to remote cluster(s) 170 // (compare with GetInfo() below) 171 // TODO: caller to pass its cached version to optimize-out allocations 172 func (m *AISbp) GetInfoInternal() (res meta.RemAisVec) { 173 m.mu.RLock() 174 res.A = make([]*meta.RemAis, 0, len(m.remote)) 175 for uuid, remAis := range m.remote { 176 out := &meta.RemAis{UUID: uuid, URL: remAis.url} 177 for a, u := range m.alias { 178 if uuid == u { 179 out.Alias = a 180 break 181 } 182 } 183 res.A = append(res.A, out) 184 } 185 res.Ver = m.appliedCfgVer 186 m.mu.RUnlock() 187 return 188 } 189 190 // At the same time a cluster may have registered both types of remote AIS 191 // clusters(HTTP and HTTPS). So, the method must use both kinds of clients and 192 // select the correct one at the moment it sends a request. 193 // See also: GetInfoInternal() 194 // TODO: ditto 195 func (m *AISbp) GetInfo(clusterConf cmn.BackendConfAIS) (res meta.RemAisVec) { 196 var ( 197 cfg = cmn.GCO.Get() 198 cliPlain, cliTLS = remaisClients(&cfg.Client) 199 ) 200 m.mu.RLock() 201 res.A = make([]*meta.RemAis, 0, len(m.remote)) 202 for uuid, remAis := range m.remote { 203 var ( 204 out = &meta.RemAis{UUID: uuid, URL: remAis.url} 205 client = cliPlain 206 ) 207 if cos.IsHTTPS(remAis.url) { 208 client = cliTLS 209 } 210 for a, u := range m.alias { 211 if uuid == u { 212 out.Alias = a 213 break 214 } 215 } 216 217 // online? 218 if smap, err := api.GetClusterMap(api.BaseParams{Client: client, URL: remAis.url, UA: ua}); err == nil { 219 if smap.UUID != uuid { 220 nlog.Errorf("%s: UUID has changed %q", remAis, smap.UUID) 221 continue 222 } 223 if smap.Version < remAis.smap.Version { 224 nlog.Errorf("%s: detected older Smap %s - proceeding to override anyway", remAis, smap) 225 } 226 remAis.smap = smap 227 } 228 out.Smap = remAis.smap 229 res.A = append(res.A, out) 230 } 231 // defunct (cluster config not updated yet locally?) 232 for alias, clusterURLs := range clusterConf { 233 if _, ok := m.alias[alias]; !ok { 234 if _, ok = m.remote[alias]; !ok { 235 out := &meta.RemAis{Alias: alias, UUID: remAisDefunct} 236 out.URL = fmt.Sprintf("%v", clusterURLs) 237 res.A = append(res.A, out) 238 } 239 } 240 } 241 m.mu.RUnlock() 242 return 243 } 244 245 func remaisClients(clientConf *cmn.ClientConf) (client, clientTLS *http.Client) { 246 return cmn.NewDefaultClients(clientConf.Timeout.D()) 247 } 248 249 // A list of remote AIS URLs can contains both HTTP and HTTPS links at the 250 // same time. So, the method must use both kind of clients and select the 251 // correct one at the moment it sends a request. First successful request 252 // saves the good client for the future usage. 253 func (r *remAis) init(alias string, confURLs []string, cfg *cmn.ClusterConfig) (offline bool, err error) { 254 var ( 255 url string 256 remSmap, smap *meta.Smap 257 cliH, cliTLS = remaisClients(&cfg.Client) 258 ) 259 for _, u := range confURLs { 260 client := cliH 261 if cos.IsHTTPS(u) { 262 client = cliTLS 263 } 264 if smap, err = api.GetClusterMap(api.BaseParams{Client: client, URL: u, UA: ua}); err != nil { 265 nlog.Warningf("remote cluster failing to reach %q via %s: %v", alias, u, err) 266 continue 267 } 268 if remSmap == nil { 269 remSmap, url = smap, u 270 continue 271 } 272 if remSmap.UUID != smap.UUID { 273 err = fmt.Errorf("%q(%v) references two different clusters: uuid=%q vs uuid=%q", 274 alias, confURLs, remSmap.UUID, smap.UUID) 275 return 276 } 277 if remSmap.Version < smap.Version { 278 remSmap, url = smap, u 279 } 280 } 281 if remSmap == nil { 282 err = fmt.Errorf("remote cluster failed to reach %q via any/all of the configured URLs %v", alias, confURLs) 283 offline = true 284 return 285 } 286 r.smap, r.url = remSmap, url 287 if cos.IsHTTPS(url) { 288 r.bp = api.BaseParams{Client: cliTLS, URL: url, UA: ua} 289 } else { 290 r.bp = api.BaseParams{Client: cliH, URL: url, UA: ua} 291 } 292 r.uuid = remSmap.UUID 293 return 294 } 295 296 // NOTE: supporting remote attachments both by alias and by UUID interchangeably, 297 // with mappings: 1(uuid) to 1(cluster) and 1(alias) to 1(cluster) 298 func (m *AISbp) add(newAis *remAis, newAlias string) (err error) { 299 if remAis, ok := m.remote[newAlias]; ok { 300 return fmt.Errorf("cannot attach %s: alias %q is already in use as uuid for %s", 301 newAlias, newAlias, remAis) 302 } 303 newAis.m = m 304 tag := "added" 305 if newAlias == newAis.smap.UUID { 306 // not an alias 307 goto ad 308 } 309 // existing 310 if remAis, ok := m.remote[newAis.smap.UUID]; ok { 311 // can re-alias existing remote cluster 312 for alias, uuid := range m.alias { 313 if uuid == newAis.smap.UUID { 314 delete(m.alias, alias) 315 } 316 } 317 m.alias[newAlias] = newAis.smap.UUID // alias 318 if newAis.url != remAis.url { 319 nlog.Warningf("%s: different new URL %s - overriding", remAis, newAis) 320 } 321 if newAis.smap.Version < remAis.smap.Version { 322 nlog.Errorf("%s: detected older Smap %s - proceeding to override anyway", remAis, newAis) 323 } 324 tag = "updated" 325 goto ad 326 } 327 if uuid, ok := m.alias[newAlias]; ok { 328 remAis, ok := m.remote[uuid] 329 if ok { 330 return fmt.Errorf("cannot attach %s: alias %q is already in use for %s", 331 newAis, newAlias, remAis) 332 } 333 delete(m.alias, newAlias) 334 } 335 m.alias[newAlias] = newAis.smap.UUID 336 ad: 337 m.remote[newAis.smap.UUID] = newAis 338 nlog.Infof("%s %s", newAis, tag) 339 return 340 } 341 342 func (m *AISbp) getRemAis(aliasOrUUID string) (remAis *remAis, err error) { 343 m.mu.RLock() 344 remAis, _, err = m.resolve(aliasOrUUID) 345 m.mu.RUnlock() 346 return 347 } 348 349 func (m *AISbp) headRemAis(aliasOrUUID string) (remAis *remAis, alias, uuid string, err error) { 350 m.mu.RLock() 351 remAis, uuid, err = m.resolve(aliasOrUUID) 352 if err != nil { 353 m.mu.RUnlock() 354 return 355 } 356 for a, u := range m.alias { 357 if u == uuid { 358 alias = a 359 break 360 } 361 } 362 m.mu.RUnlock() 363 return 364 } 365 366 // resolve (alias | UUID) => remAis, UUID 367 // is called under lock 368 func (m *AISbp) resolve(uuid string) (*remAis, string, error) { 369 remAis, ok := m.remote[uuid] 370 if ok { 371 return remAis, uuid, nil 372 } 373 alias := uuid 374 if uuid, ok = m.alias[alias]; !ok { 375 return nil, "", cos.NewErrNotFound(m.t, "remote cluster \""+alias+"\"") 376 } 377 remAis, ok = m.remote[uuid] 378 debug.Assertf(ok, "%q vs %q", alias, uuid) 379 return remAis, uuid, nil 380 } 381 382 ///////////////////// 383 // BackendProvider // 384 ///////////////////// 385 386 func (*AISbp) CreateBucket(_ *meta.Bck) (ecode int, err error) { 387 debug.Assert(false) // Bucket creation happens only with reverse proxy to AIS cluster. 388 return 0, nil 389 } 390 391 // TODO: remote AIS clusters provide native frontend API with additional capabilities 392 // that, in particular, include `dontAddRemote` = (true | false). 393 // Here we have to hardcode the value to keep HeadBucket() consistent across all backends. 394 // For similar limitations, see also ListBuckets() below. 395 func (m *AISbp) HeadBucket(_ context.Context, remoteBck *meta.Bck) (bckProps cos.StrKVs, ecode int, err error) { 396 var ( 397 remAis *remAis 398 p *cmn.Bprops 399 alias, uuid string 400 ) 401 if remAis, alias, uuid, err = m.headRemAis(remoteBck.Ns.UUID); err != nil { 402 return 403 } 404 debug.Assert(uuid == remAis.uuid) 405 bck := remoteBck.Clone() 406 unsetUUID(&bck) 407 if p, err = api.HeadBucket(remAis.bp, bck, false /*dontAddRemote*/); err != nil { 408 ecode, err = extractErrCode(err, remAis.uuid) 409 return 410 } 411 412 bckProps = make(cos.StrKVs, 32) 413 err = cmn.IterFields(p, func(uniqueTag string, field cmn.IterField) (e error, b bool) { 414 bckProps[uniqueTag] = fmt.Sprintf("%v", field.Value()) 415 return nil, false 416 }) 417 // an extra 418 bckProps[apc.HdrBackendProvider] = apc.AIS 419 bckProps[apc.HdrRemAisUUID] = remAis.uuid 420 bckProps[apc.HdrRemAisAlias] = alias 421 bckProps[apc.HdrRemAisURL] = remAis.url 422 423 return 424 } 425 426 func (m *AISbp) ListObjects(remoteBck *meta.Bck, msg *apc.LsoMsg, lst *cmn.LsoRes) (ecode int, err error) { 427 var remAis *remAis 428 if remAis, err = m.getRemAis(remoteBck.Ns.UUID); err != nil { 429 return 430 } 431 remoteMsg := msg.Clone() 432 remoteMsg.PageSize = calcPageSize(remoteMsg.PageSize, remoteBck.MaxPageSize()) 433 434 // TODO: 435 // Currently, not encoding xaction (aka request) `UUID` from the remote cluster 436 // in the `ContinuationToken` (note below). 437 remoteMsg.UUID = "" 438 439 // likewise, let remote ais gateway decide 440 remoteMsg.SID = "" 441 442 bck := remoteBck.Clone() 443 unsetUUID(&bck) 444 445 var lstRes *cmn.LsoRes 446 if lstRes, err = api.ListObjectsPage(remAis.bp, bck, remoteMsg, api.ListArgs{}); err != nil { 447 ecode, err = extractErrCode(err, remAis.uuid) 448 return 449 } 450 *lst = *lstRes 451 452 // Restore the original request UUID (UUID of the remote cluster is already inside `ContinuationToken`). 453 lst.UUID = msg.UUID 454 return 455 } 456 457 func (m *AISbp) ListBuckets(qbck cmn.QueryBcks) (bcks cmn.Bcks, ecode int, err error) { 458 if !qbck.Ns.IsAnyRemote() { 459 // caller provided uuid (or alias) 460 bcks, err = m.blist(qbck.Ns.UUID, qbck) 461 ecode, err = extractErrCode(err, qbck.Ns.UUID) 462 return 463 } 464 465 // all attached 466 m.mu.RLock() 467 uuids := make([]string, 0, len(m.remote)) 468 for u := range m.remote { 469 uuids = append(uuids, u) 470 } 471 m.mu.RUnlock() 472 if len(uuids) == 0 { 473 return 474 } 475 for _, uuid := range uuids { 476 remoteBcks, errV := m.blist(uuid, qbck) 477 bcks = append(bcks, remoteBcks...) 478 if errV != nil && err == nil { 479 err = errV 480 } 481 } 482 if len(uuids) == 1 { 483 ecode, err = extractErrCode(err, uuids[0]) 484 } else { 485 ecode, err = extractErrCode(err, "") 486 } 487 return 488 } 489 490 // NOTE: 491 // remote AIS clusters provide native frontend with additional capabilities which 492 // also include apc.Flt* _location_ specifier. Here we simply hardcode the `apc.FltExists` 493 // to keep `ListBuckets` consistent across (aws, gcp, etc.) backends. 494 495 func (m *AISbp) blist(uuid string, qbck cmn.QueryBcks) (bcks cmn.Bcks, err error) { 496 var ( 497 remAis *remAis 498 remoteQuery = cmn.QueryBcks{Provider: apc.AIS, Ns: cmn.Ns{Name: qbck.Ns.Name}} 499 ) 500 if remAis, err = m.getRemAis(uuid); err != nil { 501 return 502 } 503 bcks, err = api.ListBuckets(remAis.bp, remoteQuery, apc.FltExists) 504 if err != nil { 505 _, err = extractErrCode(err, uuid) 506 return nil, err 507 } 508 for i, bck := range bcks { 509 bck.Ns.UUID = uuid // if user-provided `uuid` is in fact an alias - keep it 510 bcks[i] = bck 511 } 512 return bcks, nil 513 } 514 515 // TODO: remote AIS clusters provide native frontend API with additional capabilities 516 // in part including apc.Flt* location specifier. 517 // Here, and elsewhere down below, we hardcode (the default) `apc.FltPresent` to, eesentially, 518 // keep HeadObj() consistent across backends. 519 func (m *AISbp) HeadObj(_ context.Context, lom *core.LOM, _ *http.Request) (oa *cmn.ObjAttrs, ecode int, err error) { 520 var ( 521 remAis *remAis 522 op *cmn.ObjectProps 523 remoteBck = lom.Bck().Clone() 524 ) 525 if remAis, err = m.getRemAis(remoteBck.Ns.UUID); err != nil { 526 return 527 } 528 unsetUUID(&remoteBck) 529 if op, err = api.HeadObject(remAis.bp, remoteBck, lom.ObjName, apc.FltPresent, true /*silent*/); err != nil { 530 ecode, err = extractErrCode(err, remAis.uuid) 531 return 532 } 533 oa = &cmn.ObjAttrs{} 534 *oa = op.ObjAttrs 535 oa.SetCustomKey(cmn.SourceObjMD, apc.AIS) 536 return 537 } 538 539 func (m *AISbp) GetObj(_ context.Context, lom *core.LOM, owt cmn.OWT, _ *http.Request) (ecode int, err error) { 540 var ( 541 remAis *remAis 542 r io.ReadCloser 543 size int64 544 remoteBck = lom.Bck().Clone() 545 ) 546 if remAis, err = m.getRemAis(remoteBck.Ns.UUID); err != nil { 547 return 548 } 549 unsetUUID(&remoteBck) 550 if r, size, err = api.GetObjectReader(remAis.bp, remoteBck, lom.ObjName, nil /*api.GetArgs*/); err != nil { 551 return extractErrCode(err, remAis.uuid) 552 } 553 params := core.AllocPutParams() 554 { 555 params.WorkTag = fs.WorkfileColdget 556 params.Reader = r 557 params.OWT = owt 558 params.Size = size 559 params.Atime = time.Now() 560 } 561 err = m.t.PutObject(lom, params) 562 core.FreePutParams(params) 563 return extractErrCode(err, remAis.uuid) 564 } 565 566 func (m *AISbp) GetObjReader(_ context.Context, lom *core.LOM, offset, length int64) (res core.GetReaderResult) { 567 var ( 568 remAis *remAis 569 op *cmn.ObjectProps 570 args *api.GetArgs 571 remoteBck = lom.Bck().Clone() 572 ) 573 if remAis, res.Err = m.getRemAis(remoteBck.Ns.UUID); res.Err != nil { 574 return 575 } 576 unsetUUID(&remoteBck) 577 578 // reader 579 if length > 0 { 580 rng := cmn.MakeRangeHdr(offset, length) 581 args = &api.GetArgs{ 582 Header: http.Header{cos.HdrRange: []string{rng}}, 583 Query: url.Values{apc.QparamSilent: []string{"true"}}, 584 } 585 } else { 586 if op, res.Err = api.HeadObject(remAis.bp, remoteBck, lom.ObjName, apc.FltPresent, true /*silent*/); res.Err != nil { 587 res.ErrCode, res.Err = extractErrCode(res.Err, remAis.uuid) 588 return 589 } 590 oa := lom.ObjAttrs() 591 *oa = op.ObjAttrs 592 res.Size = oa.Size 593 oa.SetCustomKey(cmn.SourceObjMD, apc.AIS) 594 res.ExpCksum = oa.Cksum 595 lom.SetCksum(nil) 596 } 597 res.R, res.Size, res.Err = api.GetObjectReader(remAis.bp, remoteBck, lom.ObjName, args) 598 res.ErrCode, res.Err = extractErrCode(res.Err, remAis.uuid) 599 return 600 } 601 602 func (m *AISbp) PutObj(r io.ReadCloser, lom *core.LOM, _ *http.Request) (ecode int, err error) { 603 var ( 604 oah api.ObjAttrs 605 remAis *remAis 606 remoteBck = lom.Bck().Clone() 607 ) 608 if remAis, err = m.getRemAis(remoteBck.Ns.UUID); err != nil { 609 cos.Close(r) 610 return 611 } 612 unsetUUID(&remoteBck) 613 size := lom.SizeBytes(true) // _special_ as it's still a workfile at this point 614 args := api.PutArgs{ 615 BaseParams: remAis.bp, 616 Bck: remoteBck, 617 ObjName: lom.ObjName, 618 Cksum: lom.Checksum(), 619 Reader: r.(cos.ReadOpenCloser), 620 Size: uint64(size), 621 } 622 if oah, err = api.PutObject(&args); err != nil { 623 ecode, err = extractErrCode(err, remAis.uuid) 624 return 625 } 626 // compare w/ lom.CopyAttrs 627 oa := lom.ObjAttrs() 628 *oa = oah.Attrs() 629 630 // NOTE: restore back into the lom as PUT response header does not contain "Content-Length" (cos.HdrContentLength) 631 oa.Size = size 632 633 oa.SetCustomKey(cmn.SourceObjMD, apc.AIS) 634 return 635 } 636 637 func (m *AISbp) DeleteObj(lom *core.LOM) (ecode int, err error) { 638 var ( 639 remAis *remAis 640 remoteBck = lom.Bck().Clone() 641 ) 642 if remAis, err = m.getRemAis(remoteBck.Ns.UUID); err != nil { 643 return 644 } 645 unsetUUID(&remoteBck) 646 err = api.DeleteObject(remAis.bp, remoteBck, lom.ObjName) 647 return extractErrCode(err, remAis.uuid) 648 }