github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/proxy.go (about) 1 // Package ais provides core functionality for the AIStore object storage. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package ais 6 7 import ( 8 "errors" 9 "fmt" 10 "io" 11 "net" 12 "net/http" 13 "net/url" 14 "os" 15 "path" 16 "path/filepath" 17 "strconv" 18 "strings" 19 "sync" 20 "syscall" 21 "time" 22 23 "github.com/NVIDIA/aistore/ais/s3" 24 "github.com/NVIDIA/aistore/api/apc" 25 "github.com/NVIDIA/aistore/cmn" 26 "github.com/NVIDIA/aistore/cmn/archive" 27 "github.com/NVIDIA/aistore/cmn/atomic" 28 "github.com/NVIDIA/aistore/cmn/cifl" 29 "github.com/NVIDIA/aistore/cmn/cos" 30 "github.com/NVIDIA/aistore/cmn/debug" 31 "github.com/NVIDIA/aistore/cmn/feat" 32 "github.com/NVIDIA/aistore/cmn/fname" 33 "github.com/NVIDIA/aistore/cmn/k8s" 34 "github.com/NVIDIA/aistore/cmn/mono" 35 "github.com/NVIDIA/aistore/cmn/nlog" 36 "github.com/NVIDIA/aistore/core" 37 "github.com/NVIDIA/aistore/core/meta" 38 "github.com/NVIDIA/aistore/ext/dsort" 39 "github.com/NVIDIA/aistore/memsys" 40 "github.com/NVIDIA/aistore/nl" 41 "github.com/NVIDIA/aistore/stats" 42 "github.com/NVIDIA/aistore/xact" 43 "github.com/NVIDIA/aistore/xact/xreg" 44 jsoniter "github.com/json-iterator/go" 45 ) 46 47 const ( 48 lsotag = "list-objects" 49 ) 50 51 type ( 52 ClusterMountpathsRaw struct { 53 Targets cos.JSONRawMsgs `json:"targets"` 54 } 55 56 // proxy runner 57 proxy struct { 58 htrun 59 authn *authManager 60 metasyncer *metasyncer 61 ic ic 62 qm lsobjMem 63 rproxy reverseProxy 64 notifs notifs 65 lstca lstca 66 reg struct { 67 pool nodeRegPool 68 mu sync.RWMutex 69 } 70 remais struct { 71 meta.RemAisVec 72 old []*meta.RemAis // to facilitate a2u resolution (and, therefore, offline access) 73 mu sync.RWMutex 74 in atomic.Bool 75 } 76 settingNewPrimary atomic.Bool // primary executing "set new primary" request (state) 77 readyToFastKalive atomic.Bool // primary can accept fast 
keepalives 78 } 79 ) 80 81 // interface guard 82 var _ cos.Runner = (*proxy)(nil) 83 84 func (*proxy) Name() string { return apc.Proxy } // as cos.Runner 85 86 func (p *proxy) initClusterCIDR() { 87 if nodeCIDR := os.Getenv("AIS_CLUSTER_CIDR"); nodeCIDR != "" { 88 _, network, err := net.ParseCIDR(nodeCIDR) 89 p.si.LocalNet = network 90 cos.AssertNoErr(err) 91 nlog.Infof("local network: %+v", *network) 92 } 93 } 94 95 func (p *proxy) init(config *cmn.Config) { 96 p.initSnode(config) 97 98 // (a) get node ID from command-line or env var (see envDaemonID()) 99 // (b) load existing ID from config file stored under local config `confdir` (compare w/ target) 100 // (c) generate a new one (genDaemonID()) 101 // - in that sequence 102 p.si.Init(initPID(config), apc.Proxy) 103 104 memsys.Init(p.SID(), p.SID(), config) 105 106 cos.InitShortID(p.si.Digest()) 107 108 p.initClusterCIDR() 109 daemon.rg.add(p) 110 111 ps := &stats.Prunner{} 112 startedUp := ps.Init(p) 113 daemon.rg.add(ps) 114 p.statsT = ps 115 116 k := newPalive(p, ps, startedUp) 117 daemon.rg.add(k) 118 p.keepalive = k 119 120 m := newMetasyncer(p) 121 daemon.rg.add(m) 122 p.metasyncer = m 123 } 124 125 func initPID(config *cmn.Config) (pid string) { 126 // 1. ID from env 127 if pid = envDaemonID(apc.Proxy); pid != "" { 128 if err := cos.ValidateDaemonID(pid); err != nil { 129 nlog.Errorf("Warning: %v", err) 130 } 131 return 132 } 133 134 // 2. proxy, K8s 135 if k8s.IsK8s() { 136 // NOTE: always generate i.e., compute 137 if net.ParseIP(k8s.NodeName) != nil { // does not parse as IP 138 nlog.Warningf("using K8s node name %q, an IP addr, to compute _persistent_ proxy ID", k8s.NodeName) 139 } 140 return cos.HashK8sProxyID(k8s.NodeName) 141 } 142 143 // 3. try to read ID 144 if pid = readProxyID(config); pid != "" { 145 nlog.Infof("p[%s] from %q", pid, fname.ProxyID) 146 return 147 } 148 149 // 4. 
initial deployment 150 pid = genDaemonID(apc.Proxy, config) 151 err := cos.ValidateDaemonID(pid) 152 debug.AssertNoErr(err) 153 154 // store ID on disk 155 err = os.WriteFile(filepath.Join(config.ConfigDir, fname.ProxyID), []byte(pid), cos.PermRWR) 156 debug.AssertNoErr(err) 157 nlog.Infof("p[%s] ID randomly generated", pid) 158 return 159 } 160 161 func readProxyID(config *cmn.Config) (id string) { 162 if b, err := os.ReadFile(filepath.Join(config.ConfigDir, fname.ProxyID)); err == nil { 163 id = string(b) 164 } else if !os.IsNotExist(err) { 165 nlog.Errorln(err) 166 } 167 return 168 } 169 170 func (p *proxy) pready(smap *smapX, withRR bool /* also check readiness to rebalance */) error { 171 const msg = "%s primary: not ready yet " 172 debug.Assert(smap == nil || smap.IsPrimary(p.si)) 173 174 if !p.ClusterStarted() { 175 return fmt.Errorf(msg+"(cluster is starting up)", p) 176 } 177 if withRR && p.owner.rmd.starting.Load() { 178 return fmt.Errorf(msg+"(finalizing global rebalancing state)", p) 179 } 180 return nil 181 } 182 183 // start proxy runner 184 func (p *proxy) Run() error { 185 config := cmn.GCO.Get() 186 p.htrun.init(config) 187 p.owner.bmd = newBMDOwnerPrx(config) 188 p.owner.etl = newEtlMDOwnerPrx(config) 189 190 p.owner.bmd.init() // initialize owner and load BMD 191 p.owner.etl.init() // initialize owner and load EtlMD 192 193 core.Pinit() 194 195 p.statsT.RegMetrics(p.si) // reg target metrics to common; init Prometheus if used 196 197 // startup sequence - see earlystart.go for the steps and commentary 198 p.bootstrap() 199 200 p.authn = newAuthManager() 201 202 p.rproxy.init() 203 204 p.notifs.init(p) 205 p.ic.init(p) 206 p.qm.init() 207 208 // 209 // REST API: register proxy handlers and start listening 210 // 211 networkHandlers := []networkHandler{ 212 {r: apc.Reverse, h: p.reverseHandler, net: accessNetPublic}, 213 214 // pubnet handlers: cluster must be started 215 {r: apc.Buckets, h: p.bucketHandler, net: accessNetPublic}, 216 {r: 
apc.Objects, h: p.objectHandler, net: accessNetPublic}, 217 {r: apc.Download, h: p.downloadHandler, net: accessNetPublic}, 218 {r: apc.ETL, h: p.etlHandler, net: accessNetPublic}, 219 {r: apc.Sort, h: p.dsortHandler, net: accessNetPublic}, 220 221 {r: apc.IC, h: p.ic.handler, net: accessNetIntraControl}, 222 {r: apc.Daemon, h: p.daemonHandler, net: accessNetPublicControl}, 223 {r: apc.Cluster, h: p.clusterHandler, net: accessNetPublicControl}, 224 {r: apc.Tokens, h: p.tokenHandler, net: accessNetPublic}, 225 226 {r: apc.Metasync, h: p.metasyncHandler, net: accessNetIntraControl}, 227 {r: apc.Health, h: p.healthHandler, net: accessNetPublicControl}, 228 {r: apc.Vote, h: p.voteHandler, net: accessNetIntraControl}, 229 230 {r: apc.Notifs, h: p.notifs.handler, net: accessNetIntraControl}, 231 232 // S3 compatibility 233 {r: "/" + apc.S3, h: p.s3Handler, net: accessNetPublic}, 234 235 // "easy URL" 236 {r: "/" + apc.GSScheme, h: p.easyURLHandler, net: accessNetPublic}, 237 {r: "/" + apc.AZScheme, h: p.easyURLHandler, net: accessNetPublic}, 238 {r: "/" + apc.AISScheme, h: p.easyURLHandler, net: accessNetPublic}, 239 240 // ht:// _or_ S3 compatibility, depending on feature flag 241 {r: "/", h: p.rootHandler, net: accessNetPublic}, 242 } 243 p.regNetHandlers(networkHandlers) 244 245 nlog.Infoln(cmn.NetPublic+":", "\t\t", p.si.PubNet.URL) 246 if p.si.PubNet.URL != p.si.ControlNet.URL { 247 nlog.Infoln(cmn.NetIntraControl+":", "\t", p.si.ControlNet.URL) 248 } 249 if p.si.PubNet.URL != p.si.DataNet.URL { 250 nlog.Infoln(cmn.NetIntraData+":", "\t", p.si.DataNet.URL) 251 } 252 253 dsort.Pinit(p, config) 254 255 return p.htrun.run(config) 256 } 257 258 func (p *proxy) joinCluster(action string, primaryURLs ...string) (status int, err error) { 259 var query url.Values 260 if smap := p.owner.smap.get(); smap.isPrimary(p.si) { 261 return 0, fmt.Errorf("%s should not be joining: is primary, %s", p, smap.StringEx()) 262 } 263 if cmn.GCO.Get().Proxy.NonElectable { 264 query = 
url.Values{apc.QparamNonElectable: []string{"true"}} 265 } 266 res := p.join(query, nil /*htext*/, primaryURLs...) 267 defer freeCR(res) 268 if res.err != nil { 269 status, err = res.status, res.err 270 return 271 } 272 // not being sent at cluster startup and keepalive 273 if len(res.bytes) == 0 { 274 return 275 } 276 err = p.recvCluMetaBytes(action, res.bytes, "") 277 return 278 } 279 280 // apart from minor, albeit subtle, differences between `t.joinCluster` vs `p.joinCluster` 281 // this method is otherwise identical to t.gojoin (TODO: unify) 282 func (p *proxy) gojoin(config *cmn.Config) { 283 var ( 284 smap = p.owner.smap.get() 285 pub, ctrl string 286 ) 287 if smap.Primary != nil && smap.Version > 0 { 288 pub = smap.Primary.URL(cmn.NetPublic) 289 ctrl = smap.Primary.URL(cmn.NetIntraControl) 290 } 291 cii := p.pollClusterStarted(config, smap.Primary) 292 if nlog.Stopping() { 293 return 294 } 295 296 if cii != nil { 297 // (primary changed) 298 pub, ctrl = cii.Smap.Primary.PubURL, cii.Smap.Primary.CtrlURL 299 if status, err := p.joinCluster(apc.ActSelfJoinProxy, ctrl, pub); err != nil { 300 nlog.Errorf(fmtFailedRejoin, p, err, status) 301 return 302 } 303 } 304 305 // normally, immediately return with "is ready"; 306 // otherwise, handle: (not present in cluster map | net-info changed) 307 i, sleep, total := 2, config.Timeout.MaxKeepalive.D(), config.Timeout.MaxHostBusy.D() 308 for total >= 0 { 309 smap = p.owner.smap.get() 310 si := smap.GetNode(p.SID()) 311 if si == nil { 312 nlog.Errorf(fmtSelfNotPresent, p, smap.StringEx()) 313 } else { 314 nerr := si.NetEq(p.si) 315 if nerr == nil { 316 p.markClusterStarted() 317 nlog.Infoln(p.String(), "is ready") 318 return // ok --- 319 } 320 nlog.Warningln(p.String(), "- trying to rejoin and, simultaneously, have the primary to update net-info:") 321 nlog.Warningln("\t", nerr, smap.StringEx()) 322 } 323 324 if nlog.Stopping() { 325 return 326 } 327 time.Sleep(sleep) 328 i++ 329 total -= sleep 330 smap = 
p.owner.smap.get() 331 if ctrl == "" && smap.Primary != nil && smap.Version > 0 { 332 pub = smap.Primary.URL(cmn.NetPublic) 333 ctrl = smap.Primary.URL(cmn.NetIntraControl) 334 } 335 nlog.Warningln(p.String(), "- attempt number", i, "to join") 336 if status, err := p.joinCluster(apc.ActSelfJoinProxy, ctrl, pub); err != nil { 337 nlog.Errorf(fmtFailedRejoin, p, err, status) 338 return 339 } 340 } 341 342 p.markClusterStarted() 343 nlog.Infoln(p.String(), "is ready(?)") 344 } 345 346 func (p *proxy) recvCluMetaBytes(action string, body []byte, caller string) error { 347 var cm cluMeta 348 if err := jsoniter.Unmarshal(body, &cm); err != nil { 349 return fmt.Errorf(cmn.FmtErrUnmarshal, p, "reg-meta", cos.BHead(body), err) 350 } 351 return p.recvCluMeta(&cm, action, caller) 352 } 353 354 // TODO: unify w/ t.recvCluMetaBytes 355 func (p *proxy) recvCluMeta(cm *cluMeta, action, caller string) error { 356 var ( 357 msg = p.newAmsgStr(action, cm.BMD) 358 self = p.String() + ":" 359 errs []error 360 ) 361 if cm.PrimeTime != 0 { 362 xreg.PrimeTime.Store(cm.PrimeTime) 363 xreg.MyTime.Store(time.Now().UnixNano()) 364 } 365 // Config 366 if cm.Config == nil { 367 err := fmt.Errorf(self+" invalid %T (nil config): %+v", cm, cm) 368 nlog.Errorln(err) 369 return err 370 } 371 if err := p.receiveConfig(cm.Config, msg, nil, caller); err != nil { 372 if !isErrDowngrade(err) { 373 errs = append(errs, err) 374 nlog.Errorln(err) 375 } 376 } else { 377 nlog.Infoln(self, tagCM, action, cm.Config.String()) 378 } 379 // Smap 380 if err := p.receiveSmap(cm.Smap, msg, nil /*ms payload*/, caller, p.smapOnUpdate); err != nil { 381 if !isErrDowngrade(err) { 382 errs = append(errs, err) 383 nlog.Errorln(err) 384 } 385 } else if cm.Smap != nil { 386 nlog.Infoln(self, tagCM, action, cm.Smap.String()) 387 } 388 // BMD 389 if err := p.receiveBMD(cm.BMD, msg, nil, caller); err != nil { 390 if !isErrDowngrade(err) { 391 errs = append(errs, err) 392 nlog.Errorln(err) 393 } 394 } else { 395 
nlog.Infoln(self, tagCM, action, cm.BMD.String()) 396 } 397 // RMD 398 if err := p.receiveRMD(cm.RMD, msg, caller); err != nil { 399 if !isErrDowngrade(err) { 400 errs = append(errs, err) 401 nlog.Errorln(err) 402 } 403 } else { 404 nlog.Infoln(self, tagCM, action, cm.RMD.String()) 405 } 406 // EtlMD 407 if err := p.receiveEtlMD(cm.EtlMD, msg, nil, caller, nil); err != nil { 408 if !isErrDowngrade(err) { 409 errs = append(errs, err) 410 nlog.Errorln(err) 411 } 412 } else if cm.EtlMD != nil { 413 nlog.Infoln(self, tagCM, action, cm.EtlMD.String()) 414 } 415 416 switch { 417 case errs == nil: 418 return nil 419 case len(errs) == 1: 420 return errs[0] 421 default: 422 s := fmt.Sprintf("%v", errs) 423 return cmn.NewErrFailedTo(p, action, tagCM, errors.New(s)) 424 } 425 } 426 427 // parse request + init/lookup bucket (combo) 428 func (p *proxy) _parseReqTry(w http.ResponseWriter, r *http.Request, bckArgs *bctx) (bck *meta.Bck, 429 objName string, err error) { 430 apireq := apiReqAlloc(2, apc.URLPathObjects.L, false /*dpq*/) 431 if err = p.parseReq(w, r, apireq); err != nil { 432 apiReqFree(apireq) 433 return 434 } 435 bckArgs.bck, bckArgs.query = apireq.bck, apireq.query 436 bck, err = bckArgs.initAndTry() 437 objName = apireq.items[1] 438 439 apiReqFree(apireq) 440 freeBctx(bckArgs) // caller does alloc 441 return 442 } 443 444 // verb /v1/buckets/ 445 func (p *proxy) bucketHandler(w http.ResponseWriter, r *http.Request) { 446 if !p.cluStartedWithRetry() { 447 w.WriteHeader(http.StatusServiceUnavailable) 448 return 449 } 450 switch r.Method { 451 case http.MethodGet: 452 dpq := dpqAlloc() 453 p.httpbckget(w, r, dpq) 454 dpqFree(dpq) 455 case http.MethodDelete: 456 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false /*dpq*/) 457 p.httpbckdelete(w, r, apireq) 458 apiReqFree(apireq) 459 case http.MethodPut: 460 p.httpbckput(w, r) 461 case http.MethodPost: 462 p.httpbckpost(w, r) 463 case http.MethodHead: 464 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, true /*dpq*/) 
465 p.httpbckhead(w, r, apireq) 466 apiReqFree(apireq) 467 case http.MethodPatch: 468 apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false /*dpq*/) 469 p.httpbckpatch(w, r, apireq) 470 apiReqFree(apireq) 471 default: 472 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead, 473 http.MethodPatch, http.MethodPost) 474 } 475 } 476 477 // verb /v1/objects/ 478 func (p *proxy) objectHandler(w http.ResponseWriter, r *http.Request) { 479 switch r.Method { 480 case http.MethodGet: 481 p.httpobjget(w, r) 482 case http.MethodPut: 483 apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/) 484 p.httpobjput(w, r, apireq) 485 apiReqFree(apireq) 486 case http.MethodDelete: 487 p.httpobjdelete(w, r) 488 case http.MethodPost: 489 apireq := apiReqAlloc(1, apc.URLPathObjects.L, false /*dpq*/) 490 p.httpobjpost(w, r, apireq) 491 apiReqFree(apireq) 492 case http.MethodHead: 493 p.httpobjhead(w, r) 494 case http.MethodPatch: 495 p.httpobjpatch(w, r) 496 default: 497 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead, 498 http.MethodPost, http.MethodPut) 499 } 500 } 501 502 // "Easy URL" (feature) is a simple alternative mapping of the AIS API to handle 503 // URLs paths that look as follows: 504 // 505 // /gs/mybucket/myobject - to access Google Cloud buckets 506 // /az/mybucket/myobject - Azure Blob Storage 507 // /ais/mybucket/myobject - AIS 508 // 509 // In other words, easy URL is a convenience feature that allows reading, writing, 510 // deleting, and listing objects as follows: 511 // 512 // # Example: GET 513 // $ curl -L -X GET 'http://aistore/gs/my-google-bucket/abc-train-0001.tar' 514 // # Example: PUT 515 // $ curl -L -X PUT 'http://aistore/gs/my-google-bucket/abc-train-9999.tar -T /tmp/9999.tar' 516 // # Example: LIST 517 // $ curl -L -X GET 'http://aistore/gs/my-google-bucket' 518 // 519 // NOTE: 520 // 521 // Amazon S3 is missing in the list that includes GCP and Azure. 
The reason 522 // for this is that AIS provides S3 compatibility layer via its "/s3" endpoint. 523 // S3 compatibility (see https://github.com/NVIDIA/aistore/blob/main/docs/s3compat.md) 524 // shall not be confused with a simple alternative URL Path mapping via easyURLHandler, 525 // whereby a path (e.g.) "gs/mybucket/myobject" gets replaced with 526 // "v1/objects/mybucket/myobject?provider=gcp" with _no_ other changes to the request 527 // and response parameters and components. 528 func (p *proxy) easyURLHandler(w http.ResponseWriter, r *http.Request) { 529 apiItems, err := p.parseURL(w, r, nil, 1, true) 530 if err != nil { 531 return 532 } 533 provider := apiItems[0] 534 if provider, err = cmn.NormalizeProvider(provider); err != nil { 535 p.writeErr(w, r, err) 536 return 537 } 538 // num items: 1 539 if len(apiItems) == 1 { 540 // list buckets for a given provider 541 // NOTE two differences between this implementation and `p.bckNamesFromBMD` (s3 API): 542 // - `/s3` is an API endpoint rather than a namesake provider 543 // (the API must "cover" all providers) 544 // - `/s3` and its subordinate URL paths can only "see" buckets that are already present 545 // in the BMD, while native API, when given sufficient permissions, can immediately 546 // access (read, write, list) any remote buckets, while adding them to the BMD "on the fly". 
547 r.URL.Path = apc.URLPathBuckets.S 548 if r.URL.RawQuery == "" { 549 qbck := cmn.QueryBcks{Provider: provider} 550 query := qbck.NewQuery() 551 r.URL.RawQuery = query.Encode() 552 } else if !strings.Contains(r.URL.RawQuery, apc.QparamProvider) { 553 r.URL.RawQuery += "&" + apc.QparamProvider + "=" + provider 554 } 555 p.bucketHandler(w, r) 556 return 557 } 558 // num items: 2 559 bucket := apiItems[1] 560 bck := cmn.Bck{Name: bucket, Provider: provider} 561 if err := bck.ValidateName(); err != nil { 562 p.writeErr(w, r, err) 563 return 564 } 565 566 var objName string 567 if len(apiItems) > 2 { 568 // num items: 3 569 objName = apiItems[2] 570 r.URL.Path = apc.URLPathObjects.Join(bucket, objName) 571 r.URL.Path += path.Join(apiItems[3:]...) 572 } else { 573 if r.Method == http.MethodPut { 574 p.writeErrMsg(w, r, "missing destination object name in the \"easy URL\"") 575 return 576 } 577 r.URL.Path = apc.URLPathBuckets.Join(bucket) 578 } 579 580 if r.URL.RawQuery == "" { 581 query := bck.NewQuery() 582 r.URL.RawQuery = query.Encode() 583 } else if !strings.Contains(r.URL.RawQuery, apc.QparamProvider) { 584 r.URL.RawQuery += "&" + apc.QparamProvider + "=" + bck.Provider 585 } 586 // and finally 587 if objName != "" { 588 p.objectHandler(w, r) 589 } else { 590 p.bucketHandler(w, r) 591 } 592 } 593 594 // GET /v1/buckets[/bucket-name] 595 func (p *proxy) httpbckget(w http.ResponseWriter, r *http.Request, dpq *dpq) { 596 var ( 597 msg *apc.ActMsg 598 bckName string 599 qbck *cmn.QueryBcks 600 ) 601 apiItems, err := p.parseURL(w, r, apc.URLPathBuckets.L, 0, true) 602 if err != nil { 603 return 604 } 605 if len(apiItems) > 0 { 606 bckName = apiItems[0] 607 } 608 ctype := r.Header.Get(cos.HdrContentType) 609 if r.ContentLength == 0 && !strings.HasPrefix(ctype, cos.ContentJSON) { 610 // e.g. 
"easy URL" request: curl -L -X GET 'http://aistore/ais/abc' 611 msg = &apc.ActMsg{Action: apc.ActList, Value: &apc.LsoMsg{}} 612 } else if msg, err = p.readActionMsg(w, r); err != nil { 613 return 614 } 615 if err := dpq.parse(r.URL.RawQuery); err != nil { 616 p.writeErr(w, r, err) 617 return 618 } 619 if qbck, err = newQbckFromQ(bckName, nil, dpq); err != nil { 620 p.writeErr(w, r, err) 621 return 622 } 623 624 // switch (I) through (IV) -------------------------- 625 626 // (I) summarize buckets 627 if msg.Action == apc.ActSummaryBck { 628 var summMsg apc.BsummCtrlMsg 629 if err := cos.MorphMarshal(msg.Value, &summMsg); err != nil { 630 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 631 return 632 } 633 if qbck.IsBucket() { 634 bck := (*meta.Bck)(qbck) 635 bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AceBckHEAD, bck: bck, dpq: dpq} 636 bckArgs.createAIS = false 637 bckArgs.dontHeadRemote = summMsg.BckPresent 638 if _, err := bckArgs.initAndTry(); err != nil { 639 return 640 } 641 } 642 p.bsummact(w, r, qbck, &summMsg) 643 return 644 } 645 646 // (II) invalid action 647 if msg.Action != apc.ActList { 648 p.writeErrAct(w, r, msg.Action) 649 return 650 } 651 652 // (III) list buckets 653 if msg.Value == nil { 654 if qbck.Name != "" && qbck.Name != msg.Name { 655 p.writeErrf(w, r, "bad list-buckets request: %q vs %q (%+v, %+v)", qbck.Name, msg.Name, qbck, msg) 656 return 657 } 658 qbck.Name = msg.Name 659 if qbck.IsRemoteAIS() { 660 qbck.Ns.UUID = p.a2u(qbck.Ns.UUID) 661 } 662 if err := p.checkAccess(w, r, nil, apc.AceListBuckets); err == nil { 663 p.listBuckets(w, r, qbck, msg, dpq) 664 } 665 return 666 } 667 668 // (IV) list objects (NOTE -- TODO: currently, always forwarding) 669 if !qbck.IsBucket() { 670 p.writeErrf(w, r, "bad list-objects request: %q is not a bucket (is a bucket query?)", qbck) 671 return 672 } 673 if p.forwardCP(w, r, msg, lsotag+" "+qbck.String()) { 674 return 675 } 676 677 // lsmsg 678 var ( 679 lsmsg 
apc.LsoMsg 680 bck = meta.CloneBck((*cmn.Bck)(qbck)) 681 ) 682 if err = cos.MorphMarshal(msg.Value, &lsmsg); err != nil { 683 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 684 return 685 } 686 if lsmsg.Prefix != "" && strings.Contains(lsmsg.Prefix, "../") { 687 p.writeErrf(w, r, "bad list-objects request: invalid prefix %q", lsmsg.Prefix) 688 return 689 } 690 bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AceObjLIST, bck: bck, dpq: dpq} 691 bckArgs.createAIS = false 692 693 if lsmsg.IsFlagSet(apc.LsBckPresent) { 694 bckArgs.dontHeadRemote = true 695 bckArgs.dontAddRemote = true 696 } else { 697 bckArgs.tryHeadRemote = lsmsg.IsFlagSet(apc.LsDontHeadRemote) 698 bckArgs.dontAddRemote = lsmsg.IsFlagSet(apc.LsDontAddRemote) 699 } 700 701 // do 702 if bck, err = bckArgs.initAndTry(); err == nil { 703 p.listObjects(w, r, bck, msg /*amsg*/, &lsmsg) 704 } 705 } 706 707 // GET /v1/objects/bucket-name/object-name 708 func (p *proxy) httpobjget(w http.ResponseWriter, r *http.Request, origURLBck ...string) { 709 // 1. request 710 apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/) 711 if err := p.parseReq(w, r, apireq); err != nil { 712 apiReqFree(apireq) 713 return 714 } 715 716 // 2. bucket 717 bckArgs := allocBctx() 718 { 719 bckArgs.p = p 720 bckArgs.w = w 721 bckArgs.r = r 722 bckArgs.bck = apireq.bck 723 bckArgs.dpq = apireq.dpq 724 bckArgs.perms = apc.AceGET 725 bckArgs.createAIS = false 726 } 727 if len(origURLBck) > 0 { 728 bckArgs.origURLBck = origURLBck[0] 729 } 730 bck, err := bckArgs.initAndTry() 731 freeBctx(bckArgs) 732 733 objName := apireq.items[1] 734 apiReqFree(apireq) 735 if err != nil { 736 return 737 } 738 739 // 3. 
redirect 740 smap := p.owner.smap.get() 741 tsi, netPub, err := smap.HrwMultiHome(bck.MakeUname(objName)) 742 if err != nil { 743 p.writeErr(w, r, err) 744 return 745 } 746 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 747 nlog.Infoln("GET " + bck.Cname(objName) + " => " + tsi.String()) 748 } 749 redirectURL := p.redirectURL(r, tsi, time.Now() /*started*/, cmn.NetIntraData, netPub) 750 http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) 751 752 // 4. stats 753 p.statsT.Inc(stats.GetCount) 754 } 755 756 // PUT /v1/objects/bucket-name/object-name 757 func (p *proxy) httpobjput(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 758 var ( 759 nodeID string 760 perms apc.AccessAttrs 761 ) 762 // 1. request 763 if err := p.parseReq(w, r, apireq); err != nil { 764 return 765 } 766 appendTyProvided := apireq.dpq.apnd.ty != "" // apc.QparamAppendType 767 if !appendTyProvided { 768 perms = apc.AcePUT 769 } else { 770 perms = apc.AceAPPEND 771 if apireq.dpq.apnd.hdl != "" { 772 items, err := preParse(apireq.dpq.apnd.hdl) // apc.QparamAppendHandle 773 if err != nil { 774 p.writeErr(w, r, err) 775 return 776 } 777 nodeID = items[0] // nodeID; compare w/ apndOI.parse 778 } 779 } 780 781 // 2. bucket 782 bckArgs := allocBctx() 783 { 784 bckArgs.p = p 785 bckArgs.w = w 786 bckArgs.r = r 787 bckArgs.perms = perms 788 bckArgs.createAIS = false 789 } 790 bckArgs.bck, bckArgs.dpq = apireq.bck, apireq.dpq 791 bck, err := bckArgs.initAndTry() 792 freeBctx(bckArgs) 793 if err != nil { 794 return 795 } 796 797 // 3. 
redirect 798 var ( 799 tsi *meta.Snode 800 smap = p.owner.smap.get() 801 started = time.Now() 802 objName = apireq.items[1] 803 netPub = cmn.NetPublic 804 ) 805 if nodeID == "" { 806 tsi, netPub, err = smap.HrwMultiHome(bck.MakeUname(objName)) 807 if err != nil { 808 p.writeErr(w, r, err) 809 return 810 } 811 } else { 812 if tsi = smap.GetTarget(nodeID); tsi == nil { 813 err = &errNodeNotFound{"PUT failure:", nodeID, p.si, smap} 814 p.writeErr(w, r, err) 815 return 816 } 817 } 818 819 // verbose 820 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 821 verb, s := "PUT", "" 822 if appendTyProvided { 823 verb = "APPEND" 824 } 825 if bck.Props.Mirror.Enabled { 826 s = " (put-mirror)" 827 } 828 nlog.Infof("%s %s => %s%s", verb, bck.Cname(objName), tsi.StringEx(), s) 829 } 830 831 redirectURL := p.redirectURL(r, tsi, started, cmn.NetIntraData, netPub) 832 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 833 834 // 4. stats 835 if !appendTyProvided { 836 p.statsT.Inc(stats.PutCount) 837 } else { 838 p.statsT.Inc(stats.AppendCount) 839 } 840 } 841 842 // DELETE /v1/objects/bucket-name/object-name 843 func (p *proxy) httpobjdelete(w http.ResponseWriter, r *http.Request) { 844 bckArgs := allocBctx() 845 { 846 bckArgs.p = p 847 bckArgs.w = w 848 bckArgs.r = r 849 bckArgs.perms = apc.AceObjDELETE 850 bckArgs.createAIS = false 851 } 852 bck, objName, err := p._parseReqTry(w, r, bckArgs) 853 if err != nil { 854 return 855 } 856 smap := p.owner.smap.get() 857 tsi, err := smap.HrwName2T(bck.MakeUname(objName)) 858 if err != nil { 859 p.writeErr(w, r, err) 860 return 861 } 862 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 863 nlog.Infoln("DELETE " + bck.Cname(objName) + " => " + tsi.StringEx()) 864 } 865 redirectURL := p.redirectURL(r, tsi, time.Now() /*started*/, cmn.NetIntraControl) 866 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 867 868 p.statsT.Inc(stats.DeleteCount) 869 } 870 871 // DELETE { action } /v1/buckets 872 func (p *proxy) httpbckdelete(w 
http.ResponseWriter, r *http.Request, apireq *apiRequest) { 873 // 1. request 874 if err := p.parseReq(w, r, apireq); err != nil { 875 return 876 } 877 msg, err := p.readActionMsg(w, r) 878 if err != nil { 879 return 880 } 881 perms := apc.AceDestroyBucket 882 if msg.Action == apc.ActDeleteObjects || msg.Action == apc.ActEvictObjects { 883 perms = apc.AceObjDELETE 884 } 885 886 // 2. bucket 887 bck := apireq.bck 888 bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: perms, bck: bck, dpq: apireq.dpq, query: apireq.query} 889 bckArgs.createAIS = false 890 if msg.Action == apc.ActEvictRemoteBck { 891 var ecode int 892 bckArgs.dontHeadRemote = true // unconditionally 893 ecode, err = bckArgs.init() 894 if err != nil { 895 if ecode != http.StatusNotFound && !cmn.IsErrRemoteBckNotFound(err) { 896 p.writeErr(w, r, err, ecode) 897 } 898 return 899 } 900 } else if bck, err = bckArgs.initAndTry(); err != nil { 901 return 902 } 903 904 // 3. action 905 switch msg.Action { 906 case apc.ActEvictRemoteBck: 907 if err := cmn.ValidateRemoteBck(apc.ActEvictRemoteBck, bck.Bucket()); err != nil { 908 p.writeErr(w, r, err) 909 return 910 } 911 keepMD := cos.IsParseBool(apireq.query.Get(apc.QparamKeepRemote)) 912 if keepMD { 913 if err := p.destroyBucketData(msg, bck); err != nil { 914 p.writeErr(w, r, err) 915 } 916 return 917 } 918 if p.forwardCP(w, r, msg, bck.Name) { 919 return 920 } 921 if err := p.destroyBucket(msg, bck); err != nil { 922 p.writeErr(w, r, err) 923 } 924 case apc.ActDestroyBck: 925 if p.forwardCP(w, r, msg, bck.Name) { 926 return 927 } 928 if bck.IsRemoteAIS() { 929 if err := p.destroyBucket(msg, bck); err != nil { 930 if !cmn.IsErrBckNotFound(err) { 931 p.writeErr(w, r, err) 932 return 933 } 934 } 935 // having removed bucket from BMD ask remote to do the same 936 p.reverseRemAis(w, r, msg, bck.Bucket(), apireq.query) 937 return 938 } 939 if err := p.destroyBucket(msg, bck); err != nil { 940 if cmn.IsErrBckNotFound(err) { 941 nlog.Infof("%s: %s already %q-ed, 
// PUT /v1/metasync
// (compare with p.recvCluMeta and t.metasyncHandlerPut)
//
// metasyncHandler is the receiving side of metasync. Only PUT is accepted.
// When the local cluster map says this proxy is the primary, the request is
// rejected with 409 (Conflict). Otherwise the payload is unmarshaled, each
// metadata kind is extracted and applied in turn, and any extraction or apply
// error is reported in a single 409 response that also carries cluster info.
func (p *proxy) metasyncHandler(w http.ResponseWriter, r *http.Request) {
	var (
		err = &errMsync{}
		cii = &err.Cii // cluster info; part of the error returned to the caller
	)
	if r.Method != http.MethodPut {
		cmn.WriteErr405(w, r, http.MethodPut)
		return
	}
	smap := p.owner.smap.get()

	// primary cannot be a metasync receiver; the message names which of the
	// three states this proxy is in
	if smap.isPrimary(p.si) {
		const txt = "cannot be on the receiving side of metasync"
		xctn := voteInProgress()
		maps := smap.StringEx()
		p.ciiFill(cii)
		switch {
		case !p.ClusterStarted():
			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is starting up as primary, "+txt, maps)
		case xctn != nil:
			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is still primary while voting is in progress, "+txt, maps)
		default:
			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is primary, "+txt, maps)
		}
		nlog.Errorln(err.Message)
		// marshal along with cii
		p.writeErr(w, r, errors.New(cos.MustMarshalToString(err)), http.StatusConflict, Silent)
		return
	}

	payload := make(msPayload)
	if errP := payload.unmarshal(r.Body, "metasync put"); errP != nil {
		cmn.WriteErr(w, r, errP)
		return
	}
	// 1. extract
	// (every kind is extracted up front; each yields its own error)
	var (
		caller                       = r.Header.Get(apc.HdrCallerName)
		newConf, msgConf, errConf    = p.extractConfig(payload, caller)
		newSmap, msgSmap, errSmap    = p.extractSmap(payload, caller, false /*skip validation*/)
		newBMD, msgBMD, errBMD       = p.extractBMD(payload, caller)
		newRMD, msgRMD, errRMD       = p.extractRMD(payload, caller)
		newEtlMD, msgEtlMD, errEtlMD = p.extractEtlMD(payload, caller)
		revokedTokens, errTokens     = p.extractRevokedTokenList(payload, caller)
	)
	// 2. apply
	// (a kind is applied only if it was extracted without error and is non-nil;
	// an apply error replaces the corresponding extract error)
	if errConf == nil && newConf != nil {
		errConf = p.receiveConfig(newConf, msgConf, payload, caller)
	}
	if errSmap == nil && newSmap != nil {
		errSmap = p.receiveSmap(newSmap, msgSmap, payload, caller, p.smapOnUpdate)
	}
	if errBMD == nil && newBMD != nil {
		errBMD = p.receiveBMD(newBMD, msgBMD, payload, caller)
	}
	if errRMD == nil && newRMD != nil {
		errRMD = p.receiveRMD(newRMD, msgRMD, caller)
	}
	if errEtlMD == nil && newEtlMD != nil {
		errEtlMD = p.receiveEtlMD(newEtlMD, msgEtlMD, payload, caller, nil)
	}
	if errTokens == nil && revokedTokens != nil {
		// the result of updating the revoked list is deliberately discarded
		_ = p.authn.updateRevokedList(revokedTokens)
	}
	// 3. respond
	// (nothing is written explicitly when every step succeeded)
	if errConf == nil && errSmap == nil && errBMD == nil && errRMD == nil && errTokens == nil && errEtlMD == nil {
		return
	}
	p.ciiFill(cii)
	retErr := err.message(errConf, errSmap, errBMD, errRMD, errEtlMD, errTokens)
	p.writeErr(w, r, retErr, http.StatusConflict)
}
1100 p.keepalive.heardFrom(callerID) 1101 } 1102 1103 // primary 1104 if smap.isPrimary(p.si) { 1105 if prr { 1106 if err := p.pready(smap, true); err != nil { 1107 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 1108 p.writeErr(w, r, err, http.StatusServiceUnavailable) 1109 } else { 1110 p.writeErr(w, r, err, http.StatusServiceUnavailable, Silent) 1111 } 1112 return 1113 } 1114 } 1115 w.WriteHeader(http.StatusOK) 1116 return 1117 } 1118 // non-primary 1119 if !p.ClusterStarted() { 1120 // keep returning 503 until cluster starts up 1121 w.WriteHeader(http.StatusServiceUnavailable) 1122 return 1123 } 1124 if prr || askPrimary { 1125 caller := r.Header.Get(apc.HdrCallerName) 1126 p.writeErrf(w, r, "%s (non-primary): misdirected health-of-primary request from %s, %s", 1127 p, caller, smap.StringEx()) 1128 return 1129 } 1130 w.WriteHeader(http.StatusOK) 1131 } 1132 1133 // PUT { action } /v1/buckets/bucket-name 1134 func (p *proxy) httpbckput(w http.ResponseWriter, r *http.Request) { 1135 var ( 1136 msg *apc.ActMsg 1137 query = r.URL.Query() 1138 apiItems, err = p.parseURL(w, r, apc.URLPathBuckets.L, 1, true) 1139 ) 1140 if err != nil { 1141 return 1142 } 1143 bucket := apiItems[0] 1144 bck, err := newBckFromQ(bucket, query, nil) 1145 if err != nil { 1146 p.writeErr(w, r, err) 1147 return 1148 } 1149 if msg, err = p.readActionMsg(w, r); err != nil { 1150 return 1151 } 1152 bckArgs := bctx{p: p, w: w, r: r, bck: bck, msg: msg, query: query} 1153 bckArgs.createAIS = false 1154 if bck, err = bckArgs.initAndTry(); err != nil { 1155 return 1156 } 1157 switch msg.Action { 1158 case apc.ActArchive: 1159 var ( 1160 bckFrom = bck 1161 archMsg = &cmn.ArchiveBckMsg{} 1162 ) 1163 if err := cos.MorphMarshal(msg.Value, archMsg); err != nil { 1164 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1165 return 1166 } 1167 bckTo := meta.CloneBck(&archMsg.ToBck) 1168 if bckTo.IsEmpty() { 1169 bckTo = bckFrom 1170 } else { 1171 bckToArgs := bctx{p: p, w: w, r: r, bck: 
bckTo, msg: msg, perms: apc.AcePUT, query: query} 1172 bckToArgs.createAIS = false 1173 if bckTo, err = bckToArgs.initAndTry(); err != nil { 1174 return 1175 } 1176 } 1177 // 1178 // NOTE: strict enforcement of the standard & supported file extensions 1179 // 1180 if _, err := archive.Strict(archMsg.Mime, archMsg.ArchName); err != nil { 1181 p.writeErr(w, r, err) 1182 return 1183 } 1184 xid, err := p.createArchMultiObj(bckFrom, bckTo, msg) 1185 if err == nil { 1186 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid))) 1187 w.Write([]byte(xid)) 1188 } else { 1189 p.writeErr(w, r, err) 1190 } 1191 default: 1192 p.writeErrAct(w, r, msg.Action) 1193 } 1194 } 1195 1196 // POST { action } /v1/buckets[/bucket-name] 1197 func (p *proxy) httpbckpost(w http.ResponseWriter, r *http.Request) { 1198 var msg *apc.ActMsg 1199 apiItems, err := p.parseURL(w, r, apc.URLPathBuckets.L, 1, true) 1200 if err != nil { 1201 return 1202 } 1203 if msg, err = p.readActionMsg(w, r); err != nil { 1204 return 1205 } 1206 bucket := apiItems[0] 1207 if len(apiItems) > 1 { 1208 err := cmn.InitErrHTTP(r, fmt.Errorf("invalid request URI %q", r.URL.Path), 0) 1209 p.writeErr(w, r, err) 1210 return 1211 } 1212 p._bckpost(w, r, msg, bucket) 1213 } 1214 1215 func (p *proxy) _bckpost(w http.ResponseWriter, r *http.Request, msg *apc.ActMsg, bucket string) { 1216 const ( 1217 warnDstNotExist = "%s: destination %s doesn't exist and will be created with the %s (source bucket) props" 1218 errPrependSync = "prepend option (%q) is incompatible with the request to synchronize buckets" 1219 ) 1220 var ( 1221 query = r.URL.Query() 1222 bck, err = newBckFromQ(bucket, query, nil) 1223 ) 1224 if err != nil { 1225 p.writeErr(w, r, err) 1226 return 1227 } 1228 1229 if msg.Action == apc.ActCreateBck { 1230 if bck.IsRemoteAIS() { 1231 // create bucket (remais) 1232 p.reverseRemAis(w, r, msg, bck.Bucket(), query) 1233 return 1234 } 1235 // create bucket (this cluster) 1236 p._bcr(w, r, query, msg, bck) 1237 return 
1238 } 1239 1240 // only the primary can do metasync 1241 dtor := xact.Table[msg.Action] 1242 if dtor.Metasync { 1243 if p.forwardCP(w, r, msg, bucket) { 1244 return 1245 } 1246 } 1247 1248 bckArgs := bctx{p: p, w: w, r: r, bck: bck, perms: apc.AceObjLIST | apc.AceGET, msg: msg, query: query} 1249 bckArgs.createAIS = false 1250 if bck, err = bckArgs.initAndTry(); err != nil { 1251 return 1252 } 1253 1254 // 1255 // POST {action} on bucket 1256 // 1257 var xid string 1258 switch msg.Action { 1259 case apc.ActMoveBck: 1260 bckFrom := bck 1261 bckTo, err := newBckFromQuname(query, true /*required*/) 1262 if err != nil { 1263 p.writeErr(w, r, err) 1264 return 1265 } 1266 if !bckFrom.IsAIS() && bckFrom.Backend() == nil { 1267 p.writeErrf(w, r, "can only rename AIS ('ais://') bucket (%q is not)", bckFrom) 1268 return 1269 } 1270 if bckTo.IsRemote() { 1271 p.writeErrf(w, r, "can only rename to AIS ('ais://') bucket (%q is remote)", bckTo) 1272 return 1273 } 1274 if bckFrom.Equal(bckTo, false, false) { 1275 p.writeErrf(w, r, "cannot rename bucket %q to itself (%q)", bckFrom, bckTo) 1276 return 1277 } 1278 bckFrom.Provider, bckTo.Provider = apc.AIS, apc.AIS 1279 if _, present := p.owner.bmd.get().Get(bckTo); present { 1280 err := cmn.NewErrBckAlreadyExists(bckTo.Bucket()) 1281 p.writeErr(w, r, err) 1282 return 1283 } 1284 if err := p.checkAccess(w, r, nil, apc.AceMoveBucket); err != nil { 1285 return 1286 } 1287 nlog.Infof("%s bucket %s => %s", msg.Action, bckFrom, bckTo) 1288 if xid, err = p.renameBucket(bckFrom, bckTo, msg); err != nil { 1289 p.writeErr(w, r, err) 1290 return 1291 } 1292 case apc.ActCopyBck, apc.ActETLBck: 1293 var ( 1294 bckFrom = bck 1295 bckTo *meta.Bck 1296 tcbmsg = &apc.TCBMsg{} 1297 ecode int 1298 fltPresence = apc.FltPresent 1299 ) 1300 switch msg.Action { 1301 case apc.ActETLBck: 1302 if err := cos.MorphMarshal(msg.Value, tcbmsg); err != nil { 1303 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1304 return 1305 } 
1306 if err := tcbmsg.Validate(true); err != nil { 1307 p.writeErr(w, r, err) 1308 return 1309 } 1310 case apc.ActCopyBck: 1311 if err = cos.MorphMarshal(msg.Value, &tcbmsg.CopyBckMsg); err != nil { 1312 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1313 return 1314 } 1315 } 1316 if tcbmsg.Sync && tcbmsg.Prepend != "" { 1317 p.writeErrf(w, r, errPrependSync, tcbmsg.Prepend) 1318 return 1319 } 1320 bckTo, err = newBckFromQuname(query, true /*required*/) 1321 if err != nil { 1322 p.writeErr(w, r, err) 1323 return 1324 } 1325 if bckFrom.Equal(bckTo, true, true) { 1326 if !bckFrom.IsRemote() { 1327 p.writeErrf(w, r, "cannot %s bucket %q onto itself", msg.Action, bckFrom) 1328 return 1329 } 1330 nlog.Infoln("proceeding to copy remote", bckFrom.String()) 1331 } 1332 1333 bckTo, ecode, err = p.initBckTo(w, r, query, bckTo) 1334 if err != nil { 1335 return 1336 } 1337 if ecode == http.StatusNotFound { 1338 if p.forwardCP(w, r, msg, bucket) { // to create 1339 return 1340 } 1341 if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil { 1342 return 1343 } 1344 nlog.Infof(warnDstNotExist, p, bckTo, bckFrom) 1345 } 1346 1347 // start x-tcb or x-tco 1348 if v := query.Get(apc.QparamFltPresence); v != "" { 1349 fltPresence, _ = strconv.Atoi(v) 1350 } 1351 debug.Assertf(fltPresence != apc.FltExistsOutside, "(flt %d=\"outside\") not implemented yet", fltPresence) 1352 if !apc.IsFltPresent(fltPresence) && (bckFrom.IsCloud() || bckFrom.IsRemoteAIS()) { 1353 lstcx := &lstcx{ 1354 p: p, 1355 bckFrom: bckFrom, 1356 bckTo: bckTo, 1357 amsg: msg, 1358 config: cmn.GCO.Get(), 1359 } 1360 lstcx.tcomsg.TCBMsg = *tcbmsg 1361 xid, err = lstcx.do() 1362 } else { 1363 nlog.Infoln("x-tcb:", bckFrom.String(), "=>", bckTo.String()) 1364 xid, err = p.tcb(bckFrom, bckTo, msg, tcbmsg.DryRun) 1365 } 1366 if err != nil { 1367 p.writeErr(w, r, err) 1368 return 1369 } 1370 case apc.ActCopyObjects, apc.ActETLObjects: 1371 var ( 1372 tcomsg = &cmn.TCObjsMsg{} 1373 
bckTo *meta.Bck 1374 ecode int 1375 eq bool 1376 ) 1377 if err = cos.MorphMarshal(msg.Value, tcomsg); err != nil { 1378 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1379 return 1380 } 1381 if tcomsg.Sync && tcomsg.Prepend != "" { 1382 p.writeErrf(w, r, errPrependSync, tcomsg.Prepend) 1383 return 1384 } 1385 bckTo = meta.CloneBck(&tcomsg.ToBck) 1386 1387 if bck.Equal(bckTo, true, true) { 1388 eq = true 1389 nlog.Warningf("multi-object operation %q within the same bucket %q", msg.Action, bck) 1390 } 1391 if bckTo.IsHTTP() { 1392 p.writeErrf(w, r, "cannot %s to HTTP bucket %q", msg.Action, bckTo) 1393 return 1394 } 1395 if !eq { 1396 bckTo, ecode, err = p.initBckTo(w, r, query, bckTo) 1397 if err != nil { 1398 return 1399 } 1400 if ecode == http.StatusNotFound { 1401 if p.forwardCP(w, r, msg, bucket) { // to create 1402 return 1403 } 1404 if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil { 1405 return 1406 } 1407 nlog.Infof(warnDstNotExist, p, bckTo, bck) 1408 } 1409 } 1410 1411 xid, err = p.tcobjs(bck, bckTo, cmn.GCO.Get(), msg, tcomsg) 1412 if err != nil { 1413 p.writeErr(w, r, err) 1414 return 1415 } 1416 case apc.ActAddRemoteBck: 1417 if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil { 1418 return 1419 } 1420 if err := p.createBucket(msg, bck, nil); err != nil { 1421 p.writeErr(w, r, err, crerrStatus(err)) 1422 } 1423 return 1424 case apc.ActPrefetchObjects: 1425 // TODO: GET vs SYNC? 
1426 if err := cmn.ValidateRemoteBck(apc.ActPrefetchObjects, bck.Bucket()); err != nil { 1427 p.writeErr(w, r, err) 1428 return 1429 } 1430 if xid, err = p.listrange(r.Method, bucket, msg, query); err != nil { 1431 p.writeErr(w, r, err) 1432 return 1433 } 1434 case apc.ActInvalListCache: 1435 p.qm.c.invalidate(bck.Bucket()) 1436 return 1437 case apc.ActMakeNCopies: 1438 if xid, err = p.makeNCopies(msg, bck); err != nil { 1439 p.writeErr(w, r, err) 1440 return 1441 } 1442 case apc.ActECEncode: 1443 if xid, err = p.ecEncode(bck, msg); err != nil { 1444 p.writeErr(w, r, err) 1445 return 1446 } 1447 default: 1448 p.writeErrAct(w, r, msg.Action) 1449 return 1450 } 1451 1452 debug.Assertf(xact.IsValidUUID(xid) || strings.IndexByte(xid, ',') > 0, "%q: %q", msg.Action, xid) 1453 w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid))) 1454 w.Write([]byte(xid)) 1455 } 1456 1457 // init existing or create remote 1458 // not calling `initAndTry` - delegating ais:from// props cloning to the separate method 1459 func (p *proxy) initBckTo(w http.ResponseWriter, r *http.Request, query url.Values, bckTo *meta.Bck) (*meta.Bck, int, error) { 1460 bckToArgs := bctx{p: p, w: w, r: r, bck: bckTo, perms: apc.AcePUT, query: query} 1461 bckToArgs.createAIS = true 1462 1463 ecode, err := bckToArgs.init() 1464 if err != nil && ecode != http.StatusNotFound { 1465 p.writeErr(w, r, err, ecode) 1466 return nil, 0, err 1467 } 1468 1469 // remote bucket: create it (BMD-wise) on the fly 1470 if ecode == http.StatusNotFound && bckTo.IsRemote() { 1471 if bckTo, err = bckToArgs.try(); err != nil { 1472 return nil, 0, err 1473 } 1474 ecode = 0 1475 } 1476 return bckTo, ecode, nil 1477 } 1478 1479 // POST { apc.ActCreateBck } /v1/buckets/bucket-name 1480 func (p *proxy) _bcr(w http.ResponseWriter, r *http.Request, query url.Values, msg *apc.ActMsg, bck *meta.Bck) { 1481 var ( 1482 remoteHdr http.Header 1483 bucket = bck.Name 1484 ) 1485 if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err 
!= nil { 1486 return 1487 } 1488 if err := bck.Validate(); err != nil { 1489 p.writeErr(w, r, err) 1490 return 1491 } 1492 if p.forwardCP(w, r, msg, bucket) { 1493 return 1494 } 1495 if bck.Provider == "" { 1496 bck.Provider = apc.AIS 1497 } 1498 1499 if bck.IsRemote() { 1500 // (feature) add Cloud bucket to BMD, to further set its `Props.Extra` 1501 // with alternative access profile and/or endpoint 1502 // TODO: 1503 // change bucket props - and the BMD meta-version - to have Flags int64 for 1504 // the bits that'll include "renamed" (instead of the current `Props.Renamed`) 1505 // and "added-with-no-head"; use the latter to synchronize Cloud props once 1506 // connected 1507 if cos.IsParseBool(query.Get(apc.QparamDontHeadRemote)) { 1508 if !bck.IsCloud() { 1509 p.writeErr(w, r, cmn.NewErrUnsupp("skip lookup for the", bck.Provider+":// bucket")) 1510 return 1511 } 1512 msg.Action = apc.ActAddRemoteBck // NOTE: substituting action in the message 1513 1514 // NOTE: inherit cluster defaults 1515 config := cmn.GCO.Get() 1516 bprops := bck.Bucket().DefaultProps(&config.ClusterConfig) 1517 bprops.SetProvider(bck.Provider) 1518 1519 if err := p._createBucketWithProps(msg, bck, bprops); err != nil { 1520 p.writeErr(w, r, err, crerrStatus(err)) 1521 } 1522 return 1523 } 1524 1525 // remote: check existence and get (cloud) props 1526 rhdr, statusCode, err := p.headRemoteBck(bck.RemoteBck(), nil) 1527 if err != nil { 1528 if bck.IsCloud() { 1529 statusCode = http.StatusNotImplemented 1530 err = cmn.NewErrNotImpl("create", bck.Provider+"(cloud) bucket") 1531 } else if !bck.IsRemoteAIS() { 1532 err = cmn.NewErrUnsupp("create", bck.Provider+":// bucket") 1533 } 1534 p.writeErr(w, r, err, statusCode) 1535 return 1536 } 1537 remoteHdr = rhdr 1538 msg.Action = apc.ActAddRemoteBck // ditto 1539 } 1540 // props-to-update at creation time 1541 if msg.Value != nil { 1542 propsToUpdate := cmn.BpropsToSet{} 1543 if err := cos.MorphMarshal(msg.Value, &propsToUpdate); err != nil { 1544 
p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1545 return 1546 } 1547 // Make and validate new bucket props. 1548 bck.Props = defaultBckProps(bckPropsArgs{bck: bck}) 1549 nprops, err := p.makeNewBckProps(bck, &propsToUpdate, true /*creating*/) 1550 if err != nil { 1551 p.writeErr(w, r, err) 1552 return 1553 } 1554 bck.Props = nprops 1555 if backend := bck.Backend(); backend != nil { 1556 if err := backend.Validate(); err != nil { 1557 p.writeErrf(w, r, "cannot create %s: invalid backend %s, err: %v", bck, backend, err) 1558 return 1559 } 1560 // Initialize backend bucket. 1561 if err := backend.InitNoBackend(p.owner.bmd); err != nil { 1562 if !cmn.IsErrRemoteBckNotFound(err) { 1563 p.writeErrf(w, r, "cannot create %s: failing to initialize backend %s, err: %v", 1564 bck, backend, err) 1565 return 1566 } 1567 args := bctx{p: p, w: w, r: r, bck: backend, msg: msg, query: query} 1568 args.createAIS = false 1569 if _, err = args.try(); err != nil { 1570 return 1571 } 1572 } 1573 } 1574 // Send all props to the target 1575 msg.Value = bck.Props 1576 } 1577 if err := p.createBucket(msg, bck, remoteHdr); err != nil { 1578 p.writeErr(w, r, err, crerrStatus(err)) 1579 } 1580 } 1581 1582 func crerrStatus(err error) (ecode int) { 1583 switch err.(type) { 1584 case *cmn.ErrBucketAlreadyExists: 1585 ecode = http.StatusConflict 1586 case *cmn.ErrNotImpl: 1587 ecode = http.StatusNotImplemented 1588 } 1589 return 1590 } 1591 1592 // one page => msgpack rsp 1593 func (p *proxy) listObjects(w http.ResponseWriter, r *http.Request, bck *meta.Bck, amsg *apc.ActMsg, lsmsg *apc.LsoMsg) { 1594 // LsVerChanged a.k.a. 
'--check-versions' limitations 1595 if lsmsg.IsFlagSet(apc.LsVerChanged) { 1596 const a = "cannot perform remote versions check" 1597 if !bck.HasVersioningMD() { 1598 p.writeErrMsg(w, r, a+": bucket "+bck.Cname("")+" does not provide (remote) versioning info") 1599 return 1600 } 1601 if lsmsg.IsFlagSet(apc.LsNameOnly) || lsmsg.IsFlagSet(apc.LsNameSize) { 1602 p.writeErrMsg(w, r, a+": flag 'LsVerChanged' is incompatible with 'LsNameOnly', 'LsNameSize'") 1603 return 1604 } 1605 if !lsmsg.WantProp(apc.GetPropsCustom) { 1606 p.writeErrf(w, r, a+" without listing %q (object property)", apc.GetPropsCustom) 1607 return 1608 } 1609 } 1610 1611 // default props & flags => user-provided message 1612 switch { 1613 case lsmsg.Props == "": 1614 if lsmsg.IsFlagSet(apc.LsObjCached) { 1615 lsmsg.AddProps(apc.GetPropsDefaultAIS...) 1616 } else { 1617 lsmsg.AddProps(apc.GetPropsMinimal...) 1618 lsmsg.SetFlag(apc.LsNameSize) 1619 } 1620 case lsmsg.Props == apc.GetPropsName: 1621 lsmsg.SetFlag(apc.LsNameOnly) 1622 case lsmsg.Props == apc.GetPropsNameSize: 1623 lsmsg.SetFlag(apc.LsNameSize) 1624 } 1625 if bck.IsHTTP() || lsmsg.IsFlagSet(apc.LsArchDir) { 1626 lsmsg.SetFlag(apc.LsObjCached) 1627 } 1628 1629 // do page 1630 beg := mono.NanoTime() 1631 lst, err := p.lsPage(bck, amsg, lsmsg, r.Header, p.owner.smap.get()) 1632 if err != nil { 1633 p.writeErr(w, r, err) 1634 return 1635 } 1636 p.statsT.AddMany( 1637 cos.NamedVal64{Name: stats.ListCount, Value: 1}, 1638 cos.NamedVal64{Name: stats.ListLatency, Value: mono.SinceNano(beg)}, 1639 ) 1640 1641 var ok bool 1642 if strings.Contains(r.Header.Get(cos.HdrAccept), cos.ContentMsgPack) { 1643 ok = p.writeMsgPack(w, lst, lsotag) 1644 } else { 1645 ok = p.writeJS(w, r, lst, lsotag) 1646 } 1647 if !ok && cmn.Rom.FastV(4, cos.SmoduleAIS) { 1648 nlog.Errorln("failed to transmit list-objects page (TCP RST?)") 1649 } 1650 1651 // GC 1652 clear(lst.Entries) 1653 lst.Entries = lst.Entries[:0] 1654 lst.Entries = nil 1655 lst = nil 1656 } 1657 1658 // 
one page; common code (native, s3 api) 1659 func (p *proxy) lsPage(bck *meta.Bck, amsg *apc.ActMsg, lsmsg *apc.LsoMsg, hdr http.Header, smap *smapX) (*cmn.LsoRes, error) { 1660 var ( 1661 nl nl.Listener 1662 err error 1663 tsi *meta.Snode 1664 lst *cmn.LsoRes 1665 newls bool 1666 listRemote bool 1667 wantOnlyRemote bool 1668 ) 1669 if lsmsg.UUID == "" { 1670 lsmsg.UUID = cos.GenUUID() 1671 newls = true 1672 } 1673 tsi, listRemote, wantOnlyRemote, err = p._lsofc(bck, lsmsg, smap) 1674 if err != nil { 1675 return nil, err 1676 } 1677 if newls { 1678 if wantOnlyRemote { 1679 nl = xact.NewXactNL(lsmsg.UUID, apc.ActList, &smap.Smap, meta.NodeMap{tsi.ID(): tsi}, bck.Bucket()) 1680 } else { 1681 // bcast 1682 nl = xact.NewXactNL(lsmsg.UUID, apc.ActList, &smap.Smap, nil, bck.Bucket()) 1683 } 1684 // NOTE #2: TODO: currently, always primary - hrw redirect vs scenarios*** 1685 nl.SetOwner(smap.Primary.ID()) 1686 p.ic.registerEqual(regIC{nl: nl, smap: smap, msg: amsg}) 1687 } 1688 1689 if listRemote { 1690 if lsmsg.StartAfter != "" { 1691 // TODO: remote AIS first, then Cloud 1692 return nil, fmt.Errorf("%s option --start_after (%s) not yet supported for remote buckets (%s)", 1693 lsotag, lsmsg.StartAfter, bck) 1694 } 1695 // verbose log 1696 if cmn.Rom.FastV(4, cos.SmoduleAIS) { 1697 var s string 1698 if lsmsg.ContinuationToken != "" { 1699 s = " cont=" + lsmsg.ContinuationToken 1700 } 1701 if lsmsg.SID != "" { 1702 s += " via " + tsi.StringEx() 1703 } 1704 nlog.Infof("%s[%s] %s%s", amsg.Action, lsmsg.UUID, bck.Cname(""), s) 1705 } 1706 1707 config := cmn.GCO.Get() 1708 lst, err = p.lsObjsR(bck, lsmsg, hdr, smap, tsi, config, wantOnlyRemote) 1709 1710 // TODO: `status == http.StatusGone`: at this point we know that this 1711 // remote bucket exists and is offline. We should somehow try to list 1712 // cached objects. This isn't easy as we basically need to start a new 1713 // xaction and return a new `UUID`. 
1714 } else { 1715 lst, err = p.lsObjsA(bck, lsmsg) 1716 } 1717 1718 return lst, err 1719 } 1720 1721 // list-objects flow control helper 1722 func (p *proxy) _lsofc(bck *meta.Bck, lsmsg *apc.LsoMsg, smap *smapX) (tsi *meta.Snode, listRemote, wantOnlyRemote bool, err error) { 1723 listRemote = bck.IsRemote() && !lsmsg.IsFlagSet(apc.LsObjCached) 1724 if !listRemote { 1725 return 1726 } 1727 if bck.Props.BID == 0 { 1728 // remote bucket outside cluster (not in BMD) that hasn't been added ("on the fly") by the caller 1729 // (lsmsg flag below) 1730 debug.Assert(bck.IsRemote()) 1731 debug.Assert(lsmsg.IsFlagSet(apc.LsDontAddRemote)) 1732 wantOnlyRemote = true 1733 if !lsmsg.WantOnlyRemoteProps() { 1734 err = fmt.Errorf("cannot list remote not-in-cluster bucket %s for not-only-remote object properties: %q", 1735 bck.Cname(""), lsmsg.Props) 1736 return 1737 } 1738 } else { 1739 // default 1740 wantOnlyRemote = lsmsg.WantOnlyRemoteProps() 1741 } 1742 1743 // designate one target to carry-out backend.list-objects 1744 if lsmsg.SID != "" { 1745 tsi = smap.GetTarget(lsmsg.SID) 1746 if tsi == nil || tsi.InMaintOrDecomm() { 1747 err = &errNodeNotFound{lsotag + " failure:", lsmsg.SID, p.si, smap} 1748 nlog.Errorln(err) 1749 if smap.CountActiveTs() == 1 { 1750 // (walk an extra mile) 1751 orig := err 1752 tsi, err = smap.HrwTargetTask(lsmsg.UUID) 1753 if err == nil { 1754 nlog.Warningf("ignoring [%v] - utilizing the last (or the only) active target %s", orig, tsi) 1755 lsmsg.SID = tsi.ID() 1756 } 1757 } 1758 } 1759 return 1760 } 1761 // if listing using bucket inventory (`apc.HdrInventory`) is requested 1762 // target selection can change - see lsObjsR below 1763 if tsi, err = smap.HrwTargetTask(lsmsg.UUID); err == nil { 1764 lsmsg.SID = tsi.ID() 1765 } 1766 return 1767 } 1768 1769 // POST { action } /v1/objects/bucket-name[/object-name] 1770 func (p *proxy) httpobjpost(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 1771 msg, err := p.readActionMsg(w, r) 1772 if 
err != nil { 1773 return 1774 } 1775 if msg.Action == apc.ActRenameObject { 1776 apireq.after = 2 1777 } 1778 if err := p.parseReq(w, r, apireq); err != nil { 1779 return 1780 } 1781 1782 bck := apireq.bck 1783 bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AcePUT, bck: bck} 1784 bckArgs.createAIS = false 1785 bckArgs.dontHeadRemote = true 1786 if _, err := bckArgs.initAndTry(); err != nil { 1787 return 1788 } 1789 1790 switch msg.Action { 1791 case apc.ActRenameObject: 1792 if err := p.checkAccess(w, r, bck, apc.AceObjMOVE); err != nil { 1793 return 1794 } 1795 if bck.IsRemote() { 1796 p.writeErrActf(w, r, msg.Action, "not supported for remote buckets (%s)", bck) 1797 return 1798 } 1799 if bck.Props.EC.Enabled { 1800 p.writeErrActf(w, r, msg.Action, "not supported for erasure-coded buckets (%s)", bck) 1801 return 1802 } 1803 objName, objNameTo := apireq.items[1], msg.Name 1804 if objName == objNameTo { 1805 p.writeErrMsg(w, r, "cannot rename "+bck.Cname(objName)+" to self, nothing to do") 1806 return 1807 } 1808 if !p.isValidObjname(w, r, objNameTo) { 1809 return 1810 } 1811 p.redirectObjAction(w, r, bck, apireq.items[1], msg) 1812 case apc.ActPromote: 1813 if err := p.checkAccess(w, r, bck, apc.AcePromote); err != nil { 1814 return 1815 } 1816 // ActionMsg.Name is the source 1817 if !filepath.IsAbs(msg.Name) { 1818 if msg.Name == "" { 1819 p.writeErrMsg(w, r, "promoted source pathname is empty") 1820 } else { 1821 p.writeErrf(w, r, "promoted source must be an absolute path (got %q)", msg.Name) 1822 } 1823 return 1824 } 1825 args := &apc.PromoteArgs{} 1826 if err := cos.MorphMarshal(msg.Value, args); err != nil { 1827 p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err) 1828 return 1829 } 1830 var tsi *meta.Snode 1831 if args.DaemonID != "" { 1832 smap := p.owner.smap.get() 1833 if tsi = smap.GetTarget(args.DaemonID); tsi == nil { 1834 err := &errNodeNotFound{apc.ActPromote + " failure:", args.DaemonID, p.si, smap} 1835 
p.writeErr(w, r, err) 1836 return 1837 } 1838 } 1839 xid, err := p.promote(bck, msg, tsi) 1840 if err != nil { 1841 p.writeErr(w, r, err) 1842 return 1843 } 1844 w.Write([]byte(xid)) 1845 case apc.ActBlobDl: 1846 if err := p.checkAccess(w, r, bck, apc.AccessRW); err != nil { 1847 return 1848 } 1849 if err := cmn.ValidateRemoteBck(apc.ActBlobDl, bck.Bucket()); err != nil { 1850 p.writeErr(w, r, err) 1851 return 1852 } 1853 objName := msg.Name 1854 p.redirectObjAction(w, r, bck, objName, msg) 1855 default: 1856 p.writeErrAct(w, r, msg.Action) 1857 } 1858 } 1859 1860 // HEAD /v1/buckets/bucket-name 1861 func (p *proxy) httpbckhead(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 1862 err := p.parseReq(w, r, apireq) 1863 if err != nil { 1864 return 1865 } 1866 bckArgs := bctx{p: p, w: w, r: r, bck: apireq.bck, perms: apc.AceBckHEAD, dpq: apireq.dpq, query: apireq.query} 1867 bckArgs.dontAddRemote = apireq.dpq.dontAddRemote // QparamDontAddRemote 1868 1869 var ( 1870 info *cmn.BsummResult 1871 dpq = apireq.dpq 1872 msg apc.BsummCtrlMsg 1873 fltPresence int 1874 status int 1875 ) 1876 if dpq.fltPresence != "" { 1877 fltPresence, err = strconv.Atoi(dpq.fltPresence) 1878 if err != nil { 1879 p.writeErrf(w, r, "%s: parse 'flt-presence': %w", p, err) 1880 return 1881 } 1882 bckArgs.dontHeadRemote = bckArgs.dontHeadRemote || apc.IsFltPresent(fltPresence) 1883 } 1884 if dpq.binfo != "" { // QparamBinfoWithOrWithoutRemote 1885 msg = apc.BsummCtrlMsg{ 1886 UUID: dpq.uuid, 1887 ObjCached: !cos.IsParseBool(dpq.binfo), 1888 BckPresent: apc.IsFltPresent(fltPresence), 1889 DontAddRemote: dpq.dontAddRemote, 1890 } 1891 bckArgs.dontAddRemote = msg.DontAddRemote 1892 } 1893 bckArgs.createAIS = false 1894 1895 bck, err := bckArgs.initAndTry() 1896 if err != nil { 1897 return 1898 } 1899 1900 // 1. 
bucket is present (and was present prior to this call), and we are done with it here 1901 if bckArgs.isPresent { 1902 if fltPresence == apc.FltExistsOutside { 1903 nlog.Warningf("bucket %s is present, flt %d=\"outside\" not implemented yet", bck.Cname(""), fltPresence) 1904 } 1905 if dpq.binfo != "" { 1906 info, status, err = p.bsummhead(bck, &msg) 1907 if err != nil { 1908 p.writeErr(w, r, err) 1909 return 1910 } 1911 if info != nil { 1912 info.IsBckPresent = true 1913 } 1914 } 1915 toHdr(w, bck, info, status, msg.UUID) 1916 return 1917 } 1918 1919 // 2. bucket is remote and does exist 1920 debug.Assert(bck.IsRemote(), bck.String()) 1921 debug.Assert(bckArgs.exists) 1922 1923 // [filtering] when the bucket that must be present is not 1924 if apc.IsFltPresent(fltPresence) { 1925 toHdr(w, bck, nil, 0, "") 1926 return 1927 } 1928 1929 var ( 1930 bprops *cmn.Bprops 1931 bmd = p.owner.bmd.get() 1932 ) 1933 bprops, bckArgs.isPresent = bmd.Get(bck) 1934 if bprops != nil { 1935 // just added via bckArgs.initAndTry() above, with dontAdd == false 1936 bck.Props = bprops 1937 } // otherwise, keep bck.Props as per (#18995) 1938 1939 if dpq.binfo != "" { 1940 info, status, err = p.bsummhead(bck, &msg) 1941 if err != nil { 1942 p.writeErr(w, r, err) 1943 return 1944 } 1945 if info != nil { 1946 info.IsBckPresent = true 1947 } 1948 } 1949 toHdr(w, bck, info, status, msg.UUID) 1950 } 1951 1952 func toHdr(w http.ResponseWriter, bck *meta.Bck, info *cmn.BsummResult, status int, xid string) { 1953 hdr := w.Header() 1954 if bck.Props != nil { 1955 hdr.Set(apc.HdrBucketProps, cos.MustMarshalToString(bck.Props)) 1956 } 1957 if info != nil { 1958 hdr.Set(apc.HdrBucketSumm, cos.MustMarshalToString(info)) 1959 } 1960 if xid != "" { 1961 hdr.Set(apc.HdrXactionID, xid) 1962 } 1963 if status > 0 { 1964 w.WriteHeader(status) 1965 } 1966 } 1967 1968 // PATCH /v1/buckets/bucket-name 1969 func (p *proxy) httpbckpatch(w http.ResponseWriter, r *http.Request, apireq *apiRequest) { 1970 var ( 1971 
err error 1972 msg *apc.ActMsg 1973 propsToUpdate cmn.BpropsToSet 1974 xid string 1975 nprops *cmn.Bprops // complete instance of bucket props with propsToUpdate changes 1976 ) 1977 if err = p.parseReq(w, r, apireq); err != nil { 1978 return 1979 } 1980 if msg, err = p.readActionMsg(w, r); err != nil { 1981 return 1982 } 1983 if err := cos.MorphMarshal(msg.Value, &propsToUpdate); err != nil { 1984 p.writeErrMsg(w, r, "invalid props-to-update value in apireq: "+msg.String()) 1985 return 1986 } 1987 bck := apireq.bck 1988 if p.forwardCP(w, r, msg, "patch "+bck.String()) { 1989 return 1990 } 1991 perms := apc.AcePATCH 1992 if propsToUpdate.Access != nil { 1993 perms |= apc.AceBckSetACL 1994 } 1995 bckArgs := bctx{p: p, w: w, r: r, bck: bck, msg: msg, skipBackend: true, 1996 perms: perms, dpq: apireq.dpq, query: apireq.query} 1997 bckArgs.createAIS = false 1998 if bck, err = bckArgs.initAndTry(); err != nil { 1999 return 2000 } 2001 if err = _checkAction(msg, apc.ActSetBprops, apc.ActResetBprops); err != nil { 2002 p.writeErr(w, r, err) 2003 return 2004 } 2005 // make and validate new props 2006 if nprops, err = p.makeNewBckProps(bck, &propsToUpdate); err != nil { 2007 p.writeErr(w, r, err) 2008 return 2009 } 2010 if !nprops.BackendBck.IsEmpty() { 2011 // backend must exist, must init itself 2012 backendBck := meta.CloneBck(&nprops.BackendBck) 2013 backendBck.Props = nil 2014 2015 args := bctx{p: p, w: w, r: r, bck: backendBck, msg: msg, dpq: apireq.dpq, query: apireq.query} 2016 args.createAIS = false 2017 if _, err = args.initAndTry(); err != nil { 2018 return 2019 } 2020 // init and validate 2021 if err = p.initBackendProp(nprops); err != nil { 2022 p.writeErr(w, r, err) 2023 return 2024 } 2025 } 2026 if xid, err = p.setBprops(msg, bck, nprops); err != nil { 2027 p.writeErr(w, r, err) 2028 return 2029 } 2030 w.Write([]byte(xid)) 2031 } 2032 2033 // HEAD /v1/objects/bucket-name/object-name 2034 func (p *proxy) httpobjhead(w http.ResponseWriter, r *http.Request, 
origURLBck ...string) { 2035 bckArgs := allocBctx() 2036 { 2037 bckArgs.p = p 2038 bckArgs.w = w 2039 bckArgs.r = r 2040 bckArgs.perms = apc.AceObjHEAD 2041 bckArgs.createAIS = false 2042 } 2043 if len(origURLBck) > 0 { 2044 bckArgs.origURLBck = origURLBck[0] 2045 } 2046 bck, objName, err := p._parseReqTry(w, r, bckArgs) 2047 if err != nil { 2048 return 2049 } 2050 smap := p.owner.smap.get() 2051 si, err := smap.HrwName2T(bck.MakeUname(objName)) 2052 if err != nil { 2053 p.writeErr(w, r, err, http.StatusInternalServerError) 2054 return 2055 } 2056 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 2057 nlog.Infof("%s %s => %s", r.Method, bck.Cname(objName), si.StringEx()) 2058 } 2059 redirectURL := p.redirectURL(r, si, time.Now() /*started*/, cmn.NetIntraControl) 2060 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 2061 } 2062 2063 // PATCH /v1/objects/bucket-name/object-name 2064 func (p *proxy) httpobjpatch(w http.ResponseWriter, r *http.Request) { 2065 started := time.Now() 2066 bckArgs := allocBctx() 2067 { 2068 bckArgs.p = p 2069 bckArgs.w = w 2070 bckArgs.r = r 2071 bckArgs.perms = apc.AceObjHEAD 2072 bckArgs.createAIS = false 2073 } 2074 bck, objName, err := p._parseReqTry(w, r, bckArgs) 2075 if err != nil { 2076 return 2077 } 2078 smap := p.owner.smap.get() 2079 si, err := smap.HrwName2T(bck.MakeUname(objName)) 2080 if err != nil { 2081 p.writeErr(w, r, err, http.StatusInternalServerError) 2082 return 2083 } 2084 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 2085 nlog.Infof("%s %s => %s", r.Method, bck.Cname(objName), si.StringEx()) 2086 } 2087 redirectURL := p.redirectURL(r, si, started, cmn.NetIntraControl) 2088 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 2089 } 2090 2091 func (p *proxy) listBuckets(w http.ResponseWriter, r *http.Request, qbck *cmn.QueryBcks, msg *apc.ActMsg, dpq *dpq) { 2092 var ( 2093 bmd = p.owner.bmd.get() 2094 present bool 2095 ) 2096 if qbck.IsAIS() || qbck.IsHTTP() { 2097 bcks := bmd.Select(qbck) 2098 p.writeJSON(w, r, 
bcks, "list-buckets") 2099 return 2100 } 2101 2102 // present-only filtering 2103 if dpq.fltPresence != "" { 2104 if v, err := strconv.Atoi(dpq.fltPresence); err == nil { 2105 present = apc.IsFltPresent(v) 2106 } 2107 } 2108 if present { 2109 bcks := bmd.Select(qbck) 2110 p.writeJSON(w, r, bcks, "list-buckets") 2111 return 2112 } 2113 2114 // via random target 2115 smap := p.owner.smap.get() 2116 si, err := smap.GetRandTarget() 2117 if err != nil { 2118 p.writeErr(w, r, err) 2119 return 2120 } 2121 2122 cargs := allocCargs() 2123 { 2124 cargs.si = si 2125 cargs.req = cmn.HreqArgs{ 2126 Method: r.Method, 2127 Path: r.URL.Path, 2128 RawQuery: r.URL.RawQuery, 2129 Header: r.Header, 2130 Body: cos.MustMarshal(msg), 2131 } 2132 cargs.timeout = apc.DefaultTimeout 2133 } 2134 res := p.call(cargs, smap) 2135 freeCargs(cargs) 2136 2137 if res.err != nil { 2138 err = res.toErr() 2139 p.writeErr(w, r, err, res.status) 2140 return 2141 } 2142 2143 hdr := w.Header() 2144 hdr.Set(cos.HdrContentType, res.header.Get(cos.HdrContentType)) 2145 hdr.Set(cos.HdrContentLength, strconv.Itoa(len(res.bytes))) 2146 _, err = w.Write(res.bytes) 2147 debug.AssertNoErr(err) 2148 } 2149 2150 func (p *proxy) redirectURL(r *http.Request, si *meta.Snode, ts time.Time, netIntra string, netPubs ...string) (redirect string) { 2151 var ( 2152 nodeURL string 2153 netPub = cmn.NetPublic 2154 ) 2155 if len(netPubs) > 0 { 2156 netPub = netPubs[0] 2157 } 2158 if p.si.LocalNet == nil { 2159 nodeURL = si.URL(netPub) 2160 } else { 2161 var local bool 2162 remote := r.RemoteAddr 2163 if colon := strings.Index(remote, ":"); colon != -1 { 2164 remote = remote[:colon] 2165 } 2166 if ip := net.ParseIP(remote); ip != nil { 2167 local = p.si.LocalNet.Contains(ip) 2168 } 2169 if local { 2170 nodeURL = si.URL(netIntra) 2171 } else { 2172 nodeURL = si.URL(netPub) 2173 } 2174 } 2175 redirect = nodeURL + r.URL.Path + "?" 
2176 if r.URL.RawQuery != "" { 2177 redirect += r.URL.RawQuery + "&" 2178 } 2179 2180 query := url.Values{ 2181 apc.QparamProxyID: []string{p.SID()}, 2182 apc.QparamUnixTime: []string{cos.UnixNano2S(ts.UnixNano())}, 2183 } 2184 redirect += query.Encode() 2185 return 2186 } 2187 2188 // lsObjsA reads object list from all targets, combines, sorts and returns 2189 // the final list. Excess of object entries from each target is remembered in the 2190 // buffer (see: `queryBuffers`) so we won't request the same objects again. 2191 func (p *proxy) lsObjsA(bck *meta.Bck, lsmsg *apc.LsoMsg) (allEntries *cmn.LsoRes, err error) { 2192 var ( 2193 aisMsg *aisMsg 2194 args *bcastArgs 2195 entries cmn.LsoEntries 2196 results sliceResults 2197 smap = p.owner.smap.get() 2198 cacheID = cacheReqID{bck: bck.Bucket(), prefix: lsmsg.Prefix} 2199 token = lsmsg.ContinuationToken 2200 props = lsmsg.PropsSet() 2201 hasEnough bool 2202 flags uint32 2203 ) 2204 if lsmsg.PageSize == 0 { 2205 lsmsg.PageSize = apc.MaxPageSizeAIS 2206 } 2207 pageSize := lsmsg.PageSize 2208 2209 // TODO: Before checking cache and buffer we should check if there is another 2210 // request in-flight that asks for the same page - if true wait for the cache 2211 // to get populated. 2212 2213 if lsmsg.IsFlagSet(apc.UseListObjsCache) { 2214 entries, hasEnough = p.qm.c.get(cacheID, token, pageSize) 2215 if hasEnough { 2216 goto end 2217 } 2218 } 2219 entries, hasEnough = p.qm.b.get(lsmsg.UUID, token, pageSize) 2220 if hasEnough { 2221 // We have enough in the buffer to fulfill the request. 2222 goto endWithCache 2223 } 2224 2225 // User requested some page but we don't have enough (but we may have part 2226 // of the full page). Therefore, we must ask targets for page starting from 2227 // what we have locally, so we don't re-request the objects. 
2228 lsmsg.ContinuationToken = p.qm.b.last(lsmsg.UUID, token) 2229 2230 aisMsg = p.newAmsgActVal(apc.ActList, &lsmsg) 2231 args = allocBcArgs() 2232 args.req = cmn.HreqArgs{ 2233 Method: http.MethodGet, 2234 Path: apc.URLPathBuckets.Join(bck.Name), 2235 Query: bck.NewQuery(), 2236 Body: cos.MustMarshal(aisMsg), 2237 } 2238 args.timeout = apc.LongTimeout 2239 args.smap = smap 2240 args.cresv = cresLso{} // -> cmn.LsoRes 2241 2242 // Combine the results. 2243 results = p.bcastGroup(args) 2244 freeBcArgs(args) 2245 for _, res := range results { 2246 if res.err != nil { 2247 err = res.toErr() 2248 freeBcastRes(results) 2249 return nil, err 2250 } 2251 objList := res.v.(*cmn.LsoRes) 2252 flags |= objList.Flags 2253 p.qm.b.set(lsmsg.UUID, res.si.ID(), objList.Entries, pageSize) 2254 } 2255 freeBcastRes(results) 2256 entries, hasEnough = p.qm.b.get(lsmsg.UUID, token, pageSize) 2257 debug.Assert(hasEnough) 2258 2259 endWithCache: 2260 if lsmsg.IsFlagSet(apc.UseListObjsCache) { 2261 p.qm.c.set(cacheID, token, entries, pageSize) 2262 } 2263 end: 2264 if lsmsg.IsFlagSet(apc.UseListObjsCache) && !props.All(apc.GetPropsAll...) { 2265 // Since cache keeps entries with whole subset props we must create copy 2266 // of the entries with smaller subset of props (if we would change the 2267 // props of the `entries` it would also affect entries inside cache). 
2268 propsEntries := make(cmn.LsoEntries, len(entries)) 2269 for idx := range entries { 2270 propsEntries[idx] = entries[idx].CopyWithProps(props) 2271 } 2272 entries = propsEntries 2273 } 2274 2275 allEntries = &cmn.LsoRes{ 2276 UUID: lsmsg.UUID, 2277 Entries: entries, 2278 Flags: flags, 2279 } 2280 if len(entries) >= int(pageSize) { 2281 allEntries.ContinuationToken = entries[len(entries)-1].Name 2282 } 2283 2284 // when recursion is disabled (i.e., lsmsg.IsFlagSet(apc.LsNoRecursion)) 2285 // the (`cmn.LsoRes`) result _may_ include duplicated names of the virtual subdirectories 2286 // - that's why: 2287 if lsmsg.IsFlagSet(apc.LsNoRecursion) { 2288 allEntries.Entries = cmn.DedupLso(allEntries.Entries, len(entries)) 2289 } 2290 2291 return allEntries, nil 2292 } 2293 2294 func (p *proxy) lsObjsR(bck *meta.Bck, lsmsg *apc.LsoMsg, hdr http.Header, smap *smapX, tsi *meta.Snode, config *cmn.Config, 2295 wantOnlyRemote bool) (*cmn.LsoRes, error) { 2296 var ( 2297 results sliceResults 2298 aisMsg = p.newAmsgActVal(apc.ActList, &lsmsg) 2299 args = allocBcArgs() 2300 timeout = config.Client.ListObjTimeout.D() 2301 ) 2302 if cos.IsParseBool(hdr.Get(apc.HdrInventory)) { 2303 // TODO: extend to other Clouds or, more precisely, other list-objects supporting backends 2304 if !bck.IsRemoteS3() { 2305 return nil, cmn.NewErrUnsupp("list (via bucket inventory) non-S3 bucket", bck.Cname("")) 2306 } 2307 if lsmsg.ContinuationToken == "" /*first page*/ { 2308 timeout = config.Client.TimeoutLong.D() 2309 2310 // override _lsofc selection (see above) 2311 _, objName := s3.InvPrefObjname(bck.Bucket(), hdr.Get(apc.HdrInvName), hdr.Get(apc.HdrInvID)) 2312 tsi, err := smap.HrwName2T(bck.MakeUname(objName)) 2313 if err != nil { 2314 return nil, err 2315 } 2316 lsmsg.SID = tsi.ID() 2317 } 2318 } 2319 args.req = cmn.HreqArgs{ 2320 Method: http.MethodGet, 2321 Path: apc.URLPathBuckets.Join(bck.Name), 2322 Header: hdr, 2323 Query: bck.NewQuery(), 2324 Body: cos.MustMarshal(aisMsg), 2325 } 2326 
if wantOnlyRemote { 2327 cargs := allocCargs() 2328 { 2329 cargs.si = tsi 2330 cargs.req = args.req 2331 cargs.timeout = timeout 2332 cargs.cresv = cresLso{} // -> cmn.LsoRes 2333 } 2334 // duplicate via query to have target ignoring an (early) failure to initialize bucket 2335 if lsmsg.IsFlagSet(apc.LsDontHeadRemote) { 2336 cargs.req.Query.Set(apc.QparamDontHeadRemote, "true") 2337 } 2338 if lsmsg.IsFlagSet(apc.LsDontAddRemote) { 2339 cargs.req.Query.Set(apc.QparamDontAddRemote, "true") 2340 } 2341 res := p.call(cargs, smap) 2342 freeCargs(cargs) 2343 results = make(sliceResults, 1) 2344 results[0] = res 2345 } else { 2346 args.timeout = timeout 2347 args.smap = smap 2348 args.cresv = cresLso{} // -> cmn.LsoRes 2349 results = p.bcastGroup(args) 2350 } 2351 2352 freeBcArgs(args) 2353 2354 // Combine the results. 2355 resLists := make([]*cmn.LsoRes, 0, len(results)) 2356 for _, res := range results { 2357 if res.err != nil { 2358 err := res.toErr() 2359 freeBcastRes(results) 2360 return nil, err 2361 } 2362 resLists = append(resLists, res.v.(*cmn.LsoRes)) 2363 } 2364 freeBcastRes(results) 2365 2366 return cmn.MergeLso(resLists, 0), nil 2367 } 2368 2369 func (p *proxy) redirectObjAction(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, msg *apc.ActMsg) { 2370 started := time.Now() 2371 smap := p.owner.smap.get() 2372 si, err := smap.HrwName2T(bck.MakeUname(objName)) 2373 if err != nil { 2374 p.writeErr(w, r, err) 2375 return 2376 } 2377 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 2378 nlog.Infof("%q %s => %s", msg.Action, bck.Cname(objName), si.StringEx()) 2379 } 2380 2381 // NOTE: Code 307 is the only way to http-redirect with the original JSON payload. 
2382 redirectURL := p.redirectURL(r, si, started, cmn.NetIntraControl) 2383 http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect) 2384 2385 p.statsT.Inc(stats.RenameCount) 2386 } 2387 2388 func (p *proxy) listrange(method, bucket string, msg *apc.ActMsg, query url.Values) (xid string, err error) { 2389 var ( 2390 smap = p.owner.smap.get() 2391 aisMsg = p.newAmsg(msg, nil, cos.GenUUID()) 2392 body = cos.MustMarshal(aisMsg) 2393 path = apc.URLPathBuckets.Join(bucket) 2394 ) 2395 nlb := xact.NewXactNL(aisMsg.UUID, aisMsg.Action, &smap.Smap, nil) 2396 nlb.SetOwner(equalIC) 2397 p.ic.registerEqual(regIC{smap: smap, query: query, nl: nlb}) 2398 args := allocBcArgs() 2399 args.req = cmn.HreqArgs{Method: method, Path: path, Query: query, Body: body} 2400 args.smap = smap 2401 args.timeout = apc.DefaultTimeout 2402 results := p.bcastGroup(args) 2403 freeBcArgs(args) 2404 for _, res := range results { 2405 if res.err == nil { 2406 continue 2407 } 2408 err = res.errorf("%s failed to %q List/Range", res.si, msg.Action) 2409 break 2410 } 2411 freeBcastRes(results) 2412 xid = aisMsg.UUID 2413 return 2414 } 2415 2416 func (p *proxy) reverseHandler(w http.ResponseWriter, r *http.Request) { 2417 apiItems, err := p.parseURL(w, r, apc.URLPathReverse.L, 1, false) 2418 if err != nil { 2419 return 2420 } 2421 2422 // rewrite URL path (removing `apc.Reverse`) 2423 r.URL.Path = cos.JoinWords(apc.Version, apiItems[0]) 2424 2425 nodeID := r.Header.Get(apc.HdrNodeID) 2426 if nodeID == "" { 2427 p.writeErrMsg(w, r, "missing node ID") 2428 return 2429 } 2430 smap := p.owner.smap.get() 2431 si := smap.GetNode(nodeID) 2432 if si != nil && si.InMaintOrDecomm() { 2433 daeStatus := "inactive" 2434 switch { 2435 case si.Flags.IsSet(meta.SnodeMaint): 2436 daeStatus = apc.NodeMaintenance 2437 case si.Flags.IsSet(meta.SnodeDecomm): 2438 daeStatus = apc.NodeDecommission 2439 } 2440 if r.Method == http.MethodGet { 2441 what := r.URL.Query().Get(apc.QparamWhat) 2442 if what == 
apc.WhatNodeStatsAndStatus { 2443 // skip reversing, return status as per Smap 2444 msg := &stats.NodeStatus{ 2445 Node: stats.Node{Snode: si}, 2446 Status: daeStatus, 2447 } 2448 p.writeJSON(w, r, msg, what) 2449 return 2450 } 2451 } 2452 // otherwise, warn and go ahead 2453 // (e.g. scenario: shutdown when transitioning through states) 2454 nlog.Warningf("%s: %s status is: %s", p, si.StringEx(), daeStatus) 2455 } 2456 2457 // access control 2458 switch r.Method { 2459 case http.MethodGet: 2460 // must be consistent with httpdaeget, httpcluget 2461 err = p.checkAccess(w, r, nil, apc.AceShowCluster) 2462 case http.MethodPost: 2463 // (ditto) httpdaepost, httpclupost 2464 err = p.checkAccess(w, r, nil, apc.AceAdmin) 2465 case http.MethodPut, http.MethodDelete: 2466 // (ditto) httpdaeput/delete and httpcluput/delete 2467 err = p.checkAccess(w, r, nil, apc.AceAdmin) 2468 default: 2469 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost, http.MethodPut) 2470 return 2471 } 2472 if err != nil { 2473 return 2474 } 2475 2476 // do 2477 if si != nil { 2478 p.reverseNodeRequest(w, r, si) 2479 return 2480 } 2481 // special case when the target self-removed itself from cluster map 2482 // after having lost all mountpaths. 
2483 nodeURL := r.Header.Get(apc.HdrNodeURL) 2484 if nodeURL == "" { 2485 err = &errNodeNotFound{"cannot rproxy to", nodeID, p.si, smap} 2486 p.writeErr(w, r, err, http.StatusNotFound) 2487 return 2488 } 2489 parsedURL, err := url.Parse(nodeURL) 2490 if err != nil { 2491 p.writeErrf(w, r, "%s: invalid URL %q for node %s", p.si, nodeURL, nodeID) 2492 return 2493 } 2494 2495 p.reverseRequest(w, r, nodeID, parsedURL) 2496 } 2497 2498 // 2499 // /daemon handlers 2500 // 2501 2502 // [METHOD] /v1/daemon 2503 func (p *proxy) daemonHandler(w http.ResponseWriter, r *http.Request) { 2504 switch r.Method { 2505 case http.MethodGet: 2506 p.httpdaeget(w, r) 2507 case http.MethodPut: 2508 p.httpdaeput(w, r) 2509 case http.MethodPost: 2510 p.httpdaepost(w, r) 2511 default: 2512 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost, http.MethodPut) 2513 } 2514 } 2515 2516 func (p *proxy) handlePendingRenamedLB(renamedBucket string) { 2517 ctx := &bmdModifier{ 2518 pre: p.bmodPostMv, 2519 final: p.bmodSync, 2520 msg: &apc.ActMsg{Value: apc.ActMoveBck}, 2521 bcks: []*meta.Bck{meta.NewBck(renamedBucket, apc.AIS, cmn.NsGlobal)}, 2522 } 2523 _, err := p.owner.bmd.modify(ctx) 2524 debug.AssertNoErr(err) 2525 } 2526 2527 func (p *proxy) bmodPostMv(ctx *bmdModifier, clone *bucketMD) error { 2528 var ( 2529 bck = ctx.bcks[0] 2530 props, present = clone.Get(bck) 2531 ) 2532 if !present { 2533 ctx.terminate = true 2534 // Already removed via the the very first target calling here. 
2535 return nil 2536 } 2537 if props.Renamed == "" { 2538 nlog.Errorf("%s: renamed bucket %s: unexpected props %+v", p, bck.Name, *bck.Props) 2539 ctx.terminate = true 2540 return nil 2541 } 2542 clone.del(bck) 2543 return nil 2544 } 2545 2546 func (p *proxy) httpdaeget(w http.ResponseWriter, r *http.Request) { 2547 var ( 2548 query = r.URL.Query() 2549 what = query.Get(apc.QparamWhat) 2550 ) 2551 if err := p.checkAccess(w, r, nil, apc.AceShowCluster); err != nil { 2552 return 2553 } 2554 switch what { 2555 case apc.WhatBMD: 2556 if renamedBucket := query.Get(whatRenamedLB); renamedBucket != "" { 2557 p.handlePendingRenamedLB(renamedBucket) 2558 } 2559 fallthrough // fallthrough 2560 case apc.WhatNodeConfig, apc.WhatSmapVote, apc.WhatSnode, apc.WhatLog, 2561 apc.WhatNodeStats, apc.WhatNodeStatsV322, apc.WhatMetricNames, 2562 apc.WhatNodeStatsAndStatusV322: 2563 p.htrun.httpdaeget(w, r, query, nil /*htext*/) 2564 2565 case apc.WhatNodeStatsAndStatus: 2566 ds := p.statsAndStatus() 2567 daeStats := p.statsT.GetStats() 2568 ds.Tracker = daeStats.Tracker 2569 p.ciiFill(&ds.Cluster) 2570 p.writeJSON(w, r, ds, what) 2571 2572 case apc.WhatSysInfo: 2573 p.writeJSON(w, r, apc.GetMemCPU(), what) 2574 case apc.WhatSmap: 2575 const retries = 16 2576 var ( 2577 smap = p.owner.smap.get() 2578 sleep = cmn.Rom.CplaneOperation() / 2 2579 ) 2580 for i := 0; smap.validate() != nil && i < retries; i++ { 2581 if !p.NodeStarted() { 2582 time.Sleep(sleep) 2583 smap = p.owner.smap.get() 2584 if err := smap.validate(); err != nil { 2585 nlog.Errorf("%s is starting up, cannot return %s yet: %v", p, smap, err) 2586 } 2587 break 2588 } 2589 smap = p.owner.smap.get() 2590 time.Sleep(sleep) 2591 } 2592 if err := smap.validate(); err != nil { 2593 nlog.Errorf("%s: startup is taking unusually long time: %s (%v)", p, smap, err) 2594 w.WriteHeader(http.StatusServiceUnavailable) 2595 return 2596 } 2597 p.writeJSON(w, r, smap, what) 2598 default: 2599 p.htrun.httpdaeget(w, r, query, nil /*htext*/) 
2600 } 2601 } 2602 2603 func (p *proxy) httpdaeput(w http.ResponseWriter, r *http.Request) { 2604 apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 0, true) 2605 if err != nil { 2606 return 2607 } 2608 if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil { 2609 return 2610 } 2611 // urlpath-based actions 2612 if len(apiItems) > 0 { 2613 action := apiItems[0] 2614 p.daePathAction(w, r, action) 2615 return 2616 } 2617 // message-based actions 2618 query := r.URL.Query() 2619 msg, err := p.readActionMsg(w, r) 2620 if err != nil { 2621 return 2622 } 2623 switch msg.Action { 2624 case apc.ActSetConfig: // set-config #2 - via action message 2625 p.setDaemonConfigMsg(w, r, msg, query) 2626 case apc.ActResetConfig: 2627 if err := p.owner.config.resetDaemonConfig(); err != nil { 2628 p.writeErr(w, r, err) 2629 } 2630 case apc.ActRotateLogs: 2631 nlog.Flush(nlog.ActRotate) 2632 case apc.ActResetStats: 2633 errorsOnly := msg.Value.(bool) 2634 p.statsT.ResetStats(errorsOnly) 2635 2636 case apc.ActStartMaintenance: 2637 if !p.ensureIntraControl(w, r, true /* from primary */) { 2638 return 2639 } 2640 p.termKalive(msg.Action) 2641 case apc.ActDecommissionCluster, apc.ActDecommissionNode: 2642 if !p.ensureIntraControl(w, r, true /* from primary */) { 2643 return 2644 } 2645 var opts apc.ActValRmNode 2646 if err := cos.MorphMarshal(msg.Value, &opts); err != nil { 2647 p.writeErr(w, r, err) 2648 return 2649 } 2650 p.termKalive(msg.Action) 2651 p.decommission(msg.Action, &opts) 2652 case apc.ActShutdownNode: 2653 if !p.ensureIntraControl(w, r, true /* from primary */) { 2654 return 2655 } 2656 p.termKalive(msg.Action) 2657 p.shutdown(msg.Action) 2658 case apc.ActShutdownCluster: 2659 smap := p.owner.smap.get() 2660 isPrimary := smap.isPrimary(p.si) 2661 if !isPrimary { 2662 if !p.ensureIntraControl(w, r, true /* from primary */) { 2663 return 2664 } 2665 p.Stop(&errNoUnregister{msg.Action}) 2666 return 2667 } 2668 force := cos.IsParseBool(query.Get(apc.QparamForce)) 2669 
if !force { 2670 p.writeErrf(w, r, "cannot shutdown primary %s (consider %s=true option)", 2671 p.si, apc.QparamForce) 2672 return 2673 } 2674 _ = syscall.Kill(syscall.Getpid(), syscall.SIGINT) 2675 default: 2676 p.writeErrAct(w, r, msg.Action) 2677 } 2678 } 2679 2680 func (p *proxy) daePathAction(w http.ResponseWriter, r *http.Request, action string) { 2681 switch action { 2682 case apc.Proxy: 2683 p.daeSetPrimary(w, r) 2684 case apc.SyncSmap: 2685 newsmap := &smapX{} 2686 if cmn.ReadJSON(w, r, newsmap) != nil { 2687 return 2688 } 2689 if err := newsmap.validate(); err != nil { 2690 p.writeErrf(w, r, "%s: invalid %s: %v", p.si, newsmap, err) 2691 return 2692 } 2693 if err := p.owner.smap.synchronize(p.si, newsmap, nil /*ms payload*/, p.htrun.smapUpdatedCB); err != nil { 2694 p.writeErr(w, r, cmn.NewErrFailedTo(p, "synchronize", newsmap, err)) 2695 return 2696 } 2697 nlog.Infof("%s: %s %s done", p, apc.SyncSmap, newsmap) 2698 case apc.ActSetConfig: // set-config #1 - via query parameters and "?n1=v1&n2=v2..." 
2699 p.setDaemonConfigQuery(w, r) 2700 default: 2701 p.writeErrAct(w, r, action) 2702 } 2703 } 2704 2705 func (p *proxy) httpdaepost(w http.ResponseWriter, r *http.Request) { 2706 apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 0, true) 2707 if err != nil { 2708 return 2709 } 2710 if len(apiItems) == 0 || apiItems[0] != apc.AdminJoin { 2711 p.writeErrURL(w, r) 2712 return 2713 } 2714 if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil { 2715 return 2716 } 2717 if !p.keepalive.paused() { 2718 nlog.Warningf("%s: keepalive is already active - proceeding to resume (and reset) anyway", p) 2719 } 2720 p.keepalive.ctrl(kaResumeMsg) 2721 body, err := cmn.ReadBytes(r) 2722 if err != nil { 2723 p.writeErr(w, r, err) 2724 return 2725 } 2726 caller := r.Header.Get(apc.HdrCallerName) 2727 if err := p.recvCluMetaBytes(apc.ActAdminJoinProxy, body, caller); err != nil { 2728 p.writeErr(w, r, err) 2729 } 2730 } 2731 2732 func (p *proxy) smapFromURL(baseURL string) (smap *smapX, err error) { 2733 cargs := allocCargs() 2734 { 2735 cargs.req = cmn.HreqArgs{ 2736 Method: http.MethodGet, 2737 Base: baseURL, 2738 Path: apc.URLPathDae.S, 2739 Query: url.Values{apc.QparamWhat: []string{apc.WhatSmap}}, 2740 } 2741 cargs.timeout = apc.DefaultTimeout 2742 cargs.cresv = cresSM{} // -> smapX 2743 } 2744 res := p.call(cargs, p.owner.smap.get()) 2745 if res.err != nil { 2746 err = res.errorf("failed to get Smap from %s", baseURL) 2747 } else { 2748 smap = res.v.(*smapX) 2749 if err = smap.validate(); err != nil { 2750 err = fmt.Errorf("%s: invalid %s from %s: %v", p, smap, baseURL, err) 2751 smap = nil 2752 } 2753 } 2754 freeCargs(cargs) 2755 freeCR(res) 2756 return 2757 } 2758 2759 // forceful primary change - is used when the original primary network is down 2760 // for a while and the remained nodes selected a new primary. After the 2761 // original primary is back it does not attach automatically to the new primary 2762 // and the cluster gets into split-brain mode. 
This request makes original 2763 // primary connect to the new primary 2764 func (p *proxy) forcefulJoin(w http.ResponseWriter, r *http.Request, proxyID string) { 2765 newPrimaryURL := r.URL.Query().Get(apc.QparamPrimaryCandidate) 2766 nlog.Infof("%s: force new primary %s (URL: %s)", p, proxyID, newPrimaryURL) 2767 2768 if p.SID() == proxyID { 2769 nlog.Warningf("%s is already primary", p) 2770 return 2771 } 2772 smap := p.owner.smap.get() 2773 psi := smap.GetProxy(proxyID) 2774 if psi == nil && newPrimaryURL == "" { 2775 err := &errNodeNotFound{"failed to find new primary", proxyID, p.si, smap} 2776 p.writeErr(w, r, err, http.StatusNotFound) 2777 return 2778 } 2779 if newPrimaryURL == "" { 2780 newPrimaryURL = psi.ControlNet.URL 2781 } 2782 if newPrimaryURL == "" { 2783 err := &errNodeNotFound{"failed to get new primary's direct URL", proxyID, p.si, smap} 2784 p.writeErr(w, r, err) 2785 return 2786 } 2787 newSmap, err := p.smapFromURL(newPrimaryURL) 2788 if err != nil { 2789 p.writeErr(w, r, err) 2790 return 2791 } 2792 primary := newSmap.Primary 2793 if proxyID != primary.ID() { 2794 p.writeErrf(w, r, "%s: proxy %s is not the primary, current %s", p.si, proxyID, newSmap.pp()) 2795 return 2796 } 2797 2798 p.metasyncer.becomeNonPrimary() // metasync to stop syncing and cancel all pending requests 2799 p.owner.smap.put(newSmap) 2800 res := p.regTo(primary.ControlNet.URL, primary, apc.DefaultTimeout, nil, nil, false /*keepalive*/) 2801 if res.err != nil { 2802 p.writeErr(w, r, res.toErr()) 2803 } 2804 } 2805 2806 func (p *proxy) daeSetPrimary(w http.ResponseWriter, r *http.Request) { 2807 apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 2, false) 2808 if err != nil { 2809 return 2810 } 2811 proxyID := apiItems[1] 2812 query := r.URL.Query() 2813 force := cos.IsParseBool(query.Get(apc.QparamForce)) 2814 2815 // force primary change 2816 if force && apiItems[0] == apc.Proxy { 2817 if smap := p.owner.smap.get(); !smap.isPrimary(p.si) { 2818 p.writeErr(w, r, 
newErrNotPrimary(p.si, smap)) 2819 } 2820 p.forcefulJoin(w, r, proxyID) 2821 return 2822 } 2823 prepare, err := cos.ParseBool(query.Get(apc.QparamPrepare)) 2824 if err != nil { 2825 p.writeErrf(w, r, "failed to parse URL query %q: %v", apc.QparamPrepare, err) 2826 return 2827 } 2828 if p.owner.smap.get().isPrimary(p.si) { 2829 p.writeErrf(w, r, "%s: am PRIMARY, expecting '/v1/cluster/...' when designating a new one", p) 2830 return 2831 } 2832 if prepare { 2833 var cluMeta cluMeta 2834 if err := cmn.ReadJSON(w, r, &cluMeta); err != nil { 2835 return 2836 } 2837 if err := p.recvCluMeta(&cluMeta, "set-primary", cluMeta.SI.String()); err != nil { 2838 p.writeErrf(w, r, "%s: failed to receive clu-meta: %v", p, err) 2839 return 2840 } 2841 } 2842 2843 // self 2844 if p.SID() == proxyID { 2845 smap := p.owner.smap.get() 2846 if smap.GetActiveNode(proxyID) == nil { 2847 p.writeErrf(w, r, "%s: in maintenance or decommissioned", p) 2848 return 2849 } 2850 if !prepare { 2851 p.becomeNewPrimary("") 2852 } 2853 return 2854 } 2855 2856 // other 2857 smap := p.owner.smap.get() 2858 psi := smap.GetProxy(proxyID) 2859 if psi == nil { 2860 err := &errNodeNotFound{"cannot set new primary", proxyID, p.si, smap} 2861 p.writeErr(w, r, err) 2862 return 2863 } 2864 if prepare { 2865 if cmn.Rom.FastV(4, cos.SmoduleAIS) { 2866 nlog.Infoln("Preparation step: do nothing") 2867 } 2868 return 2869 } 2870 ctx := &smapModifier{pre: func(_ *smapModifier, clone *smapX) error { clone.Primary = psi; return nil }} 2871 err = p.owner.smap.modify(ctx) 2872 debug.AssertNoErr(err) 2873 } 2874 2875 func (p *proxy) becomeNewPrimary(proxyIDToRemove string) { 2876 ctx := &smapModifier{ 2877 pre: p._becomePre, 2878 final: p._becomeFinal, 2879 sid: proxyIDToRemove, 2880 } 2881 err := p.owner.smap.modify(ctx) 2882 cos.AssertNoErr(err) 2883 } 2884 2885 func (p *proxy) _becomePre(ctx *smapModifier, clone *smapX) error { 2886 if !clone.isPresent(p.si) { 2887 cos.Assertf(false, "%s must always be present in the 
%s", p.si, clone.pp()) 2888 } 2889 if ctx.sid != "" && clone.GetNode(ctx.sid) != nil { 2890 // decision is made: going ahead to remove 2891 nlog.Infof("%s: removing failed primary %s", p, ctx.sid) 2892 clone.delProxy(ctx.sid) 2893 2894 // Remove reverse proxy entry for the node. 2895 p.rproxy.nodes.Delete(ctx.sid) 2896 } 2897 2898 clone.Primary = clone.GetProxy(p.SID()) 2899 clone.Version += 100 2900 clone.staffIC() 2901 return nil 2902 } 2903 2904 func (p *proxy) _becomeFinal(ctx *smapModifier, clone *smapX) { 2905 var ( 2906 bmd = p.owner.bmd.get() 2907 rmd = p.owner.rmd.get() 2908 msg = p.newAmsgStr(apc.ActNewPrimary, bmd) 2909 pairs = []revsPair{{clone, msg}, {bmd, msg}, {rmd, msg}} 2910 ) 2911 nlog.Infof("%s: distributing (%s, %s, %s) with newly elected primary (self)", p, clone, bmd, rmd) 2912 config, err := p.ensureConfigURLs() 2913 if err != nil { 2914 nlog.Errorln(err) 2915 } 2916 if config != nil { 2917 pairs = append(pairs, revsPair{config, msg}) 2918 nlog.Infof("%s: plus %s", p, config) 2919 } 2920 etl := p.owner.etl.get() 2921 if etl != nil && etl.version() > 0 { 2922 pairs = append(pairs, revsPair{etl, msg}) 2923 nlog.Infof("%s: plus %s", p, etl) 2924 } 2925 // metasync 2926 debug.Assert(clone._sgl != nil) 2927 _ = p.metasyncer.sync(pairs...) 
2928 2929 // synchronize IC tables 2930 p.syncNewICOwners(ctx.smap, clone) 2931 } 2932 2933 func (p *proxy) ensureConfigURLs() (config *globalConfig, err error) { 2934 config, err = p.owner.config.modify(&configModifier{pre: p._configURLs}) 2935 if err != nil { 2936 err = cmn.NewErrFailedTo(p, "update config (primary, original, discovery) URLs", config, err) 2937 } 2938 return config, err 2939 } 2940 2941 // using cmn.NetIntraControl network for all three: PrimaryURL, OriginalURL, and DiscoveryURL 2942 func (p *proxy) _configURLs(_ *configModifier, clone *globalConfig) (updated bool, _ error) { 2943 smap := p.owner.smap.get() 2944 debug.Assert(smap.isPrimary(p.si)) 2945 2946 if prim := smap.Primary.URL(cmn.NetIntraControl); clone.Proxy.PrimaryURL != prim { 2947 clone.Proxy.PrimaryURL = prim 2948 updated = true 2949 } 2950 orig, disc := smap.configURLsIC(clone.Proxy.OriginalURL, clone.Proxy.DiscoveryURL) 2951 if orig != "" && orig != clone.Proxy.OriginalURL { 2952 clone.Proxy.OriginalURL = orig 2953 updated = true 2954 } 2955 if disc != "" && disc != clone.Proxy.DiscoveryURL { 2956 clone.Proxy.DiscoveryURL = disc 2957 updated = true 2958 } 2959 return updated, nil 2960 } 2961 2962 // [METHOD] /v1/sort 2963 func (p *proxy) dsortHandler(w http.ResponseWriter, r *http.Request) { 2964 if !p.cluStartedWithRetry() { 2965 w.WriteHeader(http.StatusServiceUnavailable) 2966 return 2967 } 2968 if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil { 2969 return 2970 } 2971 apiItems, err := cmn.ParseURL(r.URL.Path, apc.URLPathdSort.L, 0, true) 2972 if err != nil { 2973 p.writeErrURL(w, r) 2974 return 2975 } 2976 2977 switch r.Method { 2978 case http.MethodPost: 2979 // - validate request, check input_bck and output_bck 2980 // - start dsort 2981 body, err := io.ReadAll(r.Body) 2982 if err != nil { 2983 p.writeErrStatusf(w, r, http.StatusInternalServerError, "failed to receive dsort request: %v", err) 2984 return 2985 } 2986 rs := &dsort.RequestSpec{} 2987 if err := 
jsoniter.Unmarshal(body, rs); err != nil { 2988 err = fmt.Errorf(cmn.FmtErrUnmarshal, p, "dsort request", cos.BHead(body), err) 2989 p.writeErr(w, r, err) 2990 return 2991 } 2992 parsc, err := rs.ParseCtx() 2993 if err != nil { 2994 p.writeErr(w, r, err) 2995 return 2996 } 2997 bck := meta.CloneBck(&parsc.InputBck) 2998 args := bctx{p: p, w: w, r: r, bck: bck, perms: apc.AceObjLIST | apc.AceGET} 2999 if _, err = args.initAndTry(); err != nil { 3000 return 3001 } 3002 if !parsc.OutputBck.Equal(&parsc.InputBck) { 3003 bckTo := meta.CloneBck(&parsc.OutputBck) 3004 bckTo, ecode, err := p.initBckTo(w, r, nil /*query*/, bckTo) 3005 if err != nil { 3006 return 3007 } 3008 if ecode == http.StatusNotFound { 3009 if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil { 3010 return 3011 } 3012 naction := "dsort-create-output-bck" 3013 warnfmt := "%s: %screate 'output_bck' %s with the 'input_bck' (%s) props" 3014 if p.forwardCP(w, r, nil /*msg*/, naction, body /*orig body*/) { // to create 3015 return 3016 } 3017 ctx := &bmdModifier{ 3018 pre: bmodCpProps, 3019 final: p.bmodSync, 3020 msg: &apc.ActMsg{Action: naction}, 3021 txnID: "", 3022 bcks: []*meta.Bck{bck, bckTo}, 3023 wait: true, 3024 } 3025 if _, err = p.owner.bmd.modify(ctx); err != nil { 3026 debug.AssertNoErr(err) 3027 err = fmt.Errorf(warnfmt+": %w", p, "failed to ", bckTo, bck, err) 3028 p.writeErr(w, r, err) 3029 return 3030 } 3031 nlog.Warningf(warnfmt, p, "", bckTo, bck) 3032 } 3033 } 3034 dsort.PstartHandler(w, r, parsc) 3035 case http.MethodGet: 3036 dsort.PgetHandler(w, r) 3037 case http.MethodDelete: 3038 if len(apiItems) == 1 && apiItems[0] == apc.Abort { 3039 dsort.PabortHandler(w, r) 3040 } else if len(apiItems) == 0 { 3041 dsort.PremoveHandler(w, r) 3042 } else { 3043 p.writeErrURL(w, r) 3044 } 3045 default: 3046 cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost) 3047 } 3048 } 3049 3050 func (p *proxy) rootHandler(w http.ResponseWriter, r *http.Request) { 3051 const 
fs3 = "/" + apc.S3 3052 if !p.cluStartedWithRetry() { 3053 w.WriteHeader(http.StatusServiceUnavailable) 3054 return 3055 } 3056 3057 // by default, s3 is serviced at `/s3` 3058 // with `/` root reserved for vanilla http locations via ht:// mechanism 3059 if !cmn.Rom.Features().IsSet(feat.S3APIviaRoot) { 3060 p.htHandler(w, r) 3061 return 3062 } 3063 3064 // prepend /s3 and handle 3065 switch { 3066 case r.URL.Path == "" || r.URL.Path == "/": 3067 r.URL.Path = fs3 3068 case r.URL.Path[0] == '/': 3069 r.URL.Path = fs3 + r.URL.Path 3070 default: 3071 r.URL.Path = fs3 + "/" + r.URL.Path 3072 } 3073 p.s3Handler(w, r) 3074 } 3075 3076 // GET | HEAD vanilla http(s) location via `ht://` bucket with the corresponding `OrigURLBck` 3077 func (p *proxy) htHandler(w http.ResponseWriter, r *http.Request) { 3078 if r.URL.Scheme == "" { 3079 p.writeErrURL(w, r) 3080 return 3081 } 3082 baseURL := r.URL.Scheme + "://" + r.URL.Host 3083 if cmn.Rom.FastV(5, cos.SmoduleAIS) { 3084 nlog.Infof("[HTTP CLOUD] RevProxy handler for: %s -> %s", baseURL, r.URL.Path) 3085 } 3086 if r.Method == http.MethodGet || r.Method == http.MethodHead { 3087 // bck.IsHTTP() 3088 hbo := cmn.NewHTTPObj(r.URL) 3089 q := r.URL.Query() 3090 q.Set(apc.QparamOrigURL, r.URL.String()) 3091 q.Set(apc.QparamProvider, apc.HTTP) 3092 r.URL.Path = apc.URLPathObjects.Join(hbo.Bck.Name, hbo.ObjName) 3093 r.URL.RawQuery = q.Encode() 3094 if r.Method == http.MethodGet { 3095 p.httpobjget(w, r, hbo.OrigURLBck) 3096 } else { 3097 p.httpobjhead(w, r, hbo.OrigURLBck) 3098 } 3099 return 3100 } 3101 p.writeErrf(w, r, "%q provider doesn't support %q", apc.HTTP, r.Method) 3102 } 3103 3104 // 3105 // metasync Rx 3106 // 3107 3108 // compare w/ t.receiveConfig 3109 func (p *proxy) receiveConfig(newConfig *globalConfig, msg *aisMsg, payload msPayload, caller string) (err error) { 3110 oldConfig := cmn.GCO.Get() 3111 logmsync(oldConfig.Version, newConfig, msg, caller) 3112 3113 p.owner.config.Lock() 3114 err = p._recvCfg(newConfig, 
payload) 3115 p.owner.config.Unlock() 3116 if err != nil { 3117 return 3118 } 3119 3120 if !p.NodeStarted() { 3121 if msg.Action == apc.ActAttachRemAis || msg.Action == apc.ActDetachRemAis { 3122 nlog.Warningf("%s: cannot handle %s (%s => %s) - starting up...", p, msg, oldConfig, newConfig) 3123 } 3124 return 3125 } 3126 3127 if msg.Action != apc.ActAttachRemAis && msg.Action != apc.ActDetachRemAis && 3128 newConfig.Backend.EqualRemAIS(&oldConfig.Backend, p.String()) { 3129 return // nothing to do 3130 } 3131 3132 go p._remais(&newConfig.ClusterConfig, false) 3133 return 3134 } 3135 3136 // refresh local p.remais cache via intra-cluster call to a random target 3137 func (p *proxy) _remais(newConfig *cmn.ClusterConfig, blocking bool) { 3138 const maxretries = 5 3139 if !p.remais.in.CAS(false, true) { 3140 return 3141 } 3142 var ( 3143 sleep = newConfig.Timeout.CplaneOperation.D() 3144 retries = maxretries 3145 over, nver int64 3146 ) 3147 if blocking { 3148 retries = 1 3149 } else { 3150 maxsleep := newConfig.Timeout.MaxKeepalive.D() 3151 if uptime := p.keepalive.cluUptime(mono.NanoTime()); uptime < maxsleep { 3152 sleep = 2 * maxsleep 3153 } 3154 } 3155 for ; retries > 0; retries-- { 3156 time.Sleep(sleep) 3157 all, err := p.getRemAisVec(false /*refresh*/) 3158 if err != nil { 3159 if retries < maxretries { 3160 nlog.Errorf("%s: failed to get remais (%d attempts)", p, retries-1) 3161 } 3162 continue 3163 } 3164 p.remais.mu.Lock() 3165 if over <= 0 { 3166 over = p.remais.Ver 3167 } 3168 if p.remais.Ver < all.Ver { 3169 // keep old/detached clusters to support access to existing ("cached") buckets 3170 // i.e., the ability to resolve remote alias to Ns.UUID (see p.a2u) 3171 for _, a := range p.remais.RemAisVec.A { 3172 var found bool 3173 for _, b := range p.remais.old { 3174 if b.UUID == a.UUID { 3175 *b = *a 3176 found = true 3177 break 3178 } 3179 if b.Alias == a.Alias { 3180 nlog.Errorf("duplicated remais alias: (%q, %q) vs (%q, %q)", a.UUID, a.Alias, b.UUID, 
b.Alias) 3181 } 3182 } 3183 if !found { 3184 p.remais.old = append(p.remais.old, a) 3185 } 3186 } 3187 3188 p.remais.RemAisVec = *all 3189 nver = p.remais.Ver 3190 p.remais.mu.Unlock() 3191 break 3192 } 3193 p.remais.mu.Unlock() 3194 nlog.Errorf("%s: retrying remais ver=%d (%d attempts)", p, all.Ver, retries-1) 3195 sleep = newConfig.Timeout.CplaneOperation.D() 3196 } 3197 3198 p.remais.in.Store(false) 3199 nlog.Infof("%s: remais v%d => v%d", p, over, nver) 3200 } 3201 3202 func (p *proxy) receiveRMD(newRMD *rebMD, msg *aisMsg, caller string) (err error) { 3203 rmd := p.owner.rmd.get() 3204 logmsync(rmd.Version, newRMD, msg, caller) 3205 3206 p.owner.rmd.Lock() 3207 rmd = p.owner.rmd.get() 3208 if newRMD.version() <= rmd.version() { 3209 p.owner.rmd.Unlock() 3210 if newRMD.version() < rmd.version() { 3211 err = newErrDowngrade(p.si, rmd.String(), newRMD.String()) 3212 } 3213 return 3214 } 3215 p.owner.rmd.put(newRMD) 3216 err = p.owner.rmd.persist(newRMD) 3217 debug.AssertNoErr(err) 3218 p.owner.rmd.Unlock() 3219 3220 // Register `nl` for rebalance/resilver 3221 smap := p.owner.smap.get() 3222 if smap.IsIC(p.si) && smap.CountActiveTs() > 0 && (smap.IsPrimary(p.si) || p.ClusterStarted()) { 3223 nl := xact.NewXactNL(xact.RebID2S(newRMD.Version), apc.ActRebalance, &smap.Smap, nil) 3224 nl.SetOwner(equalIC) 3225 err := p.notifs.add(nl) 3226 debug.AssertNoErr(err) 3227 3228 if newRMD.Resilver != "" { 3229 nl = xact.NewXactNL(newRMD.Resilver, apc.ActResilver, &smap.Smap, nil) 3230 nl.SetOwner(equalIC) 3231 err := p.notifs.add(nl) 3232 debug.AssertNoErr(err) 3233 } 3234 } 3235 return 3236 } 3237 3238 func (p *proxy) smapOnUpdate(newSmap, oldSmap *smapX, nfl, ofl cos.BitFlags) { 3239 // When some node was removed from the cluster we need to clean up the 3240 // reverse proxy structure. 
3241 p.rproxy.nodes.Range(func(key, _ any) bool { 3242 nodeID := key.(string) 3243 if oldSmap.GetNode(nodeID) != nil && newSmap.GetNode(nodeID) == nil { 3244 p.rproxy.nodes.Delete(nodeID) 3245 } 3246 return true 3247 }) 3248 p.syncNewICOwners(oldSmap, newSmap) 3249 3250 p.htrun.smapUpdatedCB(newSmap, oldSmap, nfl, ofl) 3251 } 3252 3253 func (p *proxy) receiveBMD(newBMD *bucketMD, msg *aisMsg, payload msPayload, caller string) (err error) { 3254 bmd := p.owner.bmd.get() 3255 logmsync(bmd.Version, newBMD, msg, caller) 3256 3257 p.owner.bmd.Lock() 3258 bmd = p.owner.bmd.get() 3259 if err = bmd.validateUUID(newBMD, p.si, nil, caller); err != nil { 3260 cos.Assert(!p.owner.smap.get().isPrimary(p.si)) 3261 // cluster integrity error: making exception for non-primary proxies 3262 nlog.Errorf("%s (non-primary): %v - proceeding to override BMD", p, err) 3263 } else if newBMD.version() <= bmd.version() { 3264 p.owner.bmd.Unlock() 3265 return newErrDowngrade(p.si, bmd.String(), newBMD.String()) 3266 } 3267 err = p.owner.bmd.putPersist(newBMD, payload) 3268 debug.AssertNoErr(err) 3269 p.owner.bmd.Unlock() 3270 return 3271 } 3272 3273 // getDaemonInfo queries osi for its daemon info and returns it. 
3274 func (p *proxy) _getSI(osi *meta.Snode) (si *meta.Snode, err error) { 3275 cargs := allocCargs() 3276 { 3277 cargs.si = osi 3278 cargs.req = cmn.HreqArgs{ 3279 Method: http.MethodGet, 3280 Path: apc.URLPathDae.S, 3281 Query: url.Values{apc.QparamWhat: []string{apc.WhatSnode}}, 3282 } 3283 cargs.timeout = cmn.Rom.CplaneOperation() 3284 cargs.cresv = cresND{} // -> meta.Snode 3285 } 3286 res := p.call(cargs, p.owner.smap.get()) 3287 if res.err != nil { 3288 err = res.err 3289 } else { 3290 si = res.v.(*meta.Snode) 3291 } 3292 freeCargs(cargs) 3293 freeCR(res) 3294 return 3295 } 3296 3297 func (p *proxy) headRemoteBck(bck *cmn.Bck, q url.Values) (header http.Header, statusCode int, err error) { 3298 var ( 3299 tsi *meta.Snode 3300 path = apc.URLPathBuckets.Join(bck.Name) 3301 smap = p.owner.smap.get() 3302 ) 3303 if tsi, err = smap.GetRandTarget(); err != nil { 3304 return 3305 } 3306 if bck.IsCloud() { 3307 config := cmn.GCO.Get() 3308 if config.Backend.Get(bck.Provider) == nil { 3309 err = &cmn.ErrMissingBackend{Provider: bck.Provider} 3310 statusCode = http.StatusNotFound 3311 err = cmn.NewErrFailedTo(p, "lookup Cloud bucket", bck, err, statusCode) 3312 return 3313 } 3314 } 3315 q = bck.AddToQuery(q) 3316 cargs := allocCargs() 3317 { 3318 cargs.si = tsi 3319 cargs.req = cmn.HreqArgs{Method: http.MethodHead, Path: path, Query: q} 3320 cargs.timeout = apc.DefaultTimeout 3321 } 3322 res := p.call(cargs, smap) 3323 if res.status == http.StatusNotFound { 3324 err = cmn.NewErrRemoteBckNotFound(bck) 3325 } else if res.status == http.StatusGone { 3326 err = cmn.NewErrRemoteBckOffline(bck) 3327 } else { 3328 err = res.err 3329 header = res.header 3330 } 3331 statusCode = res.status 3332 freeCargs(cargs) 3333 freeCR(res) 3334 return 3335 } 3336 3337 //////////////// 3338 // misc utils // 3339 //////////////// 3340 3341 func resolveUUIDBMD(bmds bmds) (*bucketMD, error) { 3342 var ( 3343 mlist = make(map[string][]cluMeta) // uuid => list(targetRegMeta) 3344 maxor = 
make(map[string]*bucketMD) // uuid => max-ver BMD 3345 ) 3346 // results => (mlist, maxor) 3347 for si, bmd := range bmds { 3348 if bmd.Version == 0 { 3349 continue 3350 } 3351 mlist[bmd.UUID] = append(mlist[bmd.UUID], cluMeta{BMD: bmd, SI: si}) 3352 3353 if rbmd, ok := maxor[bmd.UUID]; !ok { 3354 maxor[bmd.UUID] = bmd 3355 } else if rbmd.Version < bmd.Version { 3356 maxor[bmd.UUID] = bmd 3357 } 3358 } 3359 if len(maxor) == 0 { 3360 return nil, errNoBMD 3361 } 3362 // by simple majority 3363 uuid, l := "", 0 3364 for u, lst := range mlist { 3365 if l < len(lst) { 3366 uuid, l = u, len(lst) 3367 } 3368 } 3369 for u, lst := range mlist { 3370 if l == len(lst) && u != uuid { 3371 s := fmt.Sprintf("%s: BMDs have different UUIDs with no simple majority:\n%v", 3372 ciError(60), mlist) 3373 return nil, &errBmdUUIDSplit{s} 3374 } 3375 } 3376 var err error 3377 if len(mlist) > 1 { 3378 s := fmt.Sprintf("%s: BMDs have different UUIDs with simple majority: %s:\n%v", 3379 ciError(70), uuid, mlist) 3380 err = &errTgtBmdUUIDDiffer{s} 3381 } 3382 bmd := maxor[uuid] 3383 cos.Assert(cos.IsValidUUID(bmd.UUID)) 3384 return bmd, err 3385 } 3386 3387 func ciError(num int) string { 3388 return fmt.Sprintf(cmn.FmtErrIntegrity, ciePrefix, num, cmn.GitHubHome) 3389 } 3390 3391 // 3392 // termination(s) 3393 // 3394 3395 func (p *proxy) termKalive(action string) { 3396 p.keepalive.ctrl(kaSuspendMsg) 3397 3398 err := fmt.Errorf("%s: term-kalive by %q", p, action) 3399 xreg.AbortAll(err) 3400 } 3401 3402 func (p *proxy) shutdown(action string) { 3403 p.Stop(&errNoUnregister{action}) 3404 } 3405 3406 func (p *proxy) decommission(action string, opts *apc.ActValRmNode) { 3407 cleanupConfigDir(p.Name(), opts.KeepInitialConfig) 3408 if !opts.NoShutdown { 3409 p.Stop(&errNoUnregister{action}) 3410 } 3411 } 3412 3413 // and return from rungroup.run 3414 func (p *proxy) Stop(err error) { 3415 var ( 3416 s = "Stopping " + p.String() 3417 smap = p.owner.smap.get() 3418 isPrimary = smap.isPrimary(p.si) 
3419 e, isEnu = err.(*errNoUnregister) 3420 ) 3421 if isPrimary { 3422 s += "(primary)" 3423 if !isEnu || e.action != apc.ActShutdownCluster { 3424 if npsi, err := smap.HrwProxy(p.SID()); err == nil { 3425 p.notifyCandidate(npsi, smap) 3426 } 3427 } 3428 } 3429 if err == nil { 3430 nlog.Infoln(s) 3431 } else { 3432 nlog.Warningf("%s: %v", s, err) 3433 } 3434 xreg.AbortAll(errors.New("p-stop")) 3435 3436 p.htrun.stop(&sync.WaitGroup{}, !isPrimary && smap.isValid() && !isEnu /*rmFromSmap*/) 3437 } 3438 3439 // on a best-effort basis, ignoring errors and bodyclose 3440 func (p *proxy) notifyCandidate(npsi *meta.Snode, smap *smapX) { 3441 cargs := allocCargs() 3442 cargs.si = npsi 3443 cargs.req = cmn.HreqArgs{Method: http.MethodPut, Base: npsi.URL(cmn.NetIntraControl), Path: apc.URLPathVotePriStop.S} 3444 req, err := cargs.req.Req() 3445 if err != nil { 3446 return 3447 } 3448 req.Header.Set(apc.HdrCallerID, p.SID()) 3449 req.Header.Set(apc.HdrCallerSmapVer, smap.vstr) 3450 g.client.control.Do(req) //nolint:bodyclose // exiting 3451 }